Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Deploy Mixtral 8x7B on AWS Inferentia2&quot;,&quot;local&quot;:&quot;deploy-mixtral-8x7b-on-aws-inferentia2&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;1. Setup development environment&quot;,&quot;local&quot;:&quot;1-setup-development-environment&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;2. Retrieve the latest Hugging Face vLLM Neuron DLC&quot;,&quot;local&quot;:&quot;2-retrieve-the-latest-hugging-face-vllm-neuron-dlc&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;3. Deploy Mixtral 8x7B to Inferentia2&quot;,&quot;local&quot;:&quot;3-deploy-mixtral-8x7b-to-inferentia2&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;4. Clean up&quot;,&quot;local&quot;:&quot;4-clean-up&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"> | |
| <link href="/docs/optimum.neuron/pr_1097/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/entry/start.82e2ff17.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/scheduler.56725da7.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/singletons.2080b4fc.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/paths.90dabf70.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/entry/app.9b0c0103.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/preload-helper.9dba61fb.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/index.18a26576.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/nodes/0.912aab06.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/nodes/20.b88c61e9.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/CopyLLMTxtMenu.fb3856d8.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/globals.7f7f1b26.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.a16844e0.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/CodeBlock.2d00672f.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Deploy Mixtral 8x7B on AWS Inferentia2&quot;,&quot;local&quot;:&quot;deploy-mixtral-8x7b-on-aws-inferentia2&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;1. Setup development environment&quot;,&quot;local&quot;:&quot;1-setup-development-environment&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;2. Retrieve the latest Hugging Face vLLM Neuron DLC&quot;,&quot;local&quot;:&quot;2-retrieve-the-latest-hugging-face-vllm-neuron-dlc&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;3. Deploy Mixtral 8x7B to Inferentia2&quot;,&quot;local&quot;:&quot;3-deploy-mixtral-8x7b-to-inferentia2&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;4. Clean up&quot;,&quot;local&quot;:&quot;4-clean-up&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> 
<button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="deploy-mixtral-8x7b-on-aws-inferentia2" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deploy-mixtral-8x7b-on-aws-inferentia2"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Deploy Mixtral 8x7B on AWS Inferentia2</span></h1> <p data-svelte-h="svelte-17okc9y">Mixtral 8x7B is an open-source LLM from Mistral AI. 
It is a Sparse Mixture of Experts and has a similar architecture to Mistral 7B, but comes with a twist: it’s actually 8 “expert” models in one. If you want to learn more about MoEs check out <a href="https://huggingface.co/blog/moe" rel="nofollow">Mixture of Experts Explained</a>.</p> <p data-svelte-h="svelte-pam5v6">In this tutorial you will learn how to deploy <a href="https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1" rel="nofollow">mistralai/Mixtral-8x7B-Instruct-v0.1</a> model on AWS Inferentia2 with Hugging Face Optimum Neuron on Amazon SageMaker. We are going to use the Hugging Face vLLM Neuron Container, a purpose-built Inference Container to easily deploy LLMs on AWS Inferentia2 powered by <a href="https://github.com/vllm-project/vllm.git" rel="nofollow">vLLM</a> and <a href="https://huggingface.co/docs/optimum-neuron/index" rel="nofollow">Optimum Neuron</a>.</p> <p data-svelte-h="svelte-df2280">We will cover how to:</p> <ol data-svelte-h="svelte-iuabs3"><li><a href="#1-setup-development-environment">Setup a development environment</a></li> <li><a href="#2-retrieve-the-latest-hugging-face-vllm-neuron-dlc">Retrieve the latest Hugging Face vLLM Neuron DLC</a></li> <li><a href="#3-deploy-mixtral-8x7b-to-inferentia2">Deploy Mixtral 8x7B to Inferentia2</a></li> <li><a href="#4-clean-up">Clean up</a></li></ol> <p data-svelte-h="svelte-fedw35">Let’s get started! 🚀</p> <p data-svelte-h="svelte-1q2zsrn"><a href="https://aws.amazon.com/ec2/instance-types/inf2/" rel="nofollow">AWS Inferentia2 (Inf2)</a> instances are purpose-built EC2 instances for deep learning (DL) inference workloads. 
Here are the different instances of the Inferentia2 family.</p> <table data-svelte-h="svelte-1tmwmqe"><thead><tr><th>instance size</th> <th>accelerators</th> <th>Neuron Cores</th> <th>accelerator memory</th> <th>vCPU</th> <th>CPU Memory</th> <th>on-demand price ($/h)</th></tr></thead> <tbody><tr><td>inf2.xlarge</td> <td>1</td> <td>2</td> <td>32</td> <td>4</td> <td>16</td> <td>0.76</td></tr> <tr><td>inf2.8xlarge</td> <td>1</td> <td>2</td> <td>32</td> <td>32</td> <td>128</td> <td>1.97</td></tr> <tr><td>inf2.24xlarge</td> <td>6</td> <td>12</td> <td>192</td> <td>96</td> <td>384</td> <td>6.49</td></tr> <tr><td>inf2.48xlarge</td> <td>12</td> <td>24</td> <td>384</td> <td>192</td> <td>768</td> <td>12.98</td></tr></tbody></table> <h2 class="relative group"><a id="1-setup-development-environment" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#1-setup-development-environment"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>1. 
Setup development environment</span></h2> <p data-svelte-h="svelte-3gjk7m">For this tutorial, we are going to use a Notebook Instance in Amazon SageMaker with the Python 3 (ipykernel) kernel and the <code>sagemaker</code> Python SDK to deploy Mixtral 8x7B to a SageMaker inference endpoint.</p> <p data-svelte-h="svelte-gxxxnf">Make sure you have the latest version of the SageMaker SDK installed.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->!pip install sagemaker --upgrade --quiet<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-k2b9z7">Then, instantiate the sagemaker role and session.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 
text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> boto3 | |
| <span class="hljs-keyword">from</span> sagemaker.core.helper.session_helper <span class="hljs-keyword">import</span> get_execution_role | |
| <span class="hljs-keyword">try</span>: | |
| role = get_execution_role() | |
| <span class="hljs-keyword">except</span> ValueError: | |
| iam = boto3.client(<span class="hljs-string">"iam"</span>) | |
| role = iam.get_role(RoleName=<span class="hljs-string">"sagemaker_execution_role"</span>)[<span class="hljs-string">"Role"</span>][<span class="hljs-string">"Arn"</span>] | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"sagemaker role arn: <span class="hljs-subst">{role}</span>"</span>)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="2-retrieve-the-latest-hugging-face-vllm-neuron-dlc" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#2-retrieve-the-latest-hugging-face-vllm-neuron-dlc"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>2. Retrieve the latest Hugging Face vLLM Neuron DLC</span></h2> <p data-svelte-h="svelte-1awnn15">The latest Hugging Face vLLM Neuron DLCs can be used to run inference on AWS Inferentia2. To retrieve it you can use the method <code>image_uris.retrieve</code> of the Sagemaker SDK. However, if you have the Optimum Neuron package installed, you can use the <code>ecr.image_uri</code> function to retrieve the appropriate Hugging Face vLLM Neuron DLC URI based on your desired <code>region</code> and <code>version</code>. Default values can be deduced by your AWS credentials. 
For more details see the <a href="https://huggingface.co/docs/optimum-neuron/containers" rel="nofollow">containers</a> documentation.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->!pip install optimum-neuron[neuronx] | |
| <span class="hljs-keyword">from</span> optimum.neuron.utils <span class="hljs-keyword">import</span> ecr | |
| REGION = <span class="hljs-string">"us-east-1"</span> | |
| llm_image = ecr.image_uri(<span class="hljs-string">"vllm"</span>, region=REGION) | |
| <span class="hljs-comment"># print image uri</span> | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"llm image uri: <span class="hljs-subst">{llm_image}</span>"</span>)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="3-deploy-mixtral-8x7b-to-inferentia2" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#3-deploy-mixtral-8x7b-to-inferentia2"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>3. Deploy Mixtral 8x7B to Inferentia2</span></h2> <p data-svelte-h="svelte-4yozv7">At the time of writing, <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/v2.6.0/general/arch/neuron-features/dynamic-shapes.html#neuron-dynamic-shapes" rel="nofollow">AWS Inferentia2 does not support dynamic shapes for inference</a>, which means that we need to specify our sequence length and batch size ahead of time. | |
| To make it easier for customers to utilize the full power of Inferentia2, we created a <a href="https://huggingface.co/docs/optimum-neuron/guides/cache_system" rel="nofollow">neuron model cache</a>, which contains pre-compiled configurations for the most popular LLMs, including Mixtral 8x7B.</p> <p data-svelte-h="svelte-6rv5nu">This means we don’t need to compile the model ourselves, but we can use the pre-compiled model from the cache. You can find compiled/cached configurations on the | |
| <a href="https://huggingface.co/aws-neuron/optimum-neuron-cache/tree/main/inference-cache-config" rel="nofollow">Hugging Face Hub</a>. If your desired configuration is not yet cached, you can compile it yourself using the <a href="https://huggingface.co/docs/optimum-neuron/guides/export_model" rel="nofollow">Optimum CLI</a> or open a request at the <a href="https://huggingface.co/aws-neuron/optimum-neuron-cache/discussions" rel="nofollow">Cache repository</a>.</p> <p data-svelte-h="svelte-1quxa5j">Let’s check the different configurations that are in the cache. For that you first need to log in the Hugging Face Hub, using a <a href="https://huggingface.co/docs/hub/en/security-tokens" rel="nofollow">User Access Token</a> with read access.</p> <p data-svelte-h="svelte-bda3ge">Make sure you have the necessary permissions to access the model. You can request access to the model <a href="https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1" rel="nofollow">here</a>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform 
-translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> notebook_login | |
| notebook_login()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-gvpj30">Then, we need to install the latest version of Optimum Neuron.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->!pip install optimum-neuron --upgrade --quiet<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-9yxrws">Finally, we can query the cache and retrieve the existing set of configurations for which we maintained a compiled version of the model.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" 
focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->!optimum-cli neuron cache lookup <span class="hljs-string">"mistralai/Mixtral-8x7B-Instruct-v0.1"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mgox28">You should retrieve two entries in the cache:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 
opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">***</span> <span class="hljs-number">2</span> <span class="hljs-string">entrie(s)</span> <span class="hljs-string">found</span> <span class="hljs-string">in</span> <span class="hljs-string">cache</span> <span class="hljs-string">for</span> <span class="hljs-string">mistralai/Mixtral-8x7B-Instruct-v0.1</span> <span class="hljs-string">for</span> <span class="hljs-string">inference.***</span> | |
| <span class="hljs-attr">auto_cast_type:</span> <span class="hljs-string">bf16</span> | |
| <span class="hljs-attr">batch_size:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">checkpoint_id:</span> <span class="hljs-string">mistralai/Mixtral-8x7B-Instruct-v0.1</span> | |
| <span class="hljs-attr">checkpoint_revision:</span> <span class="hljs-string">41bd4c9e7e4fb318ca40e721131d4933966c2cc1</span> | |
| <span class="hljs-attr">compiler_type:</span> <span class="hljs-string">neuronx-cc</span> | |
| <span class="hljs-attr">compiler_version:</span> <span class="hljs-number">2.16</span><span class="hljs-number">.372</span><span class="hljs-number">.0</span><span class="hljs-string">+4a9b2326</span> | |
| <span class="hljs-attr">num_cores:</span> <span class="hljs-number">24</span> | |
| <span class="hljs-attr">sequence_length:</span> <span class="hljs-number">4096</span> | |
| <span class="hljs-attr">task:</span> <span class="hljs-string">text-generation</span> | |
| <span class="hljs-attr">auto_cast_type:</span> <span class="hljs-string">bf16</span> | |
| <span class="hljs-attr">batch_size:</span> <span class="hljs-number">4</span> | |
| <span class="hljs-attr">checkpoint_id:</span> <span class="hljs-string">mistralai/Mixtral-8x7B-Instruct-v0.1</span> | |
| <span class="hljs-attr">checkpoint_revision:</span> <span class="hljs-string">41bd4c9e7e4fb318ca40e721131d4933966c2cc1</span> | |
| <span class="hljs-attr">compiler_type:</span> <span class="hljs-string">neuronx-cc</span> | |
| <span class="hljs-attr">compiler_version:</span> <span class="hljs-number">2.16</span><span class="hljs-number">.372</span><span class="hljs-number">.0</span><span class="hljs-string">+4a9b2326</span> | |
| <span class="hljs-attr">num_cores:</span> <span class="hljs-number">24</span> | |
| <span class="hljs-attr">sequence_length:</span> <span class="hljs-number">4096</span> | |
| <span class="hljs-attr">task:</span> <span class="hljs-string">text-generation</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ay2eoj"><strong>Deploying Mixtral 8x7B to a SageMaker Endpoint</strong></p> <p data-svelte-h="svelte-qkir6d">All we need when deploying the model to Amazon SageMaker is to set the Hugging Face model ID and token.</p> <ul data-svelte-h="svelte-1f783f3"><li><code>SM_ON_MODEL</code>: The Hugging Face model ID.</li> <li><code>HF_TOKEN</code>: The Hugging Face API token to access gated models.</li></ul> <p data-svelte-h="svelte-1uoxmdt">Note: even if your model is not gated, we recommend setting your Hugging Face token to avoid rate limitations when fetching weights or pre-compiled neuron artifacts.</p> <p data-svelte-h="svelte-rcd9mj">Optionally, you can specify some deployment parameters to select a specific cached configuration (otherwise a default one will be selected).</p> <ul data-svelte-h="svelte-sq03vv"><li><code>SM_ON_TENSOR_PARALLEL_SIZE</code>: Number of Neuron Cores used for the compilation.</li> <li><code>SM_ON_BATCH_SIZE</code>: The batch size that was used to compile the model.</li> <li><code>SM_ON_SEQUENCE_LENGTH</code>: The sequence length that was used to compile the model.</li></ul> <p data-svelte-h="svelte-1qiwbk5"><strong>Select the right instance type</strong></p> <p data-svelte-h="svelte-jofx65">Mixtral 8x7B is a large model and requires a lot of memory. We are going to use the <code>inf2.48xlarge</code> instance type, which has 192 vCPUs and 384 GB of accelerator memory. The <code>inf2.48xlarge</code> instance comes with 12 Inferentia2 accelerators that include 24 Neuron Cores. In our case we will use a batch size of 1 and a sequence length of 4096.</p> <p data-svelte-h="svelte-ttsgj0">After that we can create our endpoint configuration and deploy the model to Amazon SageMaker. 
It will be fully compatible with the OpenAI Chat Completion API.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> sagemaker.core.resources <span class="hljs-keyword">import</span> Model, ContainerDefinition | |
| <span class="hljs-comment"># Define Model and Endpoint configuration parameter</span> | |
| environment = { | |
| <span class="hljs-string">"SM_ON_MODEL"</span>: <span class="hljs-string">"mistralai/Mixtral-8x7B-Instruct-v0.1"</span>, | |
| <span class="hljs-string">"SM_ON_BATCH_SIZE"</span>: <span class="hljs-string">"1"</span>, <span class="hljs-comment"># Select the configuration with batch size 1</span> | |
| <span class="hljs-string">"HF_TOKEN"</span>: <span class="hljs-string">"&lt;REPLACE WITH YOUR TOKEN&gt;"</span>, | |
| } | |
| <span class="hljs-keyword">assert</span> environment[<span class="hljs-string">"HF_TOKEN"</span>] != <span class="hljs-string">"&lt;REPLACE WITH YOUR TOKEN&gt;"</span>, ( | |
| <span class="hljs-string">"Please replace '&lt;REPLACE WITH YOUR TOKEN&gt;' with your Hugging Face Hub API token"</span> | |
| ) | |
| container = ContainerDefinition(image=llm_image, environment=environment) | |
| <span class="hljs-comment"># create Model with the container definition</span> | |
| model = Model.create( | |
| model_name=<span class="hljs-string">"mixtral-8x7b-neuronx-model"</span>, primary_container=container, execution_role_arn=role, region=REGION | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-4gejcg">After we have created the <code>Model</code> we need to define a deployment configuration. We will deploy the model with the <code>ml.inf2.48xlarge</code> instance type. vLLM will automatically distribute and shard the model across all Inferentia devices.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> sagemaker.core.resources <span class="hljs-keyword">import</span> EndpointConfig, ProductionVariant | |
| <span class="hljs-comment"># sagemaker config</span> | |
| instance_type = <span class="hljs-string">"ml.inf2.48xlarge"</span> | |
| health_check_timeout = <span class="hljs-number">3600</span> <span class="hljs-comment"># additional time to load the model</span> | |
| volume_size = <span class="hljs-number">512</span> <span class="hljs-comment"># size in GB of the EBS volume</span> | |
| <span class="hljs-comment"># create EndpointConfig</span> | |
| endpoint_config = EndpointConfig( | |
| endpoint_config_name=<span class="hljs-string">"mixtral-8x7b-endpoint-config"</span>, | |
| production_variants=[ | |
| ProductionVariant( | |
| model_name=model.model_name, | |
| instance_type=instance_type, | |
| initial_instance_count=<span class="hljs-number">1</span>, | |
| container_startup_health_check_timeout=health_check_timeout, | |
| volume_size=volume_size, | |
| environment=environment, | |
| image_uri=llm_image, | |
| ) | |
| ], | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-aha0vi">We can now deploy the <code>Model</code> to an <code>Endpoint</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> sagemaker.core.resources <span class="hljs-keyword">import</span> Endpoint | |
| endpoint = Endpoint.create( | |
| endpoint_name=<span class="hljs-string">"mixtral-8x7b-neuronx-endpoint"</span>, | |
| endpoint_config_name=endpoint_config.endpoint_config_name, | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-qvkvmm">SageMaker will now create our endpoint and deploy the model to it. It takes around 15 minutes for deployment.</p> <p data-svelte-h="svelte-1nsplg8">After our endpoint is deployed we can run inference on it. We will use the <code>invoke</code> method from the endpoint to run inference on our endpoint.</p> <p data-svelte-h="svelte-7vzs06">The endpoint supports the Messages API, which is fully compatible with the OpenAI Chat Completion API. The Messages API allows us to interact with the model in a conversational way. We can define the role of the message and the content. The role can be <code>system</code>, <code>assistant</code>, or <code>user</code>. The <code>system</code> role is used to provide context to the model and the <code>user</code> role is used to ask questions or provide input to the model.</p> <p data-svelte-h="svelte-iif644">Parameters can be defined as separate attributes of the payload. Check out the chat completion <a href="https://platform.openai.com/docs/api-reference/chat/create" rel="nofollow">documentation</a> to find supported parameters.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 
leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Prompt to generate</span> | |
| messages = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"You are a helpful assistant."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What is deep learning in one sentence?"</span>}, | |
| ]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1pq3qhh">Okay, let's test it.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> json | |
| <span class="hljs-comment"># Generation arguments https://platform.openai.com/docs/api-reference/chat/create</span> | |
| result = endpoint.invoke( | |
| body=json.dumps( | |
| { | |
| <span class="hljs-string">"messages"</span>: messages, | |
| <span class="hljs-string">"max_tokens"</span>: <span class="hljs-number">50</span>, | |
| <span class="hljs-string">"top_k"</span>: <span class="hljs-number">50</span>, | |
| <span class="hljs-string">"top_p"</span>: <span class="hljs-number">0.9</span>, | |
| <span class="hljs-string">"temperature"</span>: <span class="hljs-number">0.7</span>, | |
| } | |
| ), | |
| content_type=<span class="hljs-string">"application/json"</span>, | |
| ) | |
| output = json.loads(result.body.read().decode(<span class="hljs-string">"utf-8"</span>)) | |
| message = output[<span class="hljs-string">"choices"</span>][<span class="hljs-number">0</span>][<span class="hljs-string">"message"</span>] | |
| <span class="hljs-keyword">assert</span> message[<span class="hljs-string">"role"</span>] == <span class="hljs-string">"assistant"</span> | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">"Generated response:"</span>, message[<span class="hljs-string">"content"</span>])<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="4-clean-up" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#4-clean-up"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>4. 
Clean up</span></h2> <p data-svelte-h="svelte-100mxno">To clean up, we can delete the model and endpoint.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model.delete() | |
| endpoint_config.delete() | |
| endpoint.delete()<!-- HTML_TAG_END --></pre></div> <p></p> | |
| <script> | |
| { /* Client bootstrap: sets the __sveltekit_19pvy0q object, then loads the two entry modules and calls kit.start. */ | |
| __sveltekit_19pvy0q = { /* assigned without a declaration keyword; holds the assets/base paths and an empty env */ | |
| assets: "/docs/optimum.neuron/pr_1097/en", | |
| base: "/docs/optimum.neuron/pr_1097/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; /* parent element of this script tag; handed to kit.start below */ | |
| const data = [null,null]; /* two entries, same length as node_ids below; presumably one per node (TODO confirm) */ | |
| Promise.all([ /* dynamically import both entry modules and wait for both before starting */ | |
| import("/docs/optimum.neuron/pr_1097/en/_app/immutable/entry/start.82e2ff17.js"), | |
| import("/docs/optimum.neuron/pr_1097/en/_app/immutable/entry/app.9b0c0103.js") | |
| ]).then(([kit, app]) => { /* kit = start module, app = app module (array order matches the imports) */ | |
| kit.start(app, element, { | |
| node_ids: [0, 20], /* presumably matches the nodes/0 and nodes/20 chunks preloaded in the head (TODO confirm) */ | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 45.7 kB
- Xet hash:
- 1f55481c0cc56574f5ed796d69ce1e26366013867ad9ae7dcec89014af1775f1
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.