Buckets:

rtrm's picture
download
raw
86.5 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Deploy models to Amazon SageMaker&quot;,&quot;local&quot;:&quot;deploy-models-to-amazon-sagemaker&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Installation and setup&quot;,&quot;local&quot;:&quot;installation-and-setup&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Deploy a 🤗 Transformers model trained in SageMaker&quot;,&quot;local&quot;:&quot;deploy-a--transformers-model-trained-in-sagemaker&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Deploy after training&quot;,&quot;local&quot;:&quot;deploy-after-training&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Deploy with model_data&quot;,&quot;local&quot;:&quot;deploy-with-modeldata&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Create a model artifact for deployment&quot;,&quot;local&quot;:&quot;create-a-model-artifact-for-deployment&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Deploy a model from the 🤗 Hub&quot;,&quot;local&quot;:&quot;deploy-a-model-from-the--hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Run batch transform with 🤗 Transformers and SageMaker&quot;,&quot;local&quot;:&quot;run-batch-transform-with--transformers-and-sagemaker&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Deploy an LLM to SageMaker using TGI&quot;,&quot;local&quot;:&quot;deploy-an-llm-to-sagemaker-using-tgi&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;User defined code and modules&quot;,&quot;local&quot;:&quot;user-defined-code-and-modules&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/sagemaker/pr_2188/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/sagemaker/pr_2188/en/_app/immutable/entry/start.48a18f09.js">
<link rel="modulepreload" href="/docs/sagemaker/pr_2188/en/_app/immutable/chunks/scheduler.aec39e6a.js">
<link rel="modulepreload" href="/docs/sagemaker/pr_2188/en/_app/immutable/chunks/singletons.8e7a9ddc.js">
<link rel="modulepreload" href="/docs/sagemaker/pr_2188/en/_app/immutable/chunks/paths.a4e52f32.js">
<link rel="modulepreload" href="/docs/sagemaker/pr_2188/en/_app/immutable/entry/app.8481cdda.js">
<link rel="modulepreload" href="/docs/sagemaker/pr_2188/en/_app/immutable/chunks/preload-helper.382cad4e.js">
<link rel="modulepreload" href="/docs/sagemaker/pr_2188/en/_app/immutable/chunks/index.4ee0a2d0.js">
<link rel="modulepreload" href="/docs/sagemaker/pr_2188/en/_app/immutable/nodes/0.52c1f0fb.js">
<link rel="modulepreload" href="/docs/sagemaker/pr_2188/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/sagemaker/pr_2188/en/_app/immutable/nodes/18.12508964.js">
<link rel="modulepreload" href="/docs/sagemaker/pr_2188/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.3c60bfa3.js">
<link rel="modulepreload" href="/docs/sagemaker/pr_2188/en/_app/immutable/chunks/CodeBlock.543f5448.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Deploy models to Amazon SageMaker&quot;,&quot;local&quot;:&quot;deploy-models-to-amazon-sagemaker&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Installation and setup&quot;,&quot;local&quot;:&quot;installation-and-setup&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Deploy a 🤗 Transformers model trained in SageMaker&quot;,&quot;local&quot;:&quot;deploy-a--transformers-model-trained-in-sagemaker&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Deploy after training&quot;,&quot;local&quot;:&quot;deploy-after-training&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Deploy with model_data&quot;,&quot;local&quot;:&quot;deploy-with-modeldata&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Create a model artifact for deployment&quot;,&quot;local&quot;:&quot;create-a-model-artifact-for-deployment&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Deploy a model from the 🤗 Hub&quot;,&quot;local&quot;:&quot;deploy-a-model-from-the--hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Run batch transform with 🤗 Transformers and SageMaker&quot;,&quot;local&quot;:&quot;run-batch-transform-with--transformers-and-sagemaker&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Deploy an LLM to SageMaker using TGI&quot;,&quot;local&quot;:&quot;deploy-an-llm-to-sagemaker-using-tgi&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;User defined code and modules&quot;,&quot;local&quot;:&quot;user-defined-code-and-modules&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="deploy-models-to-amazon-sagemaker" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deploy-models-to-amazon-sagemaker"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Deploy models to Amazon SageMaker</span></h1> <p data-svelte-h="svelte-ywypfi">Deploying a 🤗 Transformers models in SageMaker for inference is as easy as:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> sagemaker.huggingface <span class="hljs-keyword">import</span> HuggingFaceModel
<span class="hljs-comment"># create Hugging Face Model Class and deploy it as SageMaker endpoint</span>
huggingface_model = HuggingFaceModel(...).deploy()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-lx9xk7">This guide will show you how to deploy models with zero-code using the <a href="https://github.com/aws/sagemaker-huggingface-inference-toolkit" rel="nofollow">Inference Toolkit</a>. The Inference Toolkit builds on top of the <a href="https://huggingface.co/docs/transformers/main_classes/pipelines" rel="nofollow"><code>pipeline</code> feature</a> from 🤗 Transformers. Learn how to:</p> <ul data-svelte-h="svelte-lmxuwx"><li><a href="#installation-and-setup">Install and setup the Inference Toolkit</a>.</li> <li><a href="#deploy-a-transformer-model-trained-in-sagemaker">Deploy a 🤗 Transformers model trained in SageMaker</a>.</li> <li><a href="#deploy-a-model-from-the-hub">Deploy a 🤗 Transformers model from the Hugging Face [model Hub](https://huggingface.co/models)</a>.</li> <li><a href="#run-batch-transform-with-transformers-and-sagemaker">Run a Batch Transform Job using 🤗 Transformers and Amazon SageMaker</a>.</li> <li><a href="#user-defined-code-and-modules">Create a custom inference module</a>.</li></ul> <h2 class="relative group"><a id="installation-and-setup" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#installation-and-setup"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Installation and setup</span></h2> <p data-svelte-h="svelte-uia7yx">Before deploying a 🤗 Transformers model to SageMaker, you need to sign up for an AWS account. If you don’t have an AWS account yet, learn more <a href="https://docs.aws.amazon.com/sagemaker/latest/dg/gs-set-up.html" rel="nofollow">here</a>.</p> <p data-svelte-h="svelte-14pz1nf">Once you have an AWS account, get started using one of the following:</p> <ul data-svelte-h="svelte-nu9uzs"><li><a href="https://docs.aws.amazon.com/sagemaker/latest/dg/gs-studio-onboard.html" rel="nofollow">SageMaker Studio</a></li> <li><a href="https://docs.aws.amazon.com/sagemaker/latest/dg/gs-console.html" rel="nofollow">SageMaker notebook instance</a></li> <li>Local environment</li></ul> <p data-svelte-h="svelte-10vylvb">To start training locally, you need to setup an appropriate <a href="https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html" rel="nofollow">IAM role</a>.</p> <p data-svelte-h="svelte-183wm56">Upgrade to the latest <code>sagemaker</code> version.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install <span class="hljs-string">&#x27;sagemaker&lt;3.0.0&#x27;</span><!-- HTML_TAG_END --></pre></div> <blockquote data-svelte-h="svelte-p7runb"><p>[!WARNING][SageMaker Python SDK v3 has been recently released](<a href="https://github.com/aws/sagemaker-python-sdk" rel="nofollow">https://github.com/aws/sagemaker-python-sdk</a>), so unless specified otherwise, all the documentation and tutorials are still using the <a href="https://github.com/aws/sagemaker-python-sdk/tree/master-v2" rel="nofollow">SageMaker Python SDK v2</a>. We are actively working on updating all the tutorials and examples, but in the meantime make sure to install the SageMaker SDK as <code>pip install &quot;sagemaker&lt;3.0.0&quot;</code>.</p></blockquote> <p data-svelte-h="svelte-aqpf90"><strong>SageMaker environment</strong></p> <p data-svelte-h="svelte-1n86nit">Setup your SageMaker environment as shown below:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> sagemaker
sess = sagemaker.Session()
role = sagemaker.get_execution_role()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-bmcgfj"><em>Note: The execution role is only available when running a notebook within SageMaker. If you run <code>get_execution_role</code> in a notebook not on SageMaker, expect a <code>region</code> error.</em></p> <p data-svelte-h="svelte-12o7543"><strong>Local environment</strong></p> <p data-svelte-h="svelte-qjt50s">Setup your local environment as shown below:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> sagemaker
<span class="hljs-keyword">import</span> boto3
iam_client = boto3.client(<span class="hljs-string">&#x27;iam&#x27;</span>)
role = iam_client.get_role(RoleName=<span class="hljs-string">&#x27;role-name-of-your-iam-role-with-right-permissions&#x27;</span>)[<span class="hljs-string">&#x27;Role&#x27;</span>][<span class="hljs-string">&#x27;Arn&#x27;</span>]
sess = sagemaker.Session()<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="deploy-a--transformers-model-trained-in-sagemaker" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deploy-a--transformers-model-trained-in-sagemaker"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Deploy a 🤗 Transformers model trained in SageMaker</span></h2> <iframe width="700" height="394" src="https://www.youtube.com/embed/pfBGgSGnYLs" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen=""></iframe> <p data-svelte-h="svelte-gux9y">There are two ways to deploy your Hugging Face model trained in SageMaker:</p> <ul data-svelte-h="svelte-g9t1l6"><li>Deploy it after your training has finished.</li> <li>Deploy your saved model at a later time from S3 with the <code>model_data</code>.</li></ul> <p data-svelte-h="svelte-1kzoo7i">📓 Open the <a href="https://github.com/huggingface/notebooks/blob/main/sagemaker/10_deploy_model_from_s3/deploy_transformer_model_from_s3.ipynb" rel="nofollow">deploy_transformer_model_from_s3.ipynb notebook</a> for an example of how to deploy a model from S3 to SageMaker for inference.</p> <h3 class="relative group"><a id="deploy-after-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deploy-after-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Deploy after training</span></h3> <p data-svelte-h="svelte-1bnjjtb">To deploy your model directly after training, ensure all required files are saved in your training script, including the tokenizer and the model.</p> <p data-svelte-h="svelte-qrjq9z">If you use the Hugging Face <code>Trainer</code>, you can pass your tokenizer as an argument to the <code>Trainer</code>. It will be automatically saved when you call <code>trainer.save_model()</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> sagemaker.huggingface <span class="hljs-keyword">import</span> HuggingFace
<span class="hljs-comment">############ pseudo code start ############</span>
<span class="hljs-comment"># create Hugging Face Estimator for training</span>
huggingface_estimator = HuggingFace(....)
<span class="hljs-comment"># start the train job with our uploaded datasets as input</span>
huggingface_estimator.fit(...)
<span class="hljs-comment">############ pseudo code end ############</span>
<span class="hljs-comment"># deploy model to SageMaker Inference</span>
predictor = hf_estimator.deploy(initial_instance_count=<span class="hljs-number">1</span>, instance_type=<span class="hljs-string">&quot;ml.m5.xlarge&quot;</span>)
<span class="hljs-comment"># example request: you always need to define &quot;inputs&quot;</span>
data = {
<span class="hljs-string">&quot;inputs&quot;</span>: <span class="hljs-string">&quot;Camera - You are awarded a SiPix Digital Camera! call 09061221066 from landline. Delivery within 28 days.&quot;</span>
}
<span class="hljs-comment"># request</span>
predictor.predict(data)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1704hsx">After you run your request you can delete the endpoint as shown:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># delete endpoint</span>
predictor.delete_endpoint()<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="deploy-with-modeldata" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deploy-with-modeldata"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Deploy with model_data</span></h3> <p data-svelte-h="svelte-xplu1m">If you’ve already trained your model and want to deploy it at a later time, use the <code>model_data</code> argument to specify the location of your tokenizer and model weights.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> sagemaker.huggingface.model <span class="hljs-keyword">import</span> HuggingFaceModel
<span class="hljs-comment"># create Hugging Face Model Class</span>
huggingface_model = HuggingFaceModel(
model_data=<span class="hljs-string">&quot;s3://models/my-bert-model/model.tar.gz&quot;</span>, <span class="hljs-comment"># path to your trained SageMaker model</span>
role=role, <span class="hljs-comment"># IAM role with permissions to create an endpoint</span>
transformers_version=<span class="hljs-string">&quot;4.26&quot;</span>, <span class="hljs-comment"># Transformers version used</span>
pytorch_version=<span class="hljs-string">&quot;1.13&quot;</span>, <span class="hljs-comment"># PyTorch version used</span>
py_version=<span class="hljs-string">&#x27;py39&#x27;</span>, <span class="hljs-comment"># Python version used</span>
)
<span class="hljs-comment"># deploy model to SageMaker Inference</span>
predictor = huggingface_model.deploy(
initial_instance_count=<span class="hljs-number">1</span>,
instance_type=<span class="hljs-string">&quot;ml.m5.xlarge&quot;</span>
)
<span class="hljs-comment"># example request: you always need to define &quot;inputs&quot;</span>
data = {
<span class="hljs-string">&quot;inputs&quot;</span>: <span class="hljs-string">&quot;Camera - You are awarded a SiPix Digital Camera! call 09061221066 from landline. Delivery within 28 days.&quot;</span>
}
<span class="hljs-comment"># request</span>
predictor.predict(data)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-10df72h">After you run our request, you can delete the endpoint again with:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># delete endpoint</span>
predictor.delete_endpoint()<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="create-a-model-artifact-for-deployment" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#create-a-model-artifact-for-deployment"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Create a model artifact for deployment</span></h3> <p data-svelte-h="svelte-1bx7pn1">For later deployment, you can create a <code>model.tar.gz</code> file that contains all the required files, such as:</p> <ul data-svelte-h="svelte-1yevczk"><li><code>pytorch_model.bin</code></li> <li><code>tf_model.h5</code></li> <li><code>tokenizer.json</code></li> <li><code>tokenizer_config.json</code></li></ul> <p data-svelte-h="svelte-1fj2qwz">For example, your file should look like this:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model.tar.gz/
|- pytorch_model.bin
|- vocab.txt
|- tokenizer_config.json
|- config.json
|- special_tokens_map.json<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-6vx79s">Create your own <code>model.tar.gz</code> from a model from the 🤗 Hub:</p> <ol data-svelte-h="svelte-1oe4mbx"><li>Download a model:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->git xet install
git <span class="hljs-built_in">clone</span> git@hf.co:{repository}<!-- HTML_TAG_END --></pre></div> <ol start="2" data-svelte-h="svelte-1dpsw5r"><li>Create a <code>tar</code> file:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">cd</span> {repository}
tar zcvf model.tar.gz *<!-- HTML_TAG_END --></pre></div> <ol start="3" data-svelte-h="svelte-1jup9fl"><li>Upload <code>model.tar.gz</code> to S3:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->aws s3 <span class="hljs-built_in">cp</span> model.tar.gz &lt;s3://{my-s3-path}&gt;<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-fiqqyd">Now you can provide the S3 URI to the <code>model_data</code> argument to deploy your model later.</p> <h2 class="relative group"><a id="deploy-a-model-from-the--hub" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deploy-a-model-from-the--hub"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Deploy a model from the 🤗 Hub</span></h2> <iframe width="700" height="394" src="https://www.youtube.com/embed/l9QZuazbzWM" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen=""></iframe> <p data-svelte-h="svelte-1rqi6h3">To deploy a model directly from the 🤗 Hub to SageMaker, define two environment variables when you create a <code>HuggingFaceModel</code>:</p> <ul data-svelte-h="svelte-24bif2"><li><code>HF_MODEL_ID</code> defines the model ID which is automatically loaded from <a href="http://huggingface.co/models" rel="nofollow">huggingface.co/models</a> when you create a SageMaker endpoint. Access 10,000+ models on he 🤗 Hub through this environment variable.</li> <li><code>HF_TASK</code> defines the task for the 🤗 Transformers <code>pipeline</code>. A complete list of tasks can be found <a href="https://huggingface.co/docs/transformers/main_classes/pipelines" rel="nofollow">here</a>.</li></ul> <blockquote data-svelte-h="svelte-1me1jis"><p>⚠️ ** Pipelines are not optimized for parallelism (multi-threading) and tend to consume a lot of RAM. For example, on a GPU-based instance, the pipeline operates on a single vCPU. When this vCPU becomes saturated with the inference requests preprocessing, it can create a bottleneck, preventing the GPU from being fully utilized for model inference. Learn more <a href="https://huggingface.co/docs/transformers/en/pipeline_webserver#using-pipelines-for-a-webserver" rel="nofollow">here</a></p></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> sagemaker.huggingface.model <span class="hljs-keyword">import</span> HuggingFaceModel
<span class="hljs-comment"># Hub model configuration &lt;https://huggingface.co/models&gt;</span>
hub = {
<span class="hljs-string">&#x27;HF_MODEL_ID&#x27;</span>:<span class="hljs-string">&#x27;distilbert-base-uncased-distilled-squad&#x27;</span>, <span class="hljs-comment"># model_id from hf.co/models</span>
<span class="hljs-string">&#x27;HF_TASK&#x27;</span>:<span class="hljs-string">&#x27;question-answering&#x27;</span> <span class="hljs-comment"># NLP task you want to use for predictions</span>
}
<span class="hljs-comment"># create Hugging Face Model Class</span>
huggingface_model = HuggingFaceModel(
env=hub, <span class="hljs-comment"># configuration for loading model from Hub</span>
role=role, <span class="hljs-comment"># IAM role with permissions to create an endpoint</span>
transformers_version=<span class="hljs-string">&quot;4.26&quot;</span>, <span class="hljs-comment"># Transformers version used</span>
pytorch_version=<span class="hljs-string">&quot;1.13&quot;</span>, <span class="hljs-comment"># PyTorch version used</span>
py_version=<span class="hljs-string">&#x27;py39&#x27;</span>, <span class="hljs-comment"># Python version used</span>
)
<span class="hljs-comment"># deploy model to SageMaker Inference</span>
predictor = huggingface_model.deploy(
initial_instance_count=<span class="hljs-number">1</span>,
instance_type=<span class="hljs-string">&quot;ml.m5.xlarge&quot;</span>
)
<span class="hljs-comment"># example request: you always need to define &quot;inputs&quot;</span>
data = {
<span class="hljs-string">&quot;inputs&quot;</span>: {
<span class="hljs-string">&quot;question&quot;</span>: <span class="hljs-string">&quot;What is used for inference?&quot;</span>,
<span class="hljs-string">&quot;context&quot;</span>: <span class="hljs-string">&quot;My Name is Philipp and I live in Nuremberg. This model is used with sagemaker for inference.&quot;</span>
}
}
<span class="hljs-comment"># request</span>
predictor.predict(data)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-10df72h">After you run our request, you can delete the endpoint again with:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># delete endpoint</span>
predictor.delete_endpoint()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-116ugu4">📓 Open the <a href="https://github.com/huggingface/notebooks/blob/main/sagemaker/11_deploy_model_from_hf_hub/deploy_transformer_model_from_hf_hub.ipynb" rel="nofollow">deploy_transformer_model_from_hf_hub.ipynb notebook</a> for an example of how to deploy a model from the 🤗 Hub to SageMaker for inference.</p> <h2 class="relative group"><a id="run-batch-transform-with--transformers-and-sagemaker" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#run-batch-transform-with--transformers-and-sagemaker"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Run batch transform with 🤗 Transformers and SageMaker</span></h2> <iframe width="700" height="394" src="https://www.youtube.com/embed/lnTixz0tUBg" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen=""></iframe> <p data-svelte-h="svelte-6zcket">After training a model, you can use <a href="https://docs.aws.amazon.com/sagemaker/latest/dg/how-it-works-batch.html" rel="nofollow">SageMaker batch transform</a> to perform inference with the model. Batch transform accepts your inference data as an S3 URI and then SageMaker will take care of downloading the data, running the prediction, and uploading the results to S3. For more details about batch transform, take a look <a href="https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform.html" rel="nofollow">here</a>.</p> <p data-svelte-h="svelte-11i511d">⚠️ The Hugging Face Inference DLC currently only supports <code>.jsonl</code> for batch transform due to the complex structure of textual data.</p> <p data-svelte-h="svelte-1aa5m67"><em>Note: Make sure your <code>inputs</code> fit the <code>max_length</code> of the model during preprocessing.</em></p> <p data-svelte-h="svelte-h98oit">If you trained a model using the Hugging Face Estimator, call the <code>transformer()</code> method to create a transform job for a model based on the training job (see <a href="https://sagemaker.readthedocs.io/en/stable/overview.html#sagemaker-batch-transform" rel="nofollow">here</a> for more details):</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->batch_job = huggingface_estimator.transformer(
instance_count=<span class="hljs-number">1</span>,
instance_type=<span class="hljs-string">&#x27;ml.p3.2xlarge&#x27;</span>,
strategy=<span class="hljs-string">&#x27;SingleRecord&#x27;</span>)
batch_job.transform(
data=<span class="hljs-string">&#x27;s3://s3-uri-to-batch-data&#x27;</span>,
content_type=<span class="hljs-string">&#x27;application/json&#x27;</span>,
split_type=<span class="hljs-string">&#x27;Line&#x27;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ttbskl">If you want to run your batch transform job later or with a model from the 🤗 Hub, create a <code>HuggingFaceModel</code> instance and then call the <code>transformer()</code> method:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> sagemaker.huggingface.model <span class="hljs-keyword">import</span> HuggingFaceModel
<span class="hljs-comment"># Hub model configuration &lt;https://huggingface.co/models&gt;</span>
hub = {
<span class="hljs-string">&#x27;HF_MODEL_ID&#x27;</span>:<span class="hljs-string">&#x27;distilbert/distilbert-base-uncased-finetuned-sst-2-english&#x27;</span>,
<span class="hljs-string">&#x27;HF_TASK&#x27;</span>:<span class="hljs-string">&#x27;text-classification&#x27;</span>
}
<span class="hljs-comment"># create Hugging Face Model Class</span>
huggingface_model = HuggingFaceModel(
env=hub, <span class="hljs-comment"># configuration for loading model from Hub</span>
role=role, <span class="hljs-comment"># IAM role with permissions to create an endpoint</span>
transformers_version=<span class="hljs-string">&quot;4.26&quot;</span>, <span class="hljs-comment"># Transformers version used</span>
pytorch_version=<span class="hljs-string">&quot;1.13&quot;</span>, <span class="hljs-comment"># PyTorch version used</span>
py_version=<span class="hljs-string">&#x27;py39&#x27;</span>, <span class="hljs-comment"># Python version used</span>
)
<span class="hljs-comment"># create transformer to run a batch job</span>
batch_job = huggingface_model.transformer(
instance_count=<span class="hljs-number">1</span>,
instance_type=<span class="hljs-string">&#x27;ml.p3.2xlarge&#x27;</span>,
strategy=<span class="hljs-string">&#x27;SingleRecord&#x27;</span>
)
<span class="hljs-comment"># starts batch transform job and uses S3 data as input</span>
batch_job.transform(
data=<span class="hljs-string">&#x27;s3://sagemaker-s3-demo-test/samples/input.jsonl&#x27;</span>,
content_type=<span class="hljs-string">&#x27;application/json&#x27;</span>,
split_type=<span class="hljs-string">&#x27;Line&#x27;</span>
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1pml3h7">The <code>input.jsonl</code> looks like this:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;inputs&quot;</span><span class="hljs-punctuation">:</span><span class="hljs-string">&quot;this movie is terrible&quot;</span><span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;inputs&quot;</span><span class="hljs-punctuation">:</span><span class="hljs-string">&quot;this movie is amazing&quot;</span><span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;inputs&quot;</span><span class="hljs-punctuation">:</span><span class="hljs-string">&quot;SageMaker is pretty cool&quot;</span><span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;inputs&quot;</span><span class="hljs-punctuation">:</span><span class="hljs-string">&quot;SageMaker is pretty cool&quot;</span><span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;inputs&quot;</span><span class="hljs-punctuation">:</span><span class="hljs-string">&quot;this movie is terrible&quot;</span><span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;inputs&quot;</span><span class="hljs-punctuation">:</span><span class="hljs-string">&quot;this movie is amazing&quot;</span><span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1uayvx5">📓 Open the <a href="https://github.com/huggingface/notebooks/blob/main/sagemaker/12_batch_transform_inference/sagemaker-notebook.ipynb" rel="nofollow">sagemaker-notebook.ipynb notebook</a> for an example of how to run a batch transform job for inference.</p> <h2 class="relative group"><a id="deploy-an-llm-to-sagemaker-using-tgi" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deploy-an-llm-to-sagemaker-using-tgi"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Deploy an LLM to SageMaker using TGI</span></h2> <p data-svelte-h="svelte-oqngf7">If you are interested in using a high-performance serving container for LLMs, you can use the Hugging Face TGI container. This utilizes the <a href="https://github.com/huggingface/text-generation-inference" rel="nofollow">Text Generation Inference</a> library. A list of compatible models can be found <a href="https://huggingface.co/docs/text-generation-inference/supported_models#supported-models" rel="nofollow">here</a>.</p> <p data-svelte-h="svelte-kb3ib5">First, make sure that the latest version of SageMaker SDK is installed:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install sagemaker&gt;=2.231.0<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1alenu9">Then, we import the SageMaker Python SDK and instantiate a sagemaker_session to find the current region and execution role.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> sagemaker
<span class="hljs-keyword">from</span> sagemaker.huggingface <span class="hljs-keyword">import</span> HuggingFaceModel, get_huggingface_llm_image_uri
<span class="hljs-keyword">import</span> time
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name
role = sagemaker.get_execution_role()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-wr9zty">Next we retrieve the LLM image URI. We use the helper function get_huggingface_llm_image_uri() to generate the appropriate image URI for the Hugging Face Large Language Model (LLM) inference. The function takes a required parameter backend and several optional parameters. The backend specifies the type of backend to use for the model: “huggingface” refers to using Hugging Face TGI backend.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->image_uri = get_huggingface_llm_image_uri(
backend=<span class="hljs-string">&quot;huggingface&quot;</span>,
region=region
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-pyh7bd">Now that we have the image uri, the next step is to configure the model object. We specify a unique name, the image_uri for the managed TGI container, and the execution role for the endpoint. Additionally, we specify a number of environment variables including the <code>HF_MODEL_ID</code> which corresponds to the model from the HuggingFace Hub that will be deployed, and the <code>HF_TASK</code> which configures the inference task to be performed by the model.</p> <p data-svelte-h="svelte-w1qvv3">You should also define <code>SM_NUM_GPUS</code>, which specifies the tensor parallelism degree of the model. Tensor parallelism can be used to split the model across multiple GPUs, which is necessary when working with LLMs that are too big for a single GPU. To learn more about tensor parallelism with inference, see our previous blog post. Here, you should set <code>SM_NUM_GPUS</code> to the number of available GPUs on your selected instance type. For example, in this tutorial, we set <code>SM_NUM_GPUS</code> to 4 because our selected instance type ml.g4dn.12xlarge has 4 available GPUs.</p> <p data-svelte-h="svelte-2gsa94">Note that you can optionally reduce the memory and computational footprint of the model by setting the <code>HF_MODEL_QUANTIZE</code> environment variable to <code>true</code>, but this lower weight precision could affect the quality of the output for some models.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model_name = <span class="hljs-string">&quot;llama-3-1-8b-instruct&quot;</span> + time.strftime(<span class="hljs-string">&quot;%Y-%m-%d-%H-%M-%S&quot;</span>, time.gmtime())
hub = {
<span class="hljs-string">&#x27;HF_MODEL_ID&#x27;</span>:<span class="hljs-string">&#x27;meta-llama/Llama-3.1-8B-Instruct&#x27;</span>,
<span class="hljs-string">&#x27;SM_NUM_GPUS&#x27;</span>:<span class="hljs-string">&#x27;1&#x27;</span>,
<span class="hljs-string">&#x27;HUGGING_FACE_HUB_TOKEN&#x27;</span>: <span class="hljs-string">&#x27;&lt;REPLACE WITH YOUR TOKEN&gt;&#x27;</span>,
}
<span class="hljs-keyword">assert</span> hub[<span class="hljs-string">&#x27;HUGGING_FACE_HUB_TOKEN&#x27;</span>] != <span class="hljs-string">&#x27;&lt;REPLACE WITH YOUR TOKEN&gt;&#x27;</span>, <span class="hljs-string">&quot;You have to provide a token.&quot;</span>
model = HuggingFaceModel(
name=model_name,
env=hub,
role=role,
image_uri=image_uri
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1dpe0gw">Next, we invoke the deploy method to deploy the model.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->predictor = model.deploy(
initial_instance_count=<span class="hljs-number">1</span>,
instance_type=<span class="hljs-string">&quot;ml.g5.2xlarge&quot;</span>,
endpoint_name=model_name
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-16le46z">Once the model is deployed, we can invoke it to generate text. We pass an input prompt and run the predict method to generate a text response from the LLM running in the TGI container.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->input_data = {
<span class="hljs-string">&quot;inputs&quot;</span>: <span class="hljs-string">&quot;The diamondback terrapin was the first reptile to&quot;</span>,
<span class="hljs-string">&quot;parameters&quot;</span>: {
<span class="hljs-string">&quot;do_sample&quot;</span>: <span class="hljs-literal">True</span>,
<span class="hljs-string">&quot;max_new_tokens&quot;</span>: <span class="hljs-number">100</span>,
<span class="hljs-string">&quot;temperature&quot;</span>: <span class="hljs-number">0.7</span>,
<span class="hljs-string">&quot;watermark&quot;</span>: <span class="hljs-literal">True</span>
}
}
predictor.predict(input_data)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-v1a1yw">We receive the following auto-generated text response:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->[{<span class="hljs-string">&#x27;generated_text&#x27;</span>: <span class="hljs-string">&#x27;The diamondback terrapin was the first reptile to make the list, followed by the American alligator, the American crocodile, and the American box turtle. The polecat, a ferret-like animal, and the skunk rounded out the list, both having gained their slots because they have proven to be particularly dangerous to humans.\n\nCalifornians also seemed to appreciate the new list, judging by the comments left after the election.\n\n“This is fantastic,” one commenter declared.\n\n“California is a very&#x27;</span>}]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1vaiyhh">Once we are done experimenting, we delete the endpoint and the model resources.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->predictor.delete_model()
predictor.delete_endpoint()<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="user-defined-code-and-modules" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#user-defined-code-and-modules"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>User defined code and modules</span></h2> <p data-svelte-h="svelte-1ryqcdi">The Hugging Face Inference Toolkit allows the user to override the default methods of the <code>HuggingFaceHandlerService</code>. You will need to create a folder named <code>code/</code> with an <code>inference.py</code> file in it. See <a href="#create-a-model-artifact-for-deployment">here</a> for more details on how to archive your model artifacts. For example:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model.tar.gz/
|- pytorch_model.bin
|- ....
|- code/
|- inference.py
|- requirements.txt <!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1jy9y33">The <code>inference.py</code> file contains your custom inference module, and the <code>requirements.txt</code> file contains additional dependencies that should be added. The custom module can override the following methods:</p> <ul data-svelte-h="svelte-7ouasn"><li><code>model_fn(model_dir)</code> overrides the default method for loading a model. The return value <code>model</code> will be used in <code>predict</code> for predictions. <code>predict</code> receives argument the <code>model_dir</code>, the path to your unzipped <code>model.tar.gz</code>.</li> <li><code>transform_fn(model, data, content_type, accept_type)</code> overrides the default transform function with your custom implementation. You will need to implement your own <code>preprocess</code>, <code>predict</code> and <code>postprocess</code> steps in the <code>transform_fn</code>. This method can’t be combined with <code>input_fn</code>, <code>predict_fn</code> or <code>output_fn</code> mentioned below.</li> <li><code>input_fn(input_data, content_type)</code> overrides the default method for preprocessing. The return value <code>data</code> will be used in <code>predict</code> for predictions. The inputs are:
<ul><li><code>input_data</code> is the raw body of your request.</li> <li><code>content_type</code> is the content type from the request header.</li></ul></li> <li><code>predict_fn(processed_data, model)</code> overrides the default method for predictions. The return value <code>predictions</code> will be used in <code>postprocess</code>. The input is <code>processed_data</code>, the result from <code>preprocess</code>.</li> <li><code>output_fn(prediction, accept)</code> overrides the default method for postprocessing. The return value <code>result</code> will be the response of your request (e.g.<code>JSON</code>). The inputs are:
<ul><li><code>predictions</code> is the result from <code>predict</code>.</li> <li><code>accept</code> is the return accept type from the HTTP Request, e.g. <code>application/json</code>.</li></ul></li></ul> <p data-svelte-h="svelte-d9i7vl">Here is an example of a custom inference module with <code>model_fn</code>, <code>input_fn</code>, <code>predict_fn</code>, and <code>output_fn</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> sagemaker_huggingface_inference_toolkit <span class="hljs-keyword">import</span> decoder_encoder
<span class="hljs-keyword">def</span> <span class="hljs-title function_">model_fn</span>(<span class="hljs-params">model_dir</span>):
<span class="hljs-comment"># implement custom code to load the model</span>
loaded_model = ...
<span class="hljs-keyword">return</span> loaded_model
<span class="hljs-keyword">def</span> <span class="hljs-title function_">input_fn</span>(<span class="hljs-params">input_data, content_type</span>):
<span class="hljs-comment"># decode the input data (e.g. JSON string -&gt; dict)</span>
data = decoder_encoder.decode(input_data, content_type)
<span class="hljs-keyword">return</span> data
<span class="hljs-keyword">def</span> <span class="hljs-title function_">predict_fn</span>(<span class="hljs-params">data, model</span>):
<span class="hljs-comment"># call your custom model with the data</span>
outputs = model(data , ... )
<span class="hljs-keyword">return</span> predictions
<span class="hljs-keyword">def</span> <span class="hljs-title function_">output_fn</span>(<span class="hljs-params">prediction, accept</span>):
<span class="hljs-comment"># convert the model output to the desired output format (e.g. dict -&gt; JSON string)</span>
response = decoder_encoder.encode(prediction, accept)
<span class="hljs-keyword">return</span> response<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1cih48j">Customize your inference module with only <code>model_fn</code> and <code>transform_fn</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> sagemaker_huggingface_inference_toolkit <span class="hljs-keyword">import</span> decoder_encoder
<span class="hljs-keyword">def</span> <span class="hljs-title function_">model_fn</span>(<span class="hljs-params">model_dir</span>):
<span class="hljs-comment"># implement custom code to load the model</span>
loaded_model = ...
<span class="hljs-keyword">return</span> loaded_model
<span class="hljs-keyword">def</span> <span class="hljs-title function_">transform_fn</span>(<span class="hljs-params">model, input_data, content_type, accept</span>):
<span class="hljs-comment"># decode the input data (e.g. JSON string -&gt; dict)</span>
data = decoder_encoder.decode(input_data, content_type)
<span class="hljs-comment"># call your custom model with the data</span>
outputs = model(data , ... )
<span class="hljs-comment"># convert the model output to the desired output format (e.g. dict -&gt; JSON string)</span>
response = decoder_encoder.encode(output, accept)
<span class="hljs-keyword">return</span> response<!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hub-docs/blob/main/docs/sagemaker/source/tutorials/sagemaker-sdk/deploy-sagemaker-sdk.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_16a821l = {
assets: "/docs/sagemaker/pr_2188/en",
base: "/docs/sagemaker/pr_2188/en",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/sagemaker/pr_2188/en/_app/immutable/entry/start.48a18f09.js"),
import("/docs/sagemaker/pr_2188/en/_app/immutable/entry/app.8481cdda.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 18],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
86.5 kB
·
Xet hash:
2d8ba0d0eed4206d985184899c3dfd2148def7f7998bc400128280a395bda64e

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.