Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Export to TorchScript","local":"export-to-torchscript","sections":[{"title":"TorchScript flag and tied weights","local":"torchscript-flag-and-tied-weights","sections":[],"depth":2},{"title":"Dummy inputs and standard lengths","local":"dummy-inputs-and-standard-lengths","sections":[],"depth":2},{"title":"Using TorchScript in Python","local":"using-torchscript-in-python","sections":[{"title":"Saving a model","local":"saving-a-model","sections":[],"depth":3},{"title":"Loading a model","local":"loading-a-model","sections":[],"depth":3},{"title":"Using a traced model for inference","local":"using-a-traced-model-for-inference","sections":[],"depth":3}],"depth":2},{"title":"Deploy Hugging Face TorchScript models to AWS with the Neuron SDK","local":"deploy-hugging-face-torchscript-models-to-aws-with-the-neuron-sdk","sections":[{"title":"Implications","local":"implications","sections":[],"depth":3},{"title":"Dependencies","local":"dependencies","sections":[],"depth":3},{"title":"Converting a model for AWS Neuron","local":"converting-a-model-for-aws-neuron","sections":[],"depth":3}],"depth":2}],"depth":1}"> | |
| <link href="/docs/transformers/pr_33913/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/entry/start.b67f883f.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/scheduler.25b97de1.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/singletons.62a184e0.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/index.e188933d.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/paths.51881b9e.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/entry/app.e436b1f2.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/index.d9030fc9.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/nodes/0.05e395f5.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/nodes/451.ef775dcc.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/Tip.baa67368.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/CodeBlock.e6cd0d95.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/EditOnGithub.91d95064.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Export to TorchScript","local":"export-to-torchscript","sections":[{"title":"TorchScript flag and tied weights","local":"torchscript-flag-and-tied-weights","sections":[],"depth":2},{"title":"Dummy inputs and standard lengths","local":"dummy-inputs-and-standard-lengths","sections":[],"depth":2},{"title":"Using TorchScript in Python","local":"using-torchscript-in-python","sections":[{"title":"Saving a model","local":"saving-a-model","sections":[],"depth":3},{"title":"Loading a model","local":"loading-a-model","sections":[],"depth":3},{"title":"Using a traced model for inference","local":"using-a-traced-model-for-inference","sections":[],"depth":3}],"depth":2},{"title":"Deploy Hugging Face TorchScript models to AWS with the Neuron SDK","local":"deploy-hugging-face-torchscript-models-to-aws-with-the-neuron-sdk","sections":[{"title":"Implications","local":"implications","sections":[],"depth":3},{"title":"Dependencies","local":"dependencies","sections":[],"depth":3},{"title":"Converting a model for AWS Neuron","local":"converting-a-model-for-aws-neuron","sections":[],"depth":3}],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="export-to-torchscript" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#export-to-torchscript"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Export to TorchScript</span></h1> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-3gxwq2">This is the very beginning of our experiments with TorchScript and we are still | |
| exploring its capabilities with variable-input-size models. It is a focus of interest to | |
| us and we will deepen our analysis in upcoming releases, with more code examples, a more | |
| flexible implementation, and benchmarks comparing Python-based codes with compiled | |
| TorchScript.</p></div> <p data-svelte-h="svelte-16vv52d">According to the <a href="https://pytorch.org/docs/stable/jit.html" rel="nofollow">TorchScript documentation</a>:</p> <blockquote data-svelte-h="svelte-143rw8d"><p>TorchScript is a way to create serializable and optimizable models from PyTorch code.</p></blockquote> <p data-svelte-h="svelte-1ntf3wg">There are two PyTorch modules, <a href="https://pytorch.org/docs/stable/jit.html" rel="nofollow">JIT and | |
| TRACE</a>, that allow developers to export their | |
| models to be reused in other programs like efficiency-oriented C++ programs.</p> <p data-svelte-h="svelte-3e4o7k">We provide an interface that allows you to export 🤗 Transformers models to TorchScript | |
| so they can be reused in a different environment than PyTorch-based Python programs. | |
| Here, we explain how to export and use our models using TorchScript.</p> <p data-svelte-h="svelte-1mmf9kh">Exporting a model requires two things:</p> <ul data-svelte-h="svelte-1yusujd"><li>model instantiation with the <code>torchscript</code> flag</li> <li>a forward pass with dummy inputs</li></ul> <p data-svelte-h="svelte-9awfof">These necessities imply several things developers should be careful about as detailed | |
| below.</p> <h2 class="relative group"><a id="torchscript-flag-and-tied-weights" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#torchscript-flag-and-tied-weights"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>TorchScript flag and tied weights</span></h2> <p data-svelte-h="svelte-qq8y9a">The <code>torchscript</code> flag is necessary because most of the 🤗 Transformers language models | |
| have tied weights between their <code>Embedding</code> layer and their <code>Decoding</code> layer. | |
| TorchScript does not allow you to export models that have tied weights, so it is | |
| necessary to untie and clone the weights beforehand.</p> <p data-svelte-h="svelte-rijh7h">Models instantiated with the <code>torchscript</code> flag have their <code>Embedding</code> layer and | |
| <code>Decoding</code> layer separated, which means that they should not be trained down the line. | |
| Training would desynchronize the two layers, leading to unexpected results.</p> <p data-svelte-h="svelte-1cg0vrq">This is not the case for models that do not have a language model head, as those do not | |
| have tied weights. These models can be safely exported without the <code>torchscript</code> flag.</p> <h2 class="relative group"><a id="dummy-inputs-and-standard-lengths" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dummy-inputs-and-standard-lengths"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dummy inputs and standard lengths</span></h2> <p data-svelte-h="svelte-1an8gt1">The dummy inputs are used for a models forward pass. While the inputs’ values are | |
| propagated through the layers, PyTorch keeps track of the different operations executed | |
| on each tensor. These recorded operations are then used to create the <em>trace</em> of the | |
| model.</p> <p data-svelte-h="svelte-1pagcqs">The trace is created relative to the inputs’ dimensions. It is therefore constrained by | |
| the dimensions of the dummy input, and will not work for any other sequence length or | |
| batch size. When trying with a different size, the following error is raised:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->`The expanded <span class="hljs-built_in">size</span> of the tensor (<span class="hljs-number">3</span>) must match the existing <span class="hljs-built_in">size</span> (<span class="hljs-number">7</span>) at non-singleton <span class="hljs-keyword">dimension</span> <span class="hljs-number">2</span>`<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-e7h40q">We recommended you trace the model with a dummy input size at least as large as the | |
| largest input that will be fed to the model during inference. Padding can help fill the | |
| missing values. However, since the model is traced with a larger input size, the | |
| dimensions of the matrix will also be large, resulting in more calculations.</p> <p data-svelte-h="svelte-k3vdvp">Be careful of the total number of operations done on each input and follow the | |
| performance closely when exporting varying sequence-length models.</p> <h2 class="relative group"><a id="using-torchscript-in-python" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-torchscript-in-python"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using TorchScript in Python</span></h2> <p data-svelte-h="svelte-3ddxtn">This section demonstrates how to save and load models as well as how to use the trace | |
| for inference.</p> <h3 class="relative group"><a id="saving-a-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#saving-a-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Saving a model</span></h3> <p data-svelte-h="svelte-1y405n0">To export a <code>BertModel</code> with TorchScript, instantiate <code>BertModel</code> from the <code>BertConfig</code> | |
| class and then save it to disk under the filename <code>traced_bert.pt</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BertModel, BertTokenizer, BertConfig | |
| <span class="hljs-keyword">import</span> torch | |
| enc = BertTokenizer.from_pretrained(<span class="hljs-string">"google-bert/bert-base-uncased"</span>) | |
| <span class="hljs-comment"># Tokenizing input text</span> | |
| text = <span class="hljs-string">"[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"</span> | |
| tokenized_text = enc.tokenize(text) | |
| <span class="hljs-comment"># Masking one of the input tokens</span> | |
| masked_index = <span class="hljs-number">8</span> | |
| tokenized_text[masked_index] = <span class="hljs-string">"[MASK]"</span> | |
| indexed_tokens = enc.convert_tokens_to_ids(tokenized_text) | |
| segments_ids = [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>] | |
| <span class="hljs-comment"># Creating a dummy input</span> | |
| tokens_tensor = torch.tensor([indexed_tokens]) | |
| segments_tensors = torch.tensor([segments_ids]) | |
| dummy_input = [tokens_tensor, segments_tensors] | |
| <span class="hljs-comment"># Initializing the model with the torchscript flag</span> | |
| <span class="hljs-comment"># Flag set to True even though it is not necessary as this model does not have an LM Head.</span> | |
| config = BertConfig( | |
| vocab_size_or_config_json_file=<span class="hljs-number">32000</span>, | |
| hidden_size=<span class="hljs-number">768</span>, | |
| num_hidden_layers=<span class="hljs-number">12</span>, | |
| num_attention_heads=<span class="hljs-number">12</span>, | |
| intermediate_size=<span class="hljs-number">3072</span>, | |
| torchscript=<span class="hljs-literal">True</span>, | |
| ) | |
| <span class="hljs-comment"># Instantiating the model</span> | |
| model = BertModel(config) | |
| <span class="hljs-comment"># The model needs to be in evaluation mode</span> | |
| model.<span class="hljs-built_in">eval</span>() | |
| <span class="hljs-comment"># If you are instantiating the model with *from_pretrained* you can also easily set the TorchScript flag</span> | |
| model = BertModel.from_pretrained(<span class="hljs-string">"google-bert/bert-base-uncased"</span>, torchscript=<span class="hljs-literal">True</span>) | |
| <span class="hljs-comment"># Creating the trace</span> | |
| traced_model = torch.jit.trace(model, [tokens_tensor, segments_tensors]) | |
| torch.jit.save(traced_model, <span class="hljs-string">"traced_bert.pt"</span>)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="loading-a-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#loading-a-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Loading a model</span></h3> <p data-svelte-h="svelte-uqg1ob">Now you can load the previously saved <code>BertModel</code>, <code>traced_bert.pt</code>, from disk and use | |
| it on the previously initialised <code>dummy_input</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->loaded_model = torch.jit.load(<span class="hljs-string">"traced_bert.pt"</span>) | |
| loaded_model.<span class="hljs-built_in">eval</span>() | |
| all_encoder_layers, pooled_output = loaded_model(*dummy_input)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="using-a-traced-model-for-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-a-traced-model-for-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using a traced model for inference</span></h3> <p data-svelte-h="svelte-1prvnq1">Use the traced model for inference by using its <code>__call__</code> dunder method:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->traced_model(tokens_tensor, segments_tensors)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="deploy-hugging-face-torchscript-models-to-aws-with-the-neuron-sdk" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deploy-hugging-face-torchscript-models-to-aws-with-the-neuron-sdk"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Deploy Hugging Face TorchScript models to AWS with the Neuron SDK</span></h2> <p data-svelte-h="svelte-1kac3or">AWS introduced the <a href="https://aws.amazon.com/ec2/instance-types/inf1/" rel="nofollow">Amazon EC2 Inf1</a> | |
| instance family for low cost, high performance machine learning inference in the cloud. | |
| The Inf1 instances are powered by the AWS Inferentia chip, a custom-built hardware | |
| accelerator, specializing in deep learning inferencing workloads. <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/#" rel="nofollow">AWS | |
| Neuron</a> is the SDK for | |
| Inferentia that supports tracing and optimizing transformers models for deployment on | |
| Inf1. The Neuron SDK provides:</p> <ol data-svelte-h="svelte-1h435mi"><li>Easy-to-use API with one line of code change to trace and optimize a TorchScript | |
| model for inference in the cloud.</li> <li>Out of the box performance optimizations for <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/benchmark/%3E" rel="nofollow">improved | |
| cost-performance</a>.</li> <li>Support for Hugging Face transformers models built with either | |
| <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/src/examples/pytorch/bert_tutorial/tutorial_pretrained_bert.html" rel="nofollow">PyTorch</a> | |
| or | |
| <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/src/examples/tensorflow/huggingface_bert/huggingface_bert.html" rel="nofollow">TensorFlow</a>.</li></ol> <h3 class="relative group"><a id="implications" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#implications"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Implications</span></h3> <p data-svelte-h="svelte-1kv7oct">Transformers models based on the <a href="https://huggingface.co/docs/transformers/main/model_doc/bert" rel="nofollow">BERT (Bidirectional Encoder Representations from | |
| Transformers)</a> | |
| architecture, or its variants such as | |
| <a href="https://huggingface.co/docs/transformers/main/model_doc/distilbert" rel="nofollow">distilBERT</a> and | |
| <a href="https://huggingface.co/docs/transformers/main/model_doc/roberta" rel="nofollow">roBERTa</a> run best on | |
| Inf1 for non-generative tasks such as extractive question answering, sequence | |
| classification, and token classification. However, text generation tasks can still be | |
| adapted to run on Inf1 according to this <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/src/examples/pytorch/transformers-marianmt.html" rel="nofollow">AWS Neuron MarianMT | |
| tutorial</a>. | |
| More information about models that can be converted out of the box on Inferentia can be | |
| found in the <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/models/models-inferentia.html#models-inferentia" rel="nofollow">Model Architecture | |
| Fit</a> | |
| section of the Neuron documentation.</p> <h3 class="relative group"><a id="dependencies" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dependencies"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dependencies</span></h3> <p data-svelte-h="svelte-f2qubg">Using AWS Neuron to convert models requires a <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/neuron-frameworks/pytorch-neuron/index.html#installation-guide" rel="nofollow">Neuron SDK | |
| environment</a> | |
| which comes preconfigured on <a href="https://docs.aws.amazon.com/dlami/latest/devguide/tutorial-inferentia-launching.html" rel="nofollow">AWS Deep Learning | |
| AMI</a>.</p> <h3 class="relative group"><a id="converting-a-model-for-aws-neuron" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#converting-a-model-for-aws-neuron"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Converting a model for AWS Neuron</span></h3> <p data-svelte-h="svelte-1flzoza">Convert a model for AWS NEURON using the same code from <a href="torchscript#using-torchscript-in-python">Using TorchScript in | |
| Python</a> to trace a <code>BertModel</code>. Import the | |
| <code>torch.neuron</code> framework extension to access the components of the Neuron SDK through a | |
| Python API:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BertModel, BertTokenizer, BertConfig | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> torch.neuron<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-uv51ic">You only need to modify the following line:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-deletion">- torch.jit.trace(model, [tokens_tensor, segments_tensors])</span> | |
| <span class="hljs-addition">+ torch.neuron.trace(model, [tokens_tensor, segments_tensors])</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1dylit2">This enables the Neuron SDK to trace the model and optimize it for Inf1 instances.</p> <p data-svelte-h="svelte-u8nslf">To learn more about AWS Neuron SDK features, tools, example tutorials and latest | |
| updates, please see the <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/index.html" rel="nofollow">AWS NeuronSDK | |
| documentation</a>.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/en/torchscript.md" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_z647wz = { | |
| assets: "/docs/transformers/pr_33913/en", | |
| base: "/docs/transformers/pr_33913/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/transformers/pr_33913/en/_app/immutable/entry/start.b67f883f.js"), | |
| import("/docs/transformers/pr_33913/en/_app/immutable/entry/app.e436b1f2.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 451], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 37.8 kB
- Xet hash:
- e7c5537fa45e78b9adb728b05031005a4cfef4a6654c0a0549b4d4db67757ad5
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.