Buckets:

hf-doc-build/doc-dev / transformers /main /zh /torchscript.html
rtrm's picture
download
raw
36.8 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;导出为 TorchScript&quot;,&quot;local&quot;:&quot;导出为-torchscript&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;TorchScript 参数和绑定权重&quot;,&quot;local&quot;:&quot;torchscript-参数和绑定权重&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;虚拟输入和标准长度&quot;,&quot;local&quot;:&quot;虚拟输入和标准长度&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;在 Python 中使用 TorchScript&quot;,&quot;local&quot;:&quot;在-python-中使用-torchscript&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;保存模型&quot;,&quot;local&quot;:&quot;保存模型&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;加载模型&quot;,&quot;local&quot;:&quot;加载模型&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;使用 trace 模型进行推断&quot;,&quot;local&quot;:&quot;使用-trace-模型进行推断&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;使用 Neuron SDK 将 Hugging Face TorchScript 模型部署到 AWS&quot;,&quot;local&quot;:&quot;使用-neuron-sdk-将-hugging-face-torchscript-模型部署到-aws&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;影响&quot;,&quot;local&quot;:&quot;影响&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;依赖关系&quot;,&quot;local&quot;:&quot;依赖关系&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;将模型转换为 AWS Neuron&quot;,&quot;local&quot;:&quot;将模型转换为-aws-neuron&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/main/zh/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/entry/start.a61b9c50.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/scheduler.9991993c.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/singletons.2822fe91.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/index.02cfeb18.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/paths.d66588b4.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/entry/app.99775688.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/index.7fc9a5e7.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/nodes/0.f4c5a5c1.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/nodes/62.11cccfd1.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/Tip.9de92fc6.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/CodeBlock.e11cba92.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/EditOnGithub.84ab7f0e.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;导出为 TorchScript&quot;,&quot;local&quot;:&quot;导出为-torchscript&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;TorchScript 参数和绑定权重&quot;,&quot;local&quot;:&quot;torchscript-参数和绑定权重&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;虚拟输入和标准长度&quot;,&quot;local&quot;:&quot;虚拟输入和标准长度&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;在 Python 中使用 TorchScript&quot;,&quot;local&quot;:&quot;在-python-中使用-torchscript&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;保存模型&quot;,&quot;local&quot;:&quot;保存模型&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;加载模型&quot;,&quot;local&quot;:&quot;加载模型&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;使用 trace 模型进行推断&quot;,&quot;local&quot;:&quot;使用-trace-模型进行推断&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;使用 Neuron SDK 将 Hugging Face TorchScript 模型部署到 AWS&quot;,&quot;local&quot;:&quot;使用-neuron-sdk-将-hugging-face-torchscript-模型部署到-aws&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;影响&quot;,&quot;local&quot;:&quot;影响&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;依赖关系&quot;,&quot;local&quot;:&quot;依赖关系&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;将模型转换为 AWS Neuron&quot;,&quot;local&quot;:&quot;将模型转换为-aws-neuron&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="导出为-torchscript" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#导出为-torchscript"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>导出为 TorchScript</span></h1> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1gfz947">这是开始使用 TorchScript 进行实验的起点,我们仍在探索其在变量输入大小模型中的能力。
这是我们关注的焦点,我们将在即将发布的版本中深入分析,提供更多的代码示例、更灵活的实现以及比较
Python 代码与编译 TorchScript 的性能基准。</p></div> <p data-svelte-h="svelte-s0qya2">根据 <a href="https://pytorch.org/docs/stable/jit.html" rel="nofollow">TorchScript 文档</a></p> <blockquote data-svelte-h="svelte-rlo4iu"><p>TorchScript 是从 PyTorch 代码创建可序列化和可优化的模型的一种方式。</p></blockquote> <p data-svelte-h="svelte-n5bnu6">有两个 PyTorch 模块:<a href="https://pytorch.org/docs/stable/jit.html" rel="nofollow">JIT 和 TRACE</a>
这两个模块允许开发人员将其模型导出到其他程序中重用,比如面向效率的 C++ 程序。</p> <p data-svelte-h="svelte-13dtdru">我们提供了一个接口,允许您将 🤗 Transformers 模型导出为 TorchScript,
以便在与基于 PyTorch 的 Python 程序不同的环境中重用。
本文解释如何使用 TorchScript 导出并使用我们的模型。</p> <p data-svelte-h="svelte-12yrypg">导出模型需要两个步骤:</p> <ul data-svelte-h="svelte-1fh86cr"><li>使用 <code>torchscript</code> 参数实例化模型</li> <li>使用虚拟输入进行前向传递</li></ul> <p data-svelte-h="svelte-1fwt0g0">这些必要条件意味着开发人员应该注意以下详细信息。</p> <h2 class="relative group"><a id="torchscript-参数和绑定权重" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#torchscript-参数和绑定权重"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>TorchScript 参数和绑定权重</span></h2> <p data-svelte-h="svelte-1xhai90"><code>torchscript</code> 参数是必需的,因为大多数 🤗 Transformers 语言模型的 <code>Embedding</code> 层和
<code>Decoding</code> 层之间有绑定权重。TorchScript 不允许导出具有绑定权重的模型,因此必须事先解绑和克隆权重。</p> <p data-svelte-h="svelte-1rblw4n">使用 <code>torchscript</code> 参数实例化的模型将其 <code>Embedding</code> 层和 <code>Decoding</code> 层分开,
这意味着它们不应该在后续进行训练。训练将导致这两层不同步,产生意外结果。</p> <p data-svelte-h="svelte-1rojuw1">对于没有语言模型头部的模型,情况不同,因为这些模型没有绑定权重。
这些模型可以安全地导出而无需 <code>torchscript</code> 参数。</p> <h2 class="relative group"><a id="虚拟输入和标准长度" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#虚拟输入和标准长度"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>虚拟输入和标准长度</span></h2> <p data-svelte-h="svelte-3rxu07">虚拟输入用于模型的前向传递。当输入的值传播到各层时,PyTorch 会跟踪在每个张量上执行的不同操作。
然后使用记录的操作来创建模型的 <em>trace</em></p> <p data-svelte-h="svelte-1ha254l">跟踪是相对于输入的维度创建的。因此,它受到虚拟输入的维度限制,对于任何其他序列长度或批量大小都不起作用。
当尝试使用不同大小时,会引发以下错误:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->`The expanded size of the tensor (3) must match the existing size (7) at non-singleton dimension 2`<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19fynrt">我们建议使用至少与推断期间将馈送到模型的最大输入一样大的虚拟输入大小进行跟踪。
填充可以帮助填补缺失的值。然而,由于模型是使用更大的输入大小进行跟踪的,矩阵的维度也会很大,导致更多的计算。</p> <p data-svelte-h="svelte-1surt3l">在每个输入上执行的操作总数要仔细考虑,并在导出不同序列长度模型时密切关注性能。</p> <h2 class="relative group"><a id="在-python-中使用-torchscript" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#在-python-中使用-torchscript"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>在 Python 中使用 TorchScript</span></h2> <p data-svelte-h="svelte-1d41jn5">本节演示了如何保存和加载模型以及如何使用 trace 进行推断。</p> <h3 class="relative group"><a id="保存模型" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#保存模型"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>保存模型</span></h3> <p data-svelte-h="svelte-1l6vll5">要使用 TorchScript 导出 <code>BertModel</code>,请从 <code>BertConfig</code> 类实例化 <code>BertModel</code>
然后将其保存到名为 <code>traced_bert.pt</code> 的磁盘文件中:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BertModel, BertTokenizer, BertConfig
<span class="hljs-keyword">import</span> torch
enc = BertTokenizer.from_pretrained(<span class="hljs-string">&quot;google-bert/bert-base-uncased&quot;</span>)
<span class="hljs-comment"># 对输入文本分词</span>
text = <span class="hljs-string">&quot;[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]&quot;</span>
tokenized_text = enc.tokenize(text)
<span class="hljs-comment"># 屏蔽一个输入 token</span>
masked_index = <span class="hljs-number">8</span>
tokenized_text[masked_index] = <span class="hljs-string">&quot;[MASK]&quot;</span>
indexed_tokens = enc.convert_tokens_to_ids(tokenized_text)
segments_ids = [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]
<span class="hljs-comment"># 创建虚拟输入</span>
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])
dummy_input = [tokens_tensor, segments_tensors]
<span class="hljs-comment"># 使用 torchscript 参数初始化模型</span>
<span class="hljs-comment"># 即使此模型没有 LM Head,也将参数设置为 True。</span>
config = BertConfig(
vocab_size_or_config_json_file=<span class="hljs-number">32000</span>,
hidden_size=<span class="hljs-number">768</span>,
num_hidden_layers=<span class="hljs-number">12</span>,
num_attention_heads=<span class="hljs-number">12</span>,
intermediate_size=<span class="hljs-number">3072</span>,
torchscript=<span class="hljs-literal">True</span>,
)
<span class="hljs-comment"># 实例化模型</span>
model = BertModel(config)
<span class="hljs-comment"># 模型需要处于评估模式</span>
model.<span class="hljs-built_in">eval</span>()
<span class="hljs-comment"># 如果您使用 *from_pretrained* 实例化模型,还可以轻松设置 TorchScript 参数</span>
model = BertModel.from_pretrained(<span class="hljs-string">&quot;google-bert/bert-base-uncased&quot;</span>, torchscript=<span class="hljs-literal">True</span>)
<span class="hljs-comment"># 创建 trace</span>
traced_model = torch.jit.trace(model, [tokens_tensor, segments_tensors])
torch.jit.save(traced_model, <span class="hljs-string">&quot;traced_bert.pt&quot;</span>)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="加载模型" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#加载模型"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>加载模型</span></h3> <p data-svelte-h="svelte-8mcedk">现在,您可以从磁盘加载先前保存的 <code>BertModel</code><code>traced_bert.pt</code>,并在先前初始化的 <code>dummy_input</code> 上使用:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->loaded_model = torch.jit.load(<span class="hljs-string">&quot;traced_bert.pt&quot;</span>)
loaded_model.<span class="hljs-built_in">eval</span>()
all_encoder_layers, pooled_output = loaded_model(*dummy_input)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="使用-trace-模型进行推断" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#使用-trace-模型进行推断"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>使用 trace 模型进行推断</span></h3> <p data-svelte-h="svelte-15zasob">通过使用其 <code>__call__</code> dunder 方法使用 trace 模型进行推断:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->traced_model(tokens_tensor, segments_tensors)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="使用-neuron-sdk-将-hugging-face-torchscript-模型部署到-aws" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#使用-neuron-sdk-将-hugging-face-torchscript-模型部署到-aws"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>使用 Neuron SDK 将 Hugging Face TorchScript 模型部署到 AWS</span></h2> <p data-svelte-h="svelte-qtxveh">AWS 引入了用于云端低成本、高性能机器学习推理的
<a href="https://aws.amazon.com/ec2/instance-types/inf1/" rel="nofollow">Amazon EC2 Inf1</a> 实例系列。
Inf1 实例由 AWS Inferentia 芯片提供支持,这是一款专为深度学习推理工作负载而构建的定制硬件加速器。
<a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/#" rel="nofollow">AWS Neuron</a>
Inferentia 的 SDK,支持对 transformers 模型进行跟踪和优化,以便在 Inf1 上部署。Neuron SDK 提供:</p> <ol data-svelte-h="svelte-pj5pw8"><li>简单易用的 API,只需更改一行代码即可为云端推理跟踪和优化 TorchScript 模型。</li> <li>针对<a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/benchmark/" rel="nofollow">改进的性能成本</a>的即插即用性能优化。</li> <li>支持使用 <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/src/examples/pytorch/bert_tutorial/tutorial_pretrained_bert.html" rel="nofollow">PyTorch</a>
<a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/src/examples/tensorflow/huggingface_bert/huggingface_bert.html" rel="nofollow">TensorFlow</a>
构建的 Hugging Face transformers 模型。</li></ol> <h3 class="relative group"><a id="影响" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#影响"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>影响</span></h3> <p data-svelte-h="svelte-8m4g1s">基于 <a href="https://huggingface.co/docs/transformers/main/model_doc/bert" rel="nofollow">BERT(来自 Transformers 的双向编码器表示)</a>架构的
transformers 模型,或其变体,如 <a href="https://huggingface.co/docs/transformers/main/model_doc/distilbert" rel="nofollow">distilBERT</a>
<a href="https://huggingface.co/docs/transformers/main/model_doc/roberta" rel="nofollow">roBERTa</a> 在 Inf1 上运行最佳,
可用于生成抽取式问答、序列分类和标记分类等任务。然而,文本生成任务仍可以适应在 Inf1 上运行,
如这篇 <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/src/examples/pytorch/transformers-marianmt.html" rel="nofollow">AWS Neuron MarianMT 教程</a>所述。
有关可以直接在 Inferentia 上转换的模型的更多信息,请参阅 Neuron 文档的<a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/models/models-inferentia.html#models-inferentia" rel="nofollow">模型架构适配</a>章节。</p> <h3 class="relative group"><a id="依赖关系" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#依赖关系"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>依赖关系</span></h3> <p data-svelte-h="svelte-y456f4">使用 AWS Neuron 将模型转换为模型需要一个
<a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/neuron-frameworks/pytorch-neuron/index.html#installation-guide" rel="nofollow">Neuron SDK 环境</a>
它已经预先配置在 <a href="https://docs.aws.amazon.com/dlami/latest/devguide/tutorial-inferentia-launching.html" rel="nofollow">AWS 深度学习 AMI</a>上。</p> <h3 class="relative group"><a id="将模型转换为-aws-neuron" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#将模型转换为-aws-neuron"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>将模型转换为 AWS Neuron</span></h3> <p data-svelte-h="svelte-1u7opmj">使用与 <a href="torchscript#using-torchscript-in-python">Python 中使用 TorchScript</a> 相同的代码来跟踪
<code>BertModel</code> 以将模型转换为 AWS NEURON。导入 <code>torch.neuron</code> 框架扩展以通过 Python API 访问 Neuron SDK 的组件:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BertModel, BertTokenizer, BertConfig
<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> torch.neuron<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1t1ndji">您只需要修改下面这一行:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-deletion">- torch.jit.trace(model, [tokens_tensor, segments_tensors])</span>
<span class="hljs-addition">+ torch.neuron.trace(model, [token_tensor, segments_tensors])</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1t6xzzv">这样就能使 Neuron SDK 跟踪模型并对其进行优化,以在 Inf1 实例上运行。</p> <p data-svelte-h="svelte-1tzl0b4">要了解有关 AWS Neuron SDK 功能、工具、示例教程和最新更新的更多信息,
请参阅 <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/index.html" rel="nofollow">AWS NeuronSDK 文档</a></p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/zh/torchscript.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_173uja2 = {
assets: "/docs/transformers/main/zh",
base: "/docs/transformers/main/zh",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/transformers/main/zh/_app/immutable/entry/start.a61b9c50.js"),
import("/docs/transformers/main/zh/_app/immutable/entry/app.99775688.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 62],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
36.8 kB
·
Xet hash:
f2799f2ef9787cd8cbede93ba93e3e8467785b7ed604f9e975dc1fd7fe0b26bc

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.