Buckets:

hf-doc-build/doc-dev / transformers /main /es /torchscript.html
rtrm's picture
download
raw
38.9 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Exportar a TorchScript&quot;,&quot;local&quot;:&quot;exportar-a-torchscript&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Bandera TorchScript y pesos atados.&quot;,&quot;local&quot;:&quot;bandera-torchscript-y-pesos-atados&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Entradas ficticias y longitudes estándar&quot;,&quot;local&quot;:&quot;entradas-ficticias-y-longitudes-estándar&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Usando TorchScript en Python&quot;,&quot;local&quot;:&quot;usando-torchscript-en-python&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Guardando un modelo&quot;,&quot;local&quot;:&quot;guardando-un-modelo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Cargando un modelo&quot;,&quot;local&quot;:&quot;cargando-un-modelo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Usando un modelo trazado para inferencia&quot;,&quot;local&quot;:&quot;usando-un-modelo-trazado-para-inferencia&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Despliega modelos TorchScript de Hugging Face en AWS con el Neuron SDK&quot;,&quot;local&quot;:&quot;despliega-modelos-torchscript-de-hugging-face-en-aws-con-el-neuron-sdk&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Implicaciones&quot;,&quot;local&quot;:&quot;implicaciones&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Dependencias&quot;,&quot;local&quot;:&quot;dependencias&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Convertir un modelo para AWS Neuron&quot;,&quot;local&quot;:&quot;convertir-un-modelo-para-aws-neuron&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/main/es/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/main/es/_app/immutable/entry/start.b4301699.js">
<link rel="modulepreload" href="/docs/transformers/main/es/_app/immutable/chunks/scheduler.36a0863c.js">
<link rel="modulepreload" href="/docs/transformers/main/es/_app/immutable/chunks/singletons.a6ab9d4a.js">
<link rel="modulepreload" href="/docs/transformers/main/es/_app/immutable/chunks/index.733708bb.js">
<link rel="modulepreload" href="/docs/transformers/main/es/_app/immutable/chunks/paths.815a7736.js">
<link rel="modulepreload" href="/docs/transformers/main/es/_app/immutable/entry/app.eaf9d9af.js">
<link rel="modulepreload" href="/docs/transformers/main/es/_app/immutable/chunks/index.f891bdb2.js">
<link rel="modulepreload" href="/docs/transformers/main/es/_app/immutable/nodes/0.e9e5a018.js">
<link rel="modulepreload" href="/docs/transformers/main/es/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/main/es/_app/immutable/nodes/42.d9bf49b1.js">
<link rel="modulepreload" href="/docs/transformers/main/es/_app/immutable/chunks/Tip.a8272f7f.js">
<link rel="modulepreload" href="/docs/transformers/main/es/_app/immutable/chunks/CodeBlock.3ec784ea.js">
<link rel="modulepreload" href="/docs/transformers/main/es/_app/immutable/chunks/EditOnGithub.a58e27a9.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Exportar a TorchScript&quot;,&quot;local&quot;:&quot;exportar-a-torchscript&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Bandera TorchScript y pesos atados.&quot;,&quot;local&quot;:&quot;bandera-torchscript-y-pesos-atados&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Entradas ficticias y longitudes estándar&quot;,&quot;local&quot;:&quot;entradas-ficticias-y-longitudes-estándar&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Usando TorchScript en Python&quot;,&quot;local&quot;:&quot;usando-torchscript-en-python&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Guardando un modelo&quot;,&quot;local&quot;:&quot;guardando-un-modelo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Cargando un modelo&quot;,&quot;local&quot;:&quot;cargando-un-modelo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Usando un modelo trazado para inferencia&quot;,&quot;local&quot;:&quot;usando-un-modelo-trazado-para-inferencia&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Despliega modelos TorchScript de Hugging Face en AWS con el Neuron SDK&quot;,&quot;local&quot;:&quot;despliega-modelos-torchscript-de-hugging-face-en-aws-con-el-neuron-sdk&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Implicaciones&quot;,&quot;local&quot;:&quot;implicaciones&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Dependencias&quot;,&quot;local&quot;:&quot;dependencias&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Convertir un modelo para AWS Neuron&quot;,&quot;local&quot;:&quot;convertir-un-modelo-para-aws-neuron&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="exportar-a-torchscript" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#exportar-a-torchscript"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Exportar a TorchScript</span></h1> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400">Este es el comienzo de nuestros experimentos con TorchScript y todavía estamos explorando sus capacidades con modelos de variables de entrada. Es un tema de interés para nosotros y profundizaremos en nuestro análisis en las próximas versiones, con más ejemplos de código, una implementación más flexible y comparativas de rendimiento comparando códigos basados en Python con TorchScript compilado.</div> <p data-svelte-h="svelte-1vqkwr0">De acuerdo con la documentación de TorchScript:</p> <blockquote data-svelte-h="svelte-1xutnk0"><p>“TorchScript es una manera de crear modelos serializables y optimizables a partir del código PyTorch.”</p></blockquote> <p data-svelte-h="svelte-7iy5bx">Hay dos módulos de PyTorch, <a href="https://pytorch.org/docs/stable/jit.html" rel="nofollow">JIT y TRACE</a>, que permiten a los desarrolladores exportar sus modelos para ser reusados en otros programas, como los programas de C++ orientados a la eficiencia.</p> <p data-svelte-h="svelte-1gpi2g9">Nosotros proveemos una interface que te permite exportar los modelos 🤗Transformers a TorchScript para que puedan ser reusados en un entorno diferente al de los programas Python basados en PyTorch. Aquí explicamos como exportar y usar nuestros modelos utilizando TorchScript.</p> <p data-svelte-h="svelte-14y3y0y">Exportar un modelo requiere de dos cosas:</p> <ul data-svelte-h="svelte-1p05t5f"><li>La instanciación del modelo con la bandera TorchScript.</li> <li>Un paso hacia adelante con entradas ficticias.</li></ul> <p data-svelte-h="svelte-16ismqp">Estas necesidades implican varias cosas de las que los desarrolladores deben tener cuidado, como se detalla a continuación.</p> <h2 class="relative group"><a id="bandera-torchscript-y-pesos-atados" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bandera-torchscript-y-pesos-atados"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Bandera TorchScript y pesos atados.</span></h2> <p data-svelte-h="svelte-1mpcwfz">La bandera <code>torchscript</code> es necesaria porque la mayoría de los modelos de lenguaje de 🤗Transformers tienen pesos atados entre su <code>capa de incrustación</code> (<code>Embedding</code>) y su <code>capa de decodificación</code> (<code>Decoding</code>). TorchScript no te permite exportar modelos que tienen pesos atados, por lo que es necesario desatar y clonar los pesos de antemano.</p> <p data-svelte-h="svelte-guiczo">Los modelos instanciados con la bandera <code>torchscript</code> tienen su <code>capa de incrustación</code> (<code>Embedding</code>) y su <code>capa de decodificación</code> (<code>Decoding</code>) separadas, lo que significa que no deben ser entrenados más adelante. Entrenar desincronizaría las dos capas, lo que llevaría a resultados inesperados.</p> <p data-svelte-h="svelte-1tjg0h1">Esto no es así para los modelos que no tienen una cabeza de modelo de lenguaje, ya que esos modelos no tienen pesos atados. Estos modelos pueden ser exportados de manera segura sin la bandera <code>torchscript</code>.</p> <h2 class="relative group"><a id="entradas-ficticias-y-longitudes-estándar" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#entradas-ficticias-y-longitudes-estándar"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Entradas ficticias y longitudes estándar</span></h2> <p data-svelte-h="svelte-oi27zv">Las entradas ficticias se utilizan para un paso del modelo hacia adelante. Mientras los valores de las entradas se propagan a través de las capas, PyTorch realiza un seguimiento de las diferentes operaciones ejecutadas en cada tensor. Estas operaciones registradas se utilizan luego para crear <em>la traza</em> del modelo.
La traza se crea en relación con las dimensiones de las entradas. Por lo tanto, está limitada por las dimensiones de la entrada ficticia y no funcionará para ninguna otra longitud de secuencia o tamaño de lote. Cuando se intenta con un tamaño diferente, se genera el siguiente error:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->`El tamañ<span class="hljs-keyword">o</span> expandido del tensor (<span class="hljs-number">3</span>) debe coincidir <span class="hljs-keyword">con</span> <span class="hljs-keyword">el</span> tamañ<span class="hljs-keyword">o</span> existente (<span class="hljs-number">7</span>) <span class="hljs-keyword">en</span> <span class="hljs-keyword">la</span> dimensión <span class="hljs-keyword">no</span> singleton <span class="hljs-number">2</span>`.<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-rlahav">Recomendamos trazar el modelo con un tamaño de entrada ficticio al menos tan grande como la entrada más grande con la que se alimentará al modelo durante la inferencia. El relleno puede ayudar a completar los valores faltantes. Sin embargo, dado que el modelo se traza con un tamaño de entrada más grande, las dimensiones de la matriz también serán grandes, lo que resultará en más cálculos.</p> <p data-svelte-h="svelte-jbqu3">Ten cuidado con el número total de operaciones realizadas en cada entrada y sigue de cerca el rendimiento al exportar modelos con longitudes de secuencia variables.</p> <h2 class="relative group"><a id="usando-torchscript-en-python" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#usando-torchscript-en-python"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Usando TorchScript en Python</span></h2> <p data-svelte-h="svelte-1ygt70h">Esta sección demuestra cómo guardar y cargar modelos, así como cómo usar la traza para la inferencia.</p> <h3 class="relative group"><a id="guardando-un-modelo" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#guardando-un-modelo"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Guardando un modelo</span></h3> <p data-svelte-h="svelte-143au2y">Para exportar un <code>BertModel</code> con TorchScript, instancia <code>BertModel</code> a partir de la clase <code>BertConfig</code> y luego guárdalo en disco bajo el nombre de archivo <code>traced_bert.pt</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BertModel, BertTokenizer, BertConfig
<span class="hljs-keyword">import</span> torch
enc = BertTokenizer.from_pretrained(<span class="hljs-string">&quot;bert-base-uncased&quot;</span>)
<span class="hljs-comment"># Tokenizing input text</span>
text = <span class="hljs-string">&quot;[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]&quot;</span>
tokenized_text = enc.tokenize(text)
<span class="hljs-comment"># Masking one of the input tokens</span>
masked_index = <span class="hljs-number">8</span>
tokenized_text[masked_index] = <span class="hljs-string">&quot;[MASK]&quot;</span>
indexed_tokens = enc.convert_tokens_to_ids(tokenized_text)
segments_ids = [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]
<span class="hljs-comment"># Creating a dummy input</span>
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])
dummy_input = [tokens_tensor, segments_tensors]
<span class="hljs-comment"># Initializing the model with the torchscript flag</span>
<span class="hljs-comment"># Flag set to True even though it is not necessary as this model does not have an LM Head.</span>
config = BertConfig(
vocab_size_or_config_json_file=<span class="hljs-number">32000</span>,
hidden_size=<span class="hljs-number">768</span>,
num_hidden_layers=<span class="hljs-number">12</span>,
num_attention_heads=<span class="hljs-number">12</span>,
intermediate_size=<span class="hljs-number">3072</span>,
torchscript=<span class="hljs-literal">True</span>,
)
<span class="hljs-comment"># Instantiating the model</span>
model = BertModel(config)
<span class="hljs-comment"># The model needs to be in evaluation mode</span>
model.<span class="hljs-built_in">eval</span>()
<span class="hljs-comment"># If you are instantiating the model with *from_pretrained* you can also easily set the TorchScript flag</span>
model = BertModel.from_pretrained(<span class="hljs-string">&quot;bert-base-uncased&quot;</span>, torchscript=<span class="hljs-literal">True</span>)
<span class="hljs-comment"># Creating the trace</span>
traced_model = torch.jit.trace(model, [tokens_tensor, segments_tensors])
torch.jit.save(traced_model, <span class="hljs-string">&quot;traced_bert.pt&quot;</span>)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="cargando-un-modelo" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#cargando-un-modelo"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Cargando un modelo</span></h3> <p data-svelte-h="svelte-vpu7y4">Ahora puedes cargar el <code>BertModel</code> guardado anteriormente, <code>traced_bert.pt</code>, desde el disco y usarlo en la entrada ficticia (<code>dummy_input</code>) previamente inicializada:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->loaded_model = torch.jit.load(<span class="hljs-string">&quot;traced_bert.pt&quot;</span>)
loaded_model.<span class="hljs-built_in">eval</span>()
all_encoder_layers, pooled_output = loaded_model(*dummy_input)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="usando-un-modelo-trazado-para-inferencia" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#usando-un-modelo-trazado-para-inferencia"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Usando un modelo trazado para inferencia</span></h2> <p data-svelte-h="svelte-pmyobe">Utiliza el modelo trazado para inferencia utilizando su método <code>_call_</code> dunder:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->traced_model(tokens_tensor, segments_tensors)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="despliega-modelos-torchscript-de-hugging-face-en-aws-con-el-neuron-sdk" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#despliega-modelos-torchscript-de-hugging-face-en-aws-con-el-neuron-sdk"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Despliega modelos TorchScript de Hugging Face en AWS con el Neuron SDK</span></h2> <p data-svelte-h="svelte-bxk5ru">AWS introdujo la familia de instancias <a href="https://aws.amazon.com/ec2/instance-types/inf1/" rel="nofollow">Amazon EC2 Inf1</a> para inferencia de aprendizaje automático de alto rendimiento y bajo costo en la nube. Las instancias Inf1 están alimentadas por el chip AWS Inferentia, un acelerador de hardware personalizado que se especializa en cargas de trabajo de inferencia de aprendizaje profundo. <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/#" rel="nofollow">AWS Neuron</a> es el SDK para Inferentia que admite el trazado y la optimización de modelos de transformers para implementación en Inf1. El SDK Neuron proporciona:</p> <ol data-svelte-h="svelte-1x38prg"><li><p>Una API fácil de usar con un solo cambio de línea de código para trazar y optimizar un modelo TorchScript para inferencia en la nube.</p></li> <li><p>Optimizaciones de rendimiento listas para usar <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/benchmark/%3E" rel="nofollow">para mejorar el rendimiento y el costo</a>.</p></li> <li><p>Soporte para modelos de transformers de Hugging Face construidos tanto con <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/src/examples/pytorch/bert_tutorial/tutorial_pretrained_bert.html" rel="nofollow">PyTorch</a> como con <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/src/examples/tensorflow/huggingface_bert/huggingface_bert.html" rel="nofollow">TensorFlow</a>.</p></li></ol> <h3 class="relative group"><a id="implicaciones" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#implicaciones"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Implicaciones</span></h3> <p data-svelte-h="svelte-l9xvbl">Los modelos transformers basados en la arquitectura <a href="https://huggingface.co/docs/transformers/main/model_doc/bert" rel="nofollow">BERT (Bidirectional Encoder Representations from Transformers)</a>, o sus variantes como <a href="https://huggingface.co/docs/transformers/main/model_doc/distilbert" rel="nofollow">distilBERT</a> y <a href="https://huggingface.co/docs/transformers/main/model_doc/roberta" rel="nofollow">roBERTa</a>, funcionan mejor en Inf1 para tareas no generativas como la respuesta a preguntas extractivas, la clasificación de secuencias y la clasificación de tokens. Sin embargo, las tareas de generación de texto aún pueden adaptarse para ejecutarse en Inf1 según este <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/src/examples/pytorch/transformers-marianmt.html" rel="nofollow">tutorial de AWS Neuron MarianMT</a>. Se puede encontrar más información sobre los modelos que se pueden convertir fácilmente para usar en Inferentia en la sección de <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/models/models-inferentia.html#models-inferentia" rel="nofollow">Model Architecture Fit</a> de la documentación de Neuron.</p> <h3 class="relative group"><a id="dependencias" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dependencias"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dependencias</span></h3> <p data-svelte-h="svelte-1i2ssjc">El uso de AWS Neuron para convertir modelos requiere un <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/neuron-frameworks/pytorch-neuron/index.html#installation-guide" rel="nofollow">entorno de Neuron SDK</a> que viene preconfigurado en <a href="https://docs.aws.amazon.com/dlami/latest/devguide/tutorial-inferentia-launching.html" rel="nofollow">la AMI de AWS Deep Learning</a>.</p> <h3 class="relative group"><a id="convertir-un-modelo-para-aws-neuron" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#convertir-un-modelo-para-aws-neuron"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Convertir un modelo para AWS Neuron</span></h3> <p data-svelte-h="svelte-5kert7">Convierte un modelo para AWS NEURON utilizando el mismo código de <a href="torchscript#using-torchscript-in-python">Uso de TorchScript en Python</a> para trazar un <code>BertModel</code>. Importa la extensión del framework <code>torch.neuron</code> para acceder a los componentes del Neuron SDK a través de una API de Python:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BertModel, BertTokenizer, BertConfig
<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> torch.neuron<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ktkso8">Solo necesitas la linea sigueda:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-deletion">- torch.jit.trace(model, [tokens_tensor, segments_tensors])</span>
<span class="hljs-addition">+ torch.neuron.trace(model, [token_tensor, segments_tensors])</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-6g0167">Esto permite que el Neuron SDK trace el modelo y lo optimice para las instancias Inf1.</p> <p data-svelte-h="svelte-1dy1cuf">Para obtener más información sobre las características, herramientas, tutoriales de ejemplo y últimas actualizaciones del AWS Neuron SDK, consulta <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/index.html" rel="nofollow">la documentación de AWS NeuronSDK</a>.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/es/torchscript.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_8nzhgd = {
assets: "/docs/transformers/main/es",
base: "/docs/transformers/main/es",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/transformers/main/es/_app/immutable/entry/start.b4301699.js"),
import("/docs/transformers/main/es/_app/immutable/entry/app.eaf9d9af.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 42],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
38.9 kB
·
Xet hash:
5759c616c5f05d5af8f74a55df61aa23a6acf218675a2cc12da2d4a187083dff

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.