Buckets:

rtrm's picture
download
raw
13.3 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Inferentia Exporter&quot;,&quot;local&quot;:&quot;inferentia-exporter&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Export functions&quot;,&quot;local&quot;:&quot;export-functions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Configuration classes for Neuron exports&quot;,&quot;local&quot;:&quot;configuration-classes-for-neuron-exports&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Supported architectures&quot;,&quot;local&quot;:&quot;supported-architectures&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/optimum.neuron/v0.0.13/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.0.13/en/_app/immutable/entry/start.0d2162c0.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.0.13/en/_app/immutable/chunks/scheduler.9039eef2.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.0.13/en/_app/immutable/chunks/singletons.516ecead.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.0.13/en/_app/immutable/chunks/paths.e8a549c9.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.0.13/en/_app/immutable/entry/app.5d909f7e.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.0.13/en/_app/immutable/chunks/index.cdcc3d35.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.0.13/en/_app/immutable/nodes/0.f16a8a14.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.0.13/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.0.13/en/_app/immutable/nodes/11.414282c5.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.0.13/en/_app/immutable/chunks/Tip.6f74db41.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.0.13/en/_app/immutable/chunks/Heading.96ce3702.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Inferentia Exporter&quot;,&quot;local&quot;:&quot;inferentia-exporter&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Export functions&quot;,&quot;local&quot;:&quot;export-functions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Configuration classes for Neuron exports&quot;,&quot;local&quot;:&quot;configuration-classes-for-neuron-exports&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Supported architectures&quot;,&quot;local&quot;:&quot;supported-architectures&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="inferentia-exporter" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inferentia-exporter"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inferentia Exporter</span></h1> <p data-svelte-h="svelte-1d09xet">You can export a PyTorch model to Neuron with 🤗 Optimum to 
run inference on AWS <a href="https://aws.amazon.com/ec2/instance-types/inf1/" rel="nofollow">Inferentia 1</a>
and <a href="https://aws.amazon.com/ec2/instance-types/inf2/" rel="nofollow">Inferentia 2</a>.</p> <h2 class="relative group"><a id="export-functions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#export-functions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Export functions</span></h2> <p data-svelte-h="svelte-1nf9chr">There is an export function for each generation of the Inferentia accelerator, <code>export_neuron</code>
for INF1 and <code>export_neuronx</code> for INF2, but you can directly use the export function <code>export</code>, which will select the proper
exporting function according to the environment.</p> <p data-svelte-h="svelte-zk3hwe">Besides, you can check if the exported model is valid via <code>validate_model_outputs</code>, which compares
the compiled model’s output on Neuron devices to the PyTorch model’s output on CPU.</p> <h2 class="relative group"><a id="configuration-classes-for-neuron-exports" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#configuration-classes-for-neuron-exports"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Configuration classes for Neuron exports</span></h2> <p data-svelte-h="svelte-15gsxx7">Exporting a PyTorch model to a Neuron compiled model involves specifying:</p> <ol data-svelte-h="svelte-lhsb7m"><li>The input names.</li> <li>The output names.</li> <li>The dummy inputs used to trace the model. This is needed by the Neuron Compiler to record the computational graph and convert it to a TorchScript module.</li> <li>The compilation arguments used to control the trade-off between hardware efficiency (latency, throughput) and accuracy.</li></ol> <p data-svelte-h="svelte-1hhqq53">Depending on the choice of model and task, we represent the data above with <em>configuration classes</em>. Each configuration class is associated with
a specific model architecture, and follows the naming convention <code>ArchitectureNameNeuronConfig</code>. For instance, the configuration which specifies the Neuron
export of BERT models is <code>BertNeuronConfig</code>.</p> <p data-svelte-h="svelte-tqmfwt">Since many architectures share similar properties for their Neuron configuration, 🤗 Optimum adopts a 3-level class hierarchy:</p> <ol data-svelte-h="svelte-1ea686j"><li>Abstract and generic base classes. These handle all the fundamental features, while being agnostic to the modality (text, image, audio, etc).</li> <li>Middle-end classes. These are aware of the modality, but multiple can exist for the same modality depending on the inputs they support.
They specify which input generators should be used for the dummy inputs, but remain model-agnostic.</li> <li>Model-specific classes like the <code>BertNeuronConfig</code> mentioned above. These are the ones actually used to export models.</li></ol> <h2 class="relative group"><a id="supported-architectures" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#supported-architectures"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Supported architectures</span></h2> <table data-svelte-h="svelte-1v2my2t"><thead><tr><th>Architecture</th> <th>Task</th></tr></thead> <tbody><tr><td>ALBERT</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification</td></tr> <tr><td>BERT</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification</td></tr> <tr><td>CamemBERT</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification</td></tr> <tr><td>ConvBERT</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, 
token-classification</td></tr> <tr><td>DeBERTa (INF2 only)</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification</td></tr> <tr><td>DeBERTa-v2 (INF2 only)</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification</td></tr> <tr><td>DistilBERT</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification</td></tr> <tr><td>ELECTRA</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification</td></tr> <tr><td>FlauBERT</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification</td></tr> <tr><td>GPT2</td> <td>text-generation</td></tr> <tr><td>MobileBERT</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification</td></tr> <tr><td>MPNet</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification</td></tr> <tr><td>RoBERTa</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification</td></tr> <tr><td>RoFormer</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification</td></tr> <tr><td>XLM</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification</td></tr> <tr><td>XLM-RoBERTa</td> <td>feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification</td></tr> <tr><td>Stable Diffusion</td> <td>text-to-image, image-to-image, inpaint</td></tr> <tr><td>Stable Diffusion XL</td> <td>text-to-image</td></tr></tbody></table> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 
dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-hfkaug">For more details on checking supported tasks, see <a href="https://huggingface.co/docs/optimum-neuron/guides/export_model#selecting-a-task" rel="nofollow">the export guide's section on selecting a task</a>.</p></div> <p data-svelte-h="svelte-dm8pu8">More architectures coming soon, stay tuned! 🚀</p> <p></p>
<script>
{
	// Path/environment settings published on a global (implicit global
	// assignment in a classic script). Presumably read by the entry modules
	// imported below — confirm against the SvelteKit runtime.
	__sveltekit_3q5blf = {
		assets: "/docs/optimum.neuron/v0.0.13/en",
		base: "/docs/optimum.neuron/v0.0.13/en",
		env: {}
	};

	// Must be read synchronously: document.currentScript is only set while
	// this <script> element is executing, not inside later async callbacks.
	const mountPoint = document.currentScript.parentElement;
	const initialData = [null, null];

	// Load both client entry modules in parallel, then start the client app
	// on the element that contains this script.
	(async () => {
		const [kit, app] = await Promise.all([
			import("/docs/optimum.neuron/v0.0.13/en/_app/immutable/entry/start.0d2162c0.js"),
			import("/docs/optimum.neuron/v0.0.13/en/_app/immutable/entry/app.5d909f7e.js")
		]);

		kit.start(app, mountPoint, {
			// Matches the preloaded nodes/0.*.js and nodes/11.*.js chunks.
			node_ids: [0, 11],
			data: initialData,
			form: null,
			error: null
		});
	})();
}
</script>

Xet Storage Details

Size:
13.3 kB
·
Xet hash:
6d14e06cffc257b9eb0e44e19c242d969e7cae941ca898f4a89454c7c33b7fd0

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.