Buckets:

hf-doc-build/doc-dev / optimum /pr_2430 /en /quicktour.html
HuggingFaceDocBuilder's picture
download
raw
33.1 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Quick tour&quot;,&quot;local&quot;:&quot;quick-tour&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Accelerated inference&quot;,&quot;local&quot;:&quot;accelerated-inference&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;OpenVINO&quot;,&quot;local&quot;:&quot;openvino&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;ONNX Runtime&quot;,&quot;local&quot;:&quot;onnx-runtime&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Accelerated training&quot;,&quot;local&quot;:&quot;accelerated-training&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Habana&quot;,&quot;local&quot;:&quot;habana&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Out of the box ONNX export&quot;,&quot;local&quot;:&quot;out-of-the-box-onnx-export&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;torch.fx integration&quot;,&quot;local&quot;:&quot;torchfx-integration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/optimum/pr_2430/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/optimum/pr_2430/en/_app/immutable/entry/start.7ca01d50.js">
<link rel="modulepreload" href="/docs/optimum/pr_2430/en/_app/immutable/chunks/scheduler.0cf4ef2e.js">
<link rel="modulepreload" href="/docs/optimum/pr_2430/en/_app/immutable/chunks/singletons.ed7b08e2.js">
<link rel="modulepreload" href="/docs/optimum/pr_2430/en/_app/immutable/chunks/paths.a2bc8d9b.js">
<link rel="modulepreload" href="/docs/optimum/pr_2430/en/_app/immutable/entry/app.cd433aba.js">
<link rel="modulepreload" href="/docs/optimum/pr_2430/en/_app/immutable/chunks/preload-helper.4e3372a7.js">
<link rel="modulepreload" href="/docs/optimum/pr_2430/en/_app/immutable/chunks/index.abc433bf.js">
<link rel="modulepreload" href="/docs/optimum/pr_2430/en/_app/immutable/nodes/0.40e13f95.js">
<link rel="modulepreload" href="/docs/optimum/pr_2430/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/optimum/pr_2430/en/_app/immutable/nodes/11.1142eaca.js">
<link rel="modulepreload" href="/docs/optimum/pr_2430/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.9537ae3a.js">
<link rel="modulepreload" href="/docs/optimum/pr_2430/en/_app/immutable/chunks/CodeBlock.12edfa53.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Quick tour&quot;,&quot;local&quot;:&quot;quick-tour&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Accelerated inference&quot;,&quot;local&quot;:&quot;accelerated-inference&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;OpenVINO&quot;,&quot;local&quot;:&quot;openvino&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;ONNX Runtime&quot;,&quot;local&quot;:&quot;onnx-runtime&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Accelerated training&quot;,&quot;local&quot;:&quot;accelerated-training&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Habana&quot;,&quot;local&quot;:&quot;habana&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Out of the box ONNX export&quot;,&quot;local&quot;:&quot;out-of-the-box-onnx-export&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;torch.fx integration&quot;,&quot;local&quot;:&quot;torchfx-integration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="quick-tour" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#quick-tour"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Quick tour</span></h1> <p data-svelte-h="svelte-3d25f3">This quick tour is intended for developers who are ready to dive into the code and see examples of how to integrate 🤗 Optimum into their model training and inference workflows.</p> <h2 class="relative group"><a id="accelerated-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#accelerated-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Accelerated inference</span></h2> <h4 class="relative group"><a id="openvino" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#openvino"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>OpenVINO</span></h4> <p data-svelte-h="svelte-6jviwr">To load a model and run inference with OpenVINO Runtime, you can just replace your <code>AutoModelForXxx</code> class with the corresponding <code>OVModelForXxx</code> class.
If you want to load a PyTorch checkpoint, set <code>export=True</code> to convert your model to the OpenVINO IR (Intermediate Representation).</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-deletion">- from transformers import AutoModelForSequenceClassification</span>
<span class="hljs-addition">+ from optimum.intel.openvino import OVModelForSequenceClassification</span>
from transformers import AutoTokenizer, pipeline
# Download a tokenizer and model from the Hub and convert to OpenVINO format
tokenizer = AutoTokenizer.from_pretrained(model_id)
model_id = &quot;distilbert-base-uncased-finetuned-sst-2-english&quot;
<span class="hljs-deletion">- model = AutoModelForSequenceClassification.from_pretrained(model_id)</span>
<span class="hljs-addition">+ model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)</span>
# Run inference!
classifier = pipeline(&quot;text-classification&quot;, model=model, tokenizer=tokenizer)
results = classifier(&quot;He&#x27;s a dreadful magician.&quot;)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1tmum2f">You can find more examples in the <a href="https://huggingface.co/docs/optimum/intel/inference" rel="nofollow">documentation</a> and in the <a href="https://github.com/huggingface/optimum-intel/tree/main/examples/openvino" rel="nofollow">examples</a>.</p> <h4 class="relative group"><a id="onnx-runtime" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#onnx-runtime"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ONNX Runtime</span></h4> <p data-svelte-h="svelte-bjfa3x">To accelerate inference with ONNX Runtime, 🤗 Optimum uses <em>configuration objects</em> to define parameters for graph optimization and quantization. These objects are then used to instantiate dedicated <em>optimizers</em> and <em>quantizers</em>.</p> <p data-svelte-h="svelte-a1kook">Before applying quantization or optimization, first we need to load our model. To load a model and run inference with ONNX Runtime, you can just replace the canonical Transformers <a href="https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoModel" rel="nofollow"><code>AutoModelForXxx</code></a> class with the corresponding <a href="https://huggingface.co/docs/optimum/onnxruntime/package_reference/modeling_ort#optimum.onnxruntime.ORTModel" rel="nofollow"><code>ORTModelForXxx</code></a> class. If you want to load from a PyTorch checkpoint, set <code>export=True</code> to export your model to the ONNX format.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> optimum.onnxruntime <span class="hljs-keyword">import</span> ORTModelForSequenceClassification
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span>model_checkpoint = <span class="hljs-string">&quot;distilbert-base-uncased-finetuned-sst-2-english&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>save_directory = <span class="hljs-string">&quot;tmp/onnx/&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Load a model from transformers and export it to ONNX</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
<span class="hljs-meta">&gt;&gt;&gt; </span>ort_model = ORTModelForSequenceClassification.from_pretrained(model_checkpoint, export=<span class="hljs-literal">True</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Save the ONNX model and tokenizer</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>ort_model.save_pretrained(save_directory)
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer.save_pretrained(save_directory)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-e7rpqh">Let’s see now how we can apply dynamic quantization with ONNX Runtime:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> optimum.onnxruntime.configuration <span class="hljs-keyword">import</span> AutoQuantizationConfig
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> optimum.onnxruntime <span class="hljs-keyword">import</span> ORTQuantizer
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Define the quantization methodology</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>qconfig = AutoQuantizationConfig.arm64(is_static=<span class="hljs-literal">False</span>, per_channel=<span class="hljs-literal">False</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>quantizer = ORTQuantizer.from_pretrained(ort_model)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Apply dynamic quantization on the model</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>quantizer.quantize(save_dir=save_directory, quantization_config=qconfig)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-eh7tzg">In this example, we’ve quantized a model from the Hugging Face Hub, in the same manner we can quantize a model hosted locally by providing the path to the directory containing the model weights. The result from applying the <code>quantize()</code> method is a <code>model_quantized.onnx</code> file that can be used to run inference. Here’s an example of how to load an ONNX Runtime model and generate predictions with it:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> optimum.onnxruntime <span class="hljs-keyword">import</span> ORTModelForSequenceClassification
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline, AutoTokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span>model = ORTModelForSequenceClassification.from_pretrained(save_directory, file_name=<span class="hljs-string">&quot;model_quantized.onnx&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = AutoTokenizer.from_pretrained(save_directory)
<span class="hljs-meta">&gt;&gt;&gt; </span>classifier = pipeline(<span class="hljs-string">&quot;text-classification&quot;</span>, model=model, tokenizer=tokenizer)
<span class="hljs-meta">&gt;&gt;&gt; </span>results = classifier(<span class="hljs-string">&quot;I love burritos!&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19cbrlu">You can find more examples in the <a href="https://huggingface.co/docs/optimum/onnxruntime/quickstart" rel="nofollow">documentation</a> and in the <a href="https://github.com/huggingface/optimum/tree/main/examples/onnxruntime" rel="nofollow">examples</a>.</p> <h2 class="relative group"><a id="accelerated-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#accelerated-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Accelerated training</span></h2> <h4 class="relative group"><a id="habana" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#habana"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Habana</span></h4> <p data-svelte-h="svelte-8h57ng">To train transformers on Habana’s Gaudi processors, 🤗 Optimum provides a <code>GaudiTrainer</code> that is very similar to the 🤗 Transformers <a href="https://huggingface.co/docs/transformers/main_classes/trainer" rel="nofollow">Trainer</a>. Here is a simple example:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-deletion">- from transformers import Trainer, TrainingArguments</span>
<span class="hljs-addition">+ from optimum.habana import GaudiTrainer, GaudiTrainingArguments</span>
# Download a pretrained model from the Hub
model = AutoModelForXxx.from_pretrained(&quot;bert-base-uncased&quot;)
# Define the training arguments
<span class="hljs-deletion">- training_args = TrainingArguments(</span>
<span class="hljs-addition">+ training_args = GaudiTrainingArguments(</span>
output_dir=&quot;path/to/save/folder/&quot;,
<span class="hljs-addition">+ use_habana=True,</span>
<span class="hljs-addition">+ use_lazy_mode=True,</span>
<span class="hljs-addition">+ gaudi_config_name=&quot;Habana/bert-base-uncased&quot;,</span>
...
)
# Initialize the trainer
<span class="hljs-deletion">- trainer = Trainer(</span>
<span class="hljs-addition">+ trainer = GaudiTrainer(</span>
model=model,
args=training_args,
train_dataset=train_dataset,
...
)
# Use Habana Gaudi processor for training!
trainer.train()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-nvdwz8">You can find more examples in the <a href="https://huggingface.co/docs/optimum/habana/quickstart" rel="nofollow">documentation</a> and in the <a href="https://github.com/huggingface/optimum-habana/tree/main/examples" rel="nofollow">examples</a>.</p> <h2 class="relative group"><a id="out-of-the-box-onnx-export" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#out-of-the-box-onnx-export"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Out of the box ONNX export</span></h2> <p data-svelte-h="svelte-1qcigt9">The Optimum library handles out of the box the ONNX export of Transformers and Diffusers models!</p> <p data-svelte-h="svelte-1oa09xy">Exporting a model to ONNX is as simple as</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->optimum-cli <span class="hljs-built_in">export</span> onnx --model gpt2 gpt2_onnx/<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-b35baa">Check out the help for more options:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->optimum-cli <span class="hljs-built_in">export</span> onnx --<span class="hljs-built_in">help</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-q4y2mt">Check out the <a href="https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model" rel="nofollow">documentation</a> for more.</p> <h2 class="relative group"><a id="torchfx-integration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#torchfx-integration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>torch.fx integration</span></h2> <p data-svelte-h="svelte-1psut32">Optimum integrates with <code>torch.fx</code>, providing as a one-liner several graph transformations. We aim at supporting a better management of <a href="https://huggingface.co/docs/optimum/concept_guides/quantization" rel="nofollow">quantization</a> through <code>torch.fx</code>, both for quantization-aware training (QAT) and post-training quantization (PTQ).</p> <p data-svelte-h="svelte-p7b5of">Check out the <a href="https://huggingface.co/docs/optimum/torch_fx/usage_guides/optimization" rel="nofollow">documentation</a> and <a href="https://huggingface.co/docs/optimum/torch_fx/package_reference/optimization" rel="nofollow">reference</a> for more!</p> <p></p>
<script>
{
__sveltekit_mx90ez = {
assets: "/docs/optimum/pr_2430/en",
base: "/docs/optimum/pr_2430/en",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/optimum/pr_2430/en/_app/immutable/entry/start.7ca01d50.js"),
import("/docs/optimum/pr_2430/en/_app/immutable/entry/app.cd433aba.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 11],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
33.1 kB
·
Xet hash:
2aa824eb1c0712e68e68dd7eb3b64c46b49efe8825f6ff0bfbd487db3f2a29be

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.