Buckets:

hf-doc-build/doc-dev / transformers /main /it /perf_infer_cpu.html
rtrm's picture
download
raw
15.7 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Inferenza Efficiente su CPU&quot;,&quot;local&quot;:&quot;inferenza-efficiente-su-cpu&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;BetterTransformer per inferenza più rapida&quot;,&quot;local&quot;:&quot;bettertransformer-per-inferenza-più-rapida&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;PyTorch JIT-mode (TorchScript)&quot;,&quot;local&quot;:&quot;pytorch-jit-mode-torchscript&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;IPEX Graph Optimization con JIT-mode&quot;,&quot;local&quot;:&quot;ipex-graph-optimization-con-jit-mode&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Installazione di IPEX&quot;,&quot;local&quot;:&quot;installazione-di-ipex&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Utilizzo del JIT-mode&quot;,&quot;local&quot;:&quot;utilizzo-del-jit-mode&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/main/it/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/main/it/_app/immutable/entry/start.dac10038.js">
<link rel="modulepreload" href="/docs/transformers/main/it/_app/immutable/chunks/scheduler.36a0863c.js">
<link rel="modulepreload" href="/docs/transformers/main/it/_app/immutable/chunks/singletons.d5d832bc.js">
<link rel="modulepreload" href="/docs/transformers/main/it/_app/immutable/chunks/index.733708bb.js">
<link rel="modulepreload" href="/docs/transformers/main/it/_app/immutable/chunks/paths.d03c7e87.js">
<link rel="modulepreload" href="/docs/transformers/main/it/_app/immutable/entry/app.354f2351.js">
<link rel="modulepreload" href="/docs/transformers/main/it/_app/immutable/chunks/index.9c13489a.js">
<link rel="modulepreload" href="/docs/transformers/main/it/_app/immutable/nodes/0.b7cdb3fe.js">
<link rel="modulepreload" href="/docs/transformers/main/it/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/main/it/_app/immutable/nodes/18.b1b8c3aa.js">
<link rel="modulepreload" href="/docs/transformers/main/it/_app/immutable/chunks/Tip.3b06990e.js">
<link rel="modulepreload" href="/docs/transformers/main/it/_app/immutable/chunks/EditOnGithub.e88f2b7b.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Inferenza Efficiente su CPU&quot;,&quot;local&quot;:&quot;inferenza-efficiente-su-cpu&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;BetterTransformer per inferenza più rapida&quot;,&quot;local&quot;:&quot;bettertransformer-per-inferenza-più-rapida&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;PyTorch JIT-mode (TorchScript)&quot;,&quot;local&quot;:&quot;pytorch-jit-mode-torchscript&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;IPEX Graph Optimization con JIT-mode&quot;,&quot;local&quot;:&quot;ipex-graph-optimization-con-jit-mode&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Installazione di IPEX&quot;,&quot;local&quot;:&quot;installazione-di-ipex&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Utilizzo del JIT-mode&quot;,&quot;local&quot;:&quot;utilizzo-del-jit-mode&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="inferenza-efficiente-su-cpu" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inferenza-efficiente-su-cpu"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 
0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inferenza Efficiente su CPU</span></h1> <p data-svelte-h="svelte-qu0j64">Questa guida si concentra sull’inferenza di modelli di grandi dimensioni in modo efficiente sulla CPU.</p> <h2 class="relative group"><a id="bettertransformer-per-inferenza-più-rapida" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bettertransformer-per-inferenza-più-rapida"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>BetterTransformer per inferenza più rapida</span></h2> <p data-svelte-h="svelte-67xobp">Abbiamo integrato di recente <code>BetterTransformer</code> per fare inferenza più rapidamente con modelli per testi, immagini e audio. 
Visualizza la documentazione sull’integrazione <a href="https://huggingface.co/docs/optimum/bettertransformer/overview" rel="nofollow">qui</a> per maggiori dettagli.</p> <h2 class="relative group"><a id="pytorch-jit-mode-torchscript" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pytorch-jit-mode-torchscript"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PyTorch JIT-mode (TorchScript)</span></h2> <p data-svelte-h="svelte-3muwsh">TorchScript è un modo di creare modelli serializzabili e ottimizzabili da codice PyTorch. Ogni programma TorchScript può essere salvato da un processo Python e caricato in un processo dove non ci sono dipendenze Python.
Comparandolo con l’eager mode di default, jit mode in PyTorch normalmente fornisce prestazioni migliori per l’inferenza del modello da parte di metodologie di ottimizzazione come la operator fusion.</p> <p data-svelte-h="svelte-6flq5m">Per una prima introduzione a TorchScript, vedi la Introduction to <a href="https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html#tracing-modules" rel="nofollow">PyTorch TorchScript tutorial</a>.</p> <h3 class="relative group"><a id="ipex-graph-optimization-con-jit-mode" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#ipex-graph-optimization-con-jit-mode"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>IPEX Graph Optimization con JIT-mode</span></h3> <p data-svelte-h="svelte-1p2d5pw">Intel® Extension per PyTorch fornisce ulteriori ottimizzazioni in jit mode per i modelli della serie Transformers. Consigliamo vivamente agli utenti di usufruire dei vantaggi di Intel® Extension per PyTorch con jit mode. Alcuni operator patterns usati frequentemente dai modelli Transformers sono già supportati in Intel® Extension per PyTorch con jit mode fusions. 
Questi fusion patterns come Multi-head-attention fusion, Concat Linear, Linear+Add, Linear+Gelu, Add+LayerNorm fusion ecc. sono abilitati e hanno buone performance. I benefici della fusion sono forniti agli utenti in modo trasparente. In base alle analisi, il ~70% dei problemi più popolari in NLP question-answering, text-classification e token-classification possono avere benefici sulle performance grazie ai fusion patterns sia per Float32 precision che per BFloat16 Mixed precision.</p> <p data-svelte-h="svelte-1e9vei1">Vedi maggiori informazioni per <a href="https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/features/graph_optimization.html" rel="nofollow">IPEX Graph Optimization</a>.</p> <h4 class="relative group"><a id="installazione-di-ipex" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#installazione-di-ipex"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Installazione di IPEX</span></h4> <p data-svelte-h="svelte-1cbxjdx">I rilasci di IPEX seguono PyTorch, verifica i vari approcci per <a href="https://intel.github.io/intel-extension-for-pytorch/" rel="nofollow">IPEX installation</a>.</p> <h3 
class="relative group"><a id="utilizzo-del-jit-mode" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#utilizzo-del-jit-mode"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Utilizzo del JIT-mode</span></h3> <p data-svelte-h="svelte-y6f70">Per abilitare JIT-mode in Trainer per evaluation e prediction, devi aggiungere <code>jit_mode_eval</code> negli argomenti di Trainer.</p> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-1ukl5we">per PyTorch &gt;= 1.14.0. JIT-mode potrebbe giovare a qualsiasi modello di prediction e evaluation visto che il dict input è supportato in jit.trace</p> <p data-svelte-h="svelte-1ajwizk">per PyTorch &lt; 1.14.0. JIT-mode potrebbe giovare ai modelli il cui ordine dei parametri corrisponde all’ordine delle tuple in ingresso in jit.trace, come i modelli per question-answering.
Nel caso in cui l’ordine dei parametri seguenti non corrisponda all’ordine delle tuple in ingresso in jit.trace, come nei modelli di text-classification, jit.trace fallirà e lo cattureremo con una eccezione al fine di renderlo un fallback. Il logging è usato per notificare gli utenti.</p></div> <p data-svelte-h="svelte-ibdywp">Trovi un esempio con caso d’uso in <a href="https://github.com/huggingface/transformers/tree/main/examples/pytorch/question-answering" rel="nofollow">Transformers question-answering</a></p> <ul data-svelte-h="svelte-xwyqaf"><li>Inference using jit mode on CPU:</li></ul> <pre data-svelte-h="svelte-gw81i0">python run_qa.py \
--model_name_or_path csarron/bert-base-uncased-squad-v1 \
--dataset_name squad \
--do_eval \
--max_seq_length 384 \
--doc_stride 128 \
--output_dir /tmp/ \
--no_cuda \
<b>--jit_mode_eval </b></pre> <ul data-svelte-h="svelte-prijpn"><li>Inference with IPEX using jit mode on CPU:</li></ul> <pre data-svelte-h="svelte-18ss6jb">python run_qa.py \
--model_name_or_path csarron/bert-base-uncased-squad-v1 \
--dataset_name squad \
--do_eval \
--max_seq_length 384 \
--doc_stride 128 \
--output_dir /tmp/ \
--no_cuda \
<b>--use_ipex \</b>
<b>--jit_mode_eval</b></pre> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/it/perf_infer_cpu.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
// SvelteKit client bootstrap (auto-generated by the docs build; hand edits will be
// overwritten on the next build).
{
// Global configuration object read by the SvelteKit runtime: asset/base URL paths
// for this doc deployment. The suffixed name ties it to this specific build.
__sveltekit_1h8xam7 = {
assets: "/docs/transformers/main/it",
base: "/docs/transformers/main/it",
env: {}
};
// Must be read synchronously here: document.currentScript is only set while this
// inline script is executing, and the hydration target is its parent element.
const element = document.currentScript.parentElement;
// Server-serialized data for the route nodes; both null for this static page.
const data = [null,null];
// Load the two runtime entry modules in parallel, then hydrate the page.
Promise.all([
import("/docs/transformers/main/it/_app/immutable/entry/start.dac10038.js"),
import("/docs/transformers/main/it/_app/immutable/entry/app.354f2351.js")
]).then(([kit, app]) => {
// node_ids presumably index the layout/page component nodes to mount — confirm
// against the generated app manifest if this ever needs changing.
kit.start(app, element, {
node_ids: [0, 18],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
15.7 kB
·
Xet hash:
617e22601ec01bc4217569cde2762f135afe606f6a6580e598b2864abff3fb73

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.