Buckets:

hf-doc-build/doc-dev / transformers /pr_33892 /en /pipeline_tutorial.html
rtrm's picture
download
raw
63.9 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Pipeline&quot;,&quot;local&quot;:&quot;pipeline&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Tasks&quot;,&quot;local&quot;:&quot;tasks&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Parameters&quot;,&quot;local&quot;:&quot;parameters&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Device&quot;,&quot;local&quot;:&quot;device&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Batch inference&quot;,&quot;local&quot;:&quot;batch-inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Task-specific parameters&quot;,&quot;local&quot;:&quot;task-specific-parameters&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Chunk batching&quot;,&quot;local&quot;:&quot;chunk-batching&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Large datasets&quot;,&quot;local&quot;:&quot;large-datasets&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Large models&quot;,&quot;local&quot;:&quot;large-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/pr_33892/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/entry/start.b2c4257a.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/scheduler.31fdf58d.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/singletons.9860629f.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/index.252883d5.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/paths.e85c0ec8.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/entry/app.05ef1f97.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/preload-helper.40847a0e.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/index.2f76fdf0.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/nodes/0.ca4aafa4.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/nodes/517.90614deb.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/CopyLLMTxtMenu.ff482081.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.71f274cc.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/IconCopy.ac192424.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/Youtube.e3933a11.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/CodeBlock.ab12f8e1.js">
<link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/HfOption.fb051768.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Pipeline&quot;,&quot;local&quot;:&quot;pipeline&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Tasks&quot;,&quot;local&quot;:&quot;tasks&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Parameters&quot;,&quot;local&quot;:&quot;parameters&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Device&quot;,&quot;local&quot;:&quot;device&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Batch inference&quot;,&quot;local&quot;:&quot;batch-inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Task-specific parameters&quot;,&quot;local&quot;:&quot;task-specific-parameters&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Chunk batching&quot;,&quot;local&quot;:&quot;chunk-batching&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Large datasets&quot;,&quot;local&quot;:&quot;large-datasets&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Large models&quot;,&quot;local&quot;:&quot;large-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 max-sm:gap-0.5 h-6 max-sm:h-5 px-2 max-sm:px-1.5 text-[11px] max-sm:text-[9px] font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" 
aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0"><svg class="w-3 h-3 max-sm:w-2.5 max-sm:h-2.5" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-6 max-sm:h-5 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible w-3 h-3 max-sm:w-2.5 max-sm:h-2.5 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="pipeline" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pipeline"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 
0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Pipeline</span></h1> <p data-svelte-h="svelte-aax6g7">The <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> is a simple but powerful inference API that is readily available for a variety of machine learning tasks with any model from the Hugging Face <a href="https://hf.co/models" rel="nofollow">Hub</a>.</p> <p data-svelte-h="svelte-18a81ih">Tailor the <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> to your task with task-specific parameters such as adding timestamps to an automatic speech recognition (ASR) pipeline for transcribing meeting notes. <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> supports GPUs, Apple Silicon, and half-precision weights to accelerate inference and save memory.</p> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/tiZFewofSLM" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-mz5wld">Transformers has two pipeline classes, a generic <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> and many individual task-specific pipelines like <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a> or <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.VisualQuestionAnsweringPipeline">VisualQuestionAnsweringPipeline</a>. 
Load these individual pipelines by setting the task identifier in the <code>task</code> parameter in <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a>. You can find the task identifier for each pipeline in their API documentation.</p> <p data-svelte-h="svelte-ms7obs">Each task is configured to use a default pretrained model and preprocessor, but this can be overridden with the <code>model</code> parameter if you want to use a different model.</p> <p data-svelte-h="svelte-1x885fu">For example, to use the <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a> with <a href="./model_doc/gemma2">Gemma 2</a>, set <code>task=&quot;text-generation&quot;</code> and <code>model=&quot;google/gemma-2-2b&quot;</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> 
Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
pipeline = pipeline(task=<span class="hljs-string">&quot;text-generation&quot;</span>, model=<span class="hljs-string">&quot;google/gemma-2-2b&quot;</span>)
pipeline(<span class="hljs-string">&quot;the secret to baking a really good cake is &quot;</span>)
[{<span class="hljs-string">&#x27;generated_text&#x27;</span>: <span class="hljs-string">&#x27;the secret to baking a really good cake is 1. the right ingredients 2. the&#x27;</span>}]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ty30nx">When you have more than one input, pass them as a list.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator
device = Accelerator().device
pipeline = pipeline(task=<span class="hljs-string">&quot;text-generation&quot;</span>, model=<span class="hljs-string">&quot;google/gemma-2-2b&quot;</span>, device=device)
pipeline([<span class="hljs-string">&quot;the secret to baking a really good cake is &quot;</span>, <span class="hljs-string">&quot;a baguette is &quot;</span>])
[[{<span class="hljs-string">&#x27;generated_text&#x27;</span>: <span class="hljs-string">&#x27;the secret to baking a really good cake is 1. the right ingredients 2. the&#x27;</span>}],
[{<span class="hljs-string">&#x27;generated_text&#x27;</span>: <span class="hljs-string">&#x27;a baguette is 100% bread.\n\na baguette is 100%&#x27;</span>}]]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1xy5z46">This guide will introduce you to the <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a>, demonstrate its features, and show how to configure its various parameters.</p> <h2 class="relative group"><a id="tasks" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tasks"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Tasks</span></h2> <p data-svelte-h="svelte-oe3lkc"><a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> is compatible with many machine learning tasks across different modalities. 
Pass an appropriate input to the pipeline and it will handle the rest.</p> <p data-svelte-h="svelte-17oboj1">Here are some examples of how to use <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> for different tasks and modalities.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">summarization </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">automatic speech recognition </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">image classification </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">visual question answering </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> 
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
pipeline = pipeline(task=<span class="hljs-string">&quot;summarization&quot;</span>, model=<span class="hljs-string">&quot;google/pegasus-billsum&quot;</span>)
pipeline(<span class="hljs-string">&quot;Section was formerly set out as section 44 of this title. As originally enacted, this section contained two further provisions that &#x27;nothing in this act shall be construed as in any wise affecting the grant of lands made to the State of California by virtue of the act entitled &#x27;An act authorizing a grant to the State of California of the Yosemite Valley, and of the land&#x27; embracing the Mariposa Big-Tree Grove, approved June thirtieth, eighteen hundred and sixty-four; or as affecting any bona-fide entry of land made within the limits above described under any law of the United States prior to the approval of this act.&#x27; The first quoted provision was omitted from the Code because the land, granted to the state of California pursuant to the Act cite, was receded to the United States. Resolution June 11, 1906, No. 27, accepted the recession.&quot;</span>)
[{<span class="hljs-string">&#x27;summary_text&#x27;</span>: <span class="hljs-string">&#x27;Instructs the Secretary of the Interior to convey to the State of California all right, title, and interest of the United States in and to specified lands which are located within the Yosemite and Mariposa National Forests, California.&#x27;</span>}]<!-- HTML_TAG_END --></pre></div> </div> <h2 class="relative group"><a id="parameters" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#parameters"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Parameters</span></h2> <p data-svelte-h="svelte-ri92n8">At a minimum, <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> only requires a task identifier, model, and the appropriate input. 
But there are many parameters available to configure the pipeline with, from task-specific parameters to optimizing performance.</p> <p data-svelte-h="svelte-fh3bdc">This section introduces you to some of the more important parameters.</p> <h3 class="relative group"><a id="device" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#device"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Device</span></h3> <p data-svelte-h="svelte-lprch9"><a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> is compatible with many hardware types, including GPUs, CPUs, Apple Silicon, and more. Configure the hardware type with the <code>device</code> parameter. 
By default, <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> runs on a CPU which is given by <code>device=-1</code>.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">GPU </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Apple silicon </div></div> <div class="language-select"><p data-svelte-h="svelte-myh33d">To run <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> on a GPU, set <code>device</code> to the associated CUDA device id. For example, <code>device=0</code> runs on the first GPU.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" 
style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
pipeline = pipeline(task=<span class="hljs-string">&quot;text-generation&quot;</span>, model=<span class="hljs-string">&quot;google/gemma-2-2b&quot;</span>, device=<span class="hljs-number">0</span>)
pipeline(<span class="hljs-string">&quot;the secret to baking a really good cake is &quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1xsajsn">You could also let <a href="https://hf.co/docs/accelerate/index" rel="nofollow">Accelerate</a>, a library for distributed training, automatically choose how to load and store the model weights on the appropriate device. This is especially useful if you have multiple devices. Accelerate loads and stores the model weights on the fastest device first, and then moves the weights to other devices (CPU, hard drive) as needed. Set <code>device_map=&quot;auto&quot;</code> to let Accelerate choose the device.</p> <blockquote class="tip"><p data-svelte-h="svelte-1hs6ufp">Make sure <a href="https://hf.co/docs/accelerate/basic_tutorials/install" rel="nofollow">Accelerate</a> is installed.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; 
"></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->!pip install -U accelerate<!-- HTML_TAG_END --></pre></div></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
pipeline = pipeline(task=<span class="hljs-string">&quot;text-generation&quot;</span>, model=<span class="hljs-string">&quot;google/gemma-2-2b&quot;</span>, device_map=<span class="hljs-string">&quot;auto&quot;</span>)
pipeline(<span class="hljs-string">&quot;the secret to baking a really good cake is &quot;</span>)<!-- HTML_TAG_END --></pre></div> </div> <h3 class="relative group"><a id="batch-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#batch-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Batch inference</span></h3> <p data-svelte-h="svelte-188bw86"><a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> can also process batches of inputs with the <code>batch_size</code> parameter. Batch inference may improve speed, especially on a GPU, but it isn’t guaranteed. Other variables such as hardware, data, and the model itself can affect whether batch inference improves speed. 
For this reason, batch inference is disabled by default.</p> <p data-svelte-h="svelte-1mmmlyk">In the example below, when there are 4 inputs and <code>batch_size</code> is set to 2, <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> passes a batch of 2 inputs to the model at a time.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator
device = Accelerator().device
pipeline = pipeline(task=<span class="hljs-string">&quot;text-generation&quot;</span>, model=<span class="hljs-string">&quot;google/gemma-2-2b&quot;</span>, device=device, batch_size=<span class="hljs-number">2</span>)
pipeline([<span class="hljs-string">&quot;the secret to baking a really good cake is&quot;</span>, <span class="hljs-string">&quot;a baguette is&quot;</span>, <span class="hljs-string">&quot;paris is the&quot;</span>, <span class="hljs-string">&quot;hotdogs are&quot;</span>])
[[{<span class="hljs-string">&#x27;generated_text&#x27;</span>: <span class="hljs-string">&#x27;the secret to baking a really good cake is to use a good cake mix.\n\ni’&#x27;</span>}],
[{<span class="hljs-string">&#x27;generated_text&#x27;</span>: <span class="hljs-string">&#x27;a baguette is&#x27;</span>}],
[{<span class="hljs-string">&#x27;generated_text&#x27;</span>: <span class="hljs-string">&#x27;paris is the most beautiful city in the world.\n\ni’ve been to paris 3&#x27;</span>}],
[{<span class="hljs-string">&#x27;generated_text&#x27;</span>: <span class="hljs-string">&#x27;hotdogs are a staple of the american diet. they are a great source of protein and can&#x27;</span>}]]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ydr3us">Another good use case for batch inference is for streaming data in <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator
<span class="hljs-keyword">from</span> transformers.pipelines.pt_utils <span class="hljs-keyword">import</span> KeyDataset
<span class="hljs-keyword">import</span> datasets
device = Accelerator().device
<span class="hljs-comment"># KeyDataset is a utility that returns the item in the dict returned by the dataset</span>
dataset = datasets.load_dataset(<span class="hljs-string">&quot;imdb&quot;</span>, name=<span class="hljs-string">&quot;plain_text&quot;</span>, split=<span class="hljs-string">&quot;unsupervised&quot;</span>)
pipeline = pipeline(task=<span class="hljs-string">&quot;text-classification&quot;</span>, model=<span class="hljs-string">&quot;distilbert/distilbert-base-uncased-finetuned-sst-2-english&quot;</span>, device=device)
<span class="hljs-keyword">for</span> out <span class="hljs-keyword">in</span> pipeline(KeyDataset(dataset, <span class="hljs-string">&quot;text&quot;</span>), batch_size=<span class="hljs-number">8</span>, truncation=<span class="hljs-string">&quot;only_first&quot;</span>):
<span class="hljs-built_in">print</span>(out)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-c57m2q">Keep the following general rules of thumb in mind for determining whether batch inference can help improve performance.</p> <ol data-svelte-h="svelte-1kufr0d"><li>The only way to know for sure is to measure performance on your model, data, and hardware.</li> <li>Don’t batch inference if you’re constrained by latency (a live inference product for example).</li> <li>Don’t batch inference if you’re using a CPU.</li> <li>Don’t batch inference if you don’t know the <code>sequence_length</code> of your data. Measure performance, iteratively add to <code>sequence_length</code>, and include out-of-memory (OOM) checks to recover from failures.</li> <li>Do batch inference if your <code>sequence_length</code> is regular, and keep pushing it until you reach an OOM error. The larger the GPU, the more helpful batch inference is.</li> <li>Do make sure you can handle OOM errors if you decide to do batch inference.</li></ol> <h3 class="relative group"><a id="task-specific-parameters" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#task-specific-parameters"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 
56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Task-specific parameters</span></h3> <p data-svelte-h="svelte-xvi9xt"><a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> accepts any parameters that are supported by each individual task pipeline. Make sure to check out each individual task pipeline to see what type of parameters are available. If you can’t find a parameter that is useful for your use case, please feel free to open a GitHub <a href="https://github.com/huggingface/transformers/issues/new?assignees=&labels=feature&template=feature-request.yml" rel="nofollow">issue</a> to request it!</p> <p data-svelte-h="svelte-1r45mm4">The examples below demonstrate some of the task-specific parameters available.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">automatic speech recognition </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">text generation </div></div> <div class="language-select"><p data-svelte-h="svelte-1n1wttu">Pass the <code>return_timestamps=&quot;word&quot;</code> parameter to <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> to return when each word was spoken.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
pipeline = pipeline(task=<span class="hljs-string">&quot;automatic-speech-recognition&quot;</span>, model=<span class="hljs-string">&quot;openai/whisper-large-v3&quot;</span>)
pipeline(audio=<span class="hljs-string">&quot;https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac&quot;</span>, return_timestamps=<span class="hljs-string">&quot;word&quot;</span>)
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; I have a dream that one day this nation will rise up and live out the true meaning of its creed.&#x27;</span>,
<span class="hljs-string">&#x27;chunks&#x27;</span>: [{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; I&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">0.0</span>, <span class="hljs-number">1.1</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; have&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">1.1</span>, <span class="hljs-number">1.44</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; a&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">1.44</span>, <span class="hljs-number">1.62</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; dream&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">1.62</span>, <span class="hljs-number">1.92</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; that&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">1.92</span>, <span class="hljs-number">3.7</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; one&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">3.7</span>, <span class="hljs-number">3.88</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; day&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">3.88</span>, <span class="hljs-number">4.24</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; this&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">4.24</span>, <span class="hljs-number">5.82</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; nation&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">5.82</span>, <span class="hljs-number">6.78</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; will&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">6.78</span>, <span class="hljs-number">7.36</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; rise&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">7.36</span>, <span class="hljs-number">7.88</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; up&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">7.88</span>, <span class="hljs-number">8.46</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; and&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">8.46</span>, <span class="hljs-number">9.2</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; live&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">9.2</span>, <span class="hljs-number">10.34</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; out&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">10.34</span>, <span class="hljs-number">10.58</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; the&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">10.58</span>, <span class="hljs-number">10.8</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; true&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">10.8</span>, <span class="hljs-number">11.04</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; meaning&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">11.04</span>, <span class="hljs-number">11.4</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; of&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">11.4</span>, <span class="hljs-number">11.64</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; its&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">11.64</span>, <span class="hljs-number">11.8</span>)},
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; creed.&#x27;</span>, <span class="hljs-string">&#x27;timestamp&#x27;</span>: (<span class="hljs-number">11.8</span>, <span class="hljs-number">12.3</span>)}]}<!-- HTML_TAG_END --></pre></div> </div> <h2 class="relative group"><a id="chunk-batching" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#chunk-batching"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Chunk batching</span></h2> <p data-svelte-h="svelte-atfdfd">There are some instances where you need to process data in chunks.</p> <ul data-svelte-h="svelte-bybore"><li>for some data types, a single input (for example, a really long audio file) may need to be chunked into multiple parts before it can be processed</li> <li>for some tasks, like zero-shot classification or question answering, a single input may need multiple forward passes which can cause issues with the <code>batch_size</code> parameter</li></ul> <p data-svelte-h="svelte-1fz22pv">The <a href="https://github.com/huggingface/transformers/blob/99e0ab6ed888136ea4877c6d8ab03690a1478363/src/transformers/pipelines/base.py#L1387" 
rel="nofollow">ChunkPipeline</a> class is designed to handle these use cases. Both pipeline classes are used in the same way, but since <a href="https://github.com/huggingface/transformers/blob/99e0ab6ed888136ea4877c6d8ab03690a1478363/src/transformers/pipelines/base.py#L1387" rel="nofollow">ChunkPipeline</a> can automatically handle batching, you don’t need to worry about the number of forward passes your inputs trigger. Instead, you can optimize <code>batch_size</code> independently of the inputs.</p> <p data-svelte-h="svelte-1yw2l4b">The example below shows how it differs from <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># ChunkPipeline</span>
all_model_outputs = []
<span class="hljs-keyword">for</span> preprocessed <span class="hljs-keyword">in</span> pipeline.preprocess(inputs):
    model_outputs = pipeline.forward(preprocessed)
    all_model_outputs.append(model_outputs)
outputs = pipeline.postprocess(all_model_outputs)
<span class="hljs-comment"># Pipeline</span>
preprocessed = pipeline.preprocess(inputs)
model_outputs = pipeline.forward(preprocessed)
outputs = pipeline.postprocess(model_outputs)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="large-datasets" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#large-datasets"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Large datasets</span></h2> <p data-svelte-h="svelte-1qel1t2">For inference with large datasets, you can iterate directly over the dataset itself. This avoids immediately allocating memory for the entire dataset, and you don’t need to worry about creating batches yourself. 
Try <a href="#batch-inference">Batch inference</a> with the <code>batch_size</code> parameter to see if it improves performance.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers.pipelines.pt_utils <span class="hljs-keyword">import</span> KeyDataset
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator
<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
device = Accelerator().device
dataset = load_dataset(<span class="hljs-string">&quot;imdb&quot;</span>, name=<span class="hljs-string">&quot;plain_text&quot;</span>, split=<span class="hljs-string">&quot;unsupervised&quot;</span>)
pipeline = pipeline(task=<span class="hljs-string">&quot;text-classification&quot;</span>, model=<span class="hljs-string">&quot;distilbert/distilbert-base-uncased-finetuned-sst-2-english&quot;</span>, device=device)
<span class="hljs-keyword">for</span> out <span class="hljs-keyword">in</span> pipeline(KeyDataset(dataset, <span class="hljs-string">&quot;text&quot;</span>), batch_size=<span class="hljs-number">8</span>, truncation=<span class="hljs-string">&quot;only_first&quot;</span>):
<span class="hljs-built_in">print</span>(out)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mrmkfq">Other ways to run inference on large datasets with <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> include using an iterator or generator.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">data</span>():
<span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">1000</span>):
<span class="hljs-keyword">yield</span> <span class="hljs-string">f&quot;My example <span class="hljs-subst">{i}</span>&quot;</span>
pipeline = pipeline(model=<span class="hljs-string">&quot;openai-community/gpt2&quot;</span>, device=<span class="hljs-number">0</span>)
generated_characters = <span class="hljs-number">0</span>
<span class="hljs-keyword">for</span> out <span class="hljs-keyword">in</span> pipeline(data()):
generated_characters += <span class="hljs-built_in">len</span>(out[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;generated_text&quot;</span>])<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="large-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#large-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Large models</span></h2> <p data-svelte-h="svelte-17i633y"><a href="https://hf.co/docs/accelerate/index" rel="nofollow">Accelerate</a> enables a couple of optimizations for running large models with <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a>. 
Make sure Accelerate is installed first.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->!pip install -U accelerate<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ldjqsx">The <code>device_map=&quot;auto&quot;</code> setting is useful for automatically distributing the model across the fastest devices (GPUs) first before dispatching to other slower devices if available (CPU, hard drive).</p> <p data-svelte-h="svelte-86xx2m"><a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> supports half-precision weights (torch.float16), which can be significantly faster and save memory. Performance loss is negligible for most models, especially for larger ones. 
If your hardware supports it, you can enable torch.bfloat16 instead for more range.</p> <blockquote class="tip" data-svelte-h="svelte-2fnk2n"><p>Inputs are internally converted to torch.float16 and it only works for models with a PyTorch backend.</p></blockquote> <p data-svelte-h="svelte-1g7knub">Lastly, <a href="/docs/transformers/pr_33892/en/main_classes/pipelines#transformers.Pipeline">Pipeline</a> also accepts quantized models to reduce memory usage even further. Make sure you have the <a href="https://hf.co/docs/bitsandbytes/installation" rel="nofollow">bitsandbytes</a> library installed first, and then add <code>quantization_config</code> to <code>model_kwargs</code> in the pipeline.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline, BitsAndBytesConfig
pipeline = pipeline(model=<span class="hljs-string">&quot;google/gemma-7b&quot;</span>, dtype=torch.bfloat16, device_map=<span class="hljs-string">&quot;auto&quot;</span>, model_kwargs={<span class="hljs-string">&quot;quantization_config&quot;</span>: BitsAndBytesConfig(load_in_8bit=<span class="hljs-literal">True</span>)})
pipeline(<span class="hljs-string">&quot;the secret to baking a good cake is &quot;</span>)
[{<span class="hljs-string">&#x27;generated_text&#x27;</span>: <span class="hljs-string">&#x27;the secret to baking a good cake is 1. the right ingredients 2. the right&#x27;</span>}]<!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/en/pipeline_tutorial.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
// Client bootstrap for this page: publish the path configuration as a global,
// then load the two entry modules and start the app on this script's parent element.
// The surrounding braces keep the `const` bindings below block-scoped.

// Assigned without a declaration keyword, so this becomes a global property.
// NOTE(review): presumably read by the imported entry modules — confirm against the build output.
__sveltekit_16tnnm8 = {
assets: "/docs/transformers/pr_33892/en",
base: "/docs/transformers/pr_33892/en",
env: {}
};
// Read synchronously: document.currentScript is only set while this script is
// executing, so it would not be available inside the promise callback below.
const element = document.currentScript.parentElement;
// Two null entries, matching the two ids in `node_ids` below.
// NOTE(review): presumably "no preloaded data" for each node — confirm against kit.start's contract.
const data = [null,null];
// Fetch both entry modules in parallel; the app only starts once both have resolved.
Promise.all([
import("/docs/transformers/pr_33892/en/_app/immutable/entry/start.b2c4257a.js"),
import("/docs/transformers/pr_33892/en/_app/immutable/entry/app.05ef1f97.js")
]).then(([kit, app]) => {
// Hand the app module, the mount element, and the initial state to the start entry point.
kit.start(app, element, {
node_ids: [0, 517],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
63.9 kB
·
Xet hash:
b804662a083a02448b4f97221c1728294e37b390df6b1c594e699ff950ee0462

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.