Buckets:

HuggingFaceDocBuilder's picture
download
raw
29.8 kB
/**
 * Minified, compiled Svelte component for the "Distributed inference" documentation page.
 *
 * This is build output, not hand-written code. The `source` prop handed to the `Zl` component
 * near the end of `Il` points at docs/source/usage_guides/distributed_inference.md in the
 * huggingface/accelerate repository.
 * NOTE(review): prose fixes presumably belong in that markdown file, not here. The claim (`l`)
 * hooks only overwrite textContent/innerHTML when the "svelte-..." value they compare against
 * differs, so those values look tied to the string contents - confirm before editing any string.
 *
 * Helpers (`p`, `o`, `s`, `l`, `a`, `n`, `m`, `u`, `f`, `d`, `h`, `y`, `r`, `Tl`, `Cl`, `Ul`, `$l`,
 * `kl`, `xl`, `Bt`, `vl`, ...) are imported from ../chunks and their implementations are not
 * visible in this file. The hook names below follow the usual compiled-Svelte fragment layout
 * (c = create, l = claim, h = hydrate attributes, m = mount, p = update, i / o = transition
 * in / out, d = destroy) - presumably; verify against the chunk sources.
 *
 * Module layout:
 *  - Bl, Gl, Wl, Hl, Xl: fragment factories passed as the `default` slot of the `Ce` (Tip)
 *    instances `g`, `J`, `U`, `C` and `k` respectively. Each one holds a static text/HTML string
 *    and returns an object with c / l / m / d hooks (plus `p: Bt` on all but Gl).
 *  - Il: the page fragment. It declares the static paragraph strings, instantiates the `_l`, `Gt`
 *    (headings: title / local / headingTag), `$` (code blocks: code / highlighted / wrap), `Ce`
 *    (tips) and `Zl` components, and returns c / l / h / m / p / i / o / d hooks. The `code`
 *    props appear to be base64 of the URL-encoded snippet, and `highlighted` is the
 *    pre-rendered highlight.js HTML for the same snippet. `p` re-sends `$$scope` to the five tip
 *    instances when bit 2 of the dirty mask is set.
 *  - Nl: JSON string (page title plus section table) that `h()` passes to `Tl` as the `content`
 *    of the element created as "meta" and named "hf:doc:metadata".
 *  - Pl: instance function. It hands `vl` a callback that reads the `fw` query parameter from
 *    window.location.search and discards the result, then returns an empty array.
 *  - Fl: component class; its constructor calls `Jl(this, i, Pl, Il, jl, {})`. Exported as
 *    `component`.
 *
 * NOTE(review): text defects visible in the embedded strings (left untouched here because they
 * are runtime text):
 *  - Wl: the `split_points` list item has unbalanced <code> tags and a stray backtick around
 *    device_map / fc / conv1.
 *  - Il: "Or if don’t want to make any config files" is missing "you"; "we can use the
 *    Accelerate:" and "as we work to improving this integration" read awkwardly.
 */
import{s as jl,o as vl,n as Bt}from"../chunks/scheduler.b9285784.js";import{S as gl,i as Jl,e as p,s as a,c as m,h as Ul,a as o,d as l,b as n,f as $l,g as u,j as r,k as Tl,l as Cl,m as s,n as f,t as d,o as h,p as y,q as kl,r as xl}from"../chunks/index.26bc89a1.js";import{T as Ce}from"../chunks/Tip.e4eba3d6.js";import{C as _l,H as Gt,E as Zl}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.7a0ae628.js";import{C as $}from"../chunks/CodeBlock.844ff9c3.js";function Bl(T){let i,w=`This is only needed when trying to perform an action such as gathering the results, where the data on each device
needs to be the same length. Basic inference does not require this.`;return{c(){i=p("p"),i.textContent=w},l(c){i=o(c,"P",{"data-svelte-h":!0}),r(i)!=="svelte-rtowy2"&&(i.textContent=w)},m(c,M){s(c,i,M)},p:Bt,d(c){c&&l(i)}}}function Gl(T){let i;return{c(){i=kl(`However you make this example will determine the relative batch size that will be used/passed
through the model at a given time, so make sure to remember how many items there are!`)},l(w){i=xl(w,`However you make this example will determine the relative batch size that will be used/passed
through the model at a given time, so make sure to remember how many items there are!`)},m(w,c){s(w,i,c)},d(w){w&&l(i)}}}function Wl(T){let i,w="There are a variety of parameters you can pass through to <code>prepare_pippy</code>:",c,M,v="<li><p><code>split_points</code> lets you determine what layers to split the model at. By default we use wherever <code>device_map=&quot;auto&quot; declares, such as </code>fc<code>or</code>conv1`.</p></li> <li><p><code>num_chunks</code> determines how the batch will be split and sent to the model itself (so <code>num_chunks=1</code> with four split points/four GPUs will have a naive MP where a single input gets passed between the four layer split points)</p></li>";return{c(){i=p("p"),i.innerHTML=w,c=a(),M=p("ul"),M.innerHTML=v},l(b){i=o(b,"P",{"data-svelte-h":!0}),r(i)!=="svelte-123k9fd"&&(i.innerHTML=w),c=n(b),M=o(b,"UL",{"data-svelte-h":!0}),r(M)!=="svelte-1bax0io"&&(M.innerHTML=v)},m(b,j){s(b,i,j),s(b,c,j),s(b,M,j)},p:Bt,d(b){b&&(l(i),l(c),l(M))}}}function Hl(T){let i,w="When passing inputs, we highly recommend to pass them in as a tuple of arguments. Using <code>kwargs</code> is supported, however, this approach is experimental.";return{c(){i=p("p"),i.innerHTML=w},l(c){i=o(c,"P",{"data-svelte-h":!0}),r(i)!=="svelte-1cjitly"&&(i.innerHTML=w)},m(c,M){s(c,i,M)},p:Bt,d(c){c&&l(i)}}}function Xl(T){let i,w=`If you pass in <code>gather_output=True</code> to <a href="/docs/accelerate/pr_4021/en/package_reference/inference#accelerate.prepare_pippy">inference.prepare_pippy()</a>, the output will be sent
across to all the GPUs afterwards without needing the <code>is_last_process</code> check. This is
<code>False</code> by default as it incurs a communication call.`;return{c(){i=p("p"),i.innerHTML=w},l(c){i=o(c,"P",{"data-svelte-h":!0}),r(i)!=="svelte-fqz35p"&&(i.innerHTML=w)},m(c,M){s(c,i,M)},p:Bt,d(c){c&&l(i)}}}function Il(T){let i,w,c,M,v,b,j,ke,x,Wt="Distributed inference can fall into three brackets:",xe,_,Ht="<li>Loading an entire model onto each GPU and sending chunks of a batch through each GPU’s model copy at a time</li> <li>Loading parts of a model onto each GPU and processing a single input at one time</li> <li>Loading parts of a model onto each GPU and using what is called scheduled Pipeline Parallelism to combine the two prior techniques.</li>",_e,Z,Xt="We’re going to go through the first and the last bracket, showcasing how to do each as they are more realistic scenarios.",Ze,B,Be,G,It="This is the most memory-intensive solution, as it requires each GPU to keep a full copy of the model in memory at a given time.",Ge,W,Nt="Normally when doing this, users send the model to a specific device to load it from the CPU, and then move each prompt to a different device.",We,H,Pt="A basic pipeline using the <code>diffusers</code> library might look something like so:",He,X,Xe,I,St="Followed then by performing inference based on the specific prompt:",Ie,N,Ne,P,Rt="One will notice how we have to check the rank to know what prompt to send, which can be a bit tedious.",Pe,S,Yt=`A user might then also think that with Accelerate, using the <code>Accelerator</code> to prepare a dataloader for such a task might also be
a simple way to manage this. (To learn more, check out the relevant section in the <a href="../quicktour#distributed-evaluation">Quick Tour</a>)`,Se,R,zt="Can it manage it? Yes. Does it add unneeded extra code however: also yes.",Re,Y,Et=`With Accelerate, we can simplify this process by using the <a href="/docs/accelerate/pr_4021/en/package_reference/accelerator#accelerate.Accelerator.split_between_processes">Accelerator.split_between_processes()</a> context manager (which also exists in <code>PartialState</code> and <code>AcceleratorState</code>).
This function will automatically split whatever data you pass to it (be it a prompt, a set of tensors, a dictionary of the prior data, etc.) across all the processes (with a potential
to be padded) for you to use right away.`,Ye,z,Ft="Let’s rewrite the above example using this context manager:",ze,E,Ee,F,Lt="And then to launch the code, we can use the Accelerate:",Fe,L,Qt="If you have generated a config file to be used using <code>accelerate config</code>:",Le,Q,Qe,A,At="If you have a specific config file you want to use:",Ae,V,Ve,q,Vt="Or if don’t want to make any config files and launch on two GPUs:",qe,D,qt="<p>Note: You will get some warnings about values being guessed based on your system. To remove these you can do <code>accelerate config default</code> or go through <code>accelerate config</code> to create a config file.</p>",De,K,Ke,O,Dt="We’ve now reduced the boilerplate code needed to split this data to a few lines of code quite easily.",Oe,ee,Kt="But what if we have an odd distribution of prompts to GPUs? For example, what if we have 3 prompts, but only 2 GPUs?",et,te,Ot=`Under the context manager, the first GPU would receive the first two prompts and the second GPU the third, ensuring that
all prompts are split and no overhead is needed.`,tt,le,el=`<em>However</em>, what if we then wanted to do something with the results of <em>all the GPUs</em>? (Say gather them all and perform some kind of post processing)
You can pass in <code>apply_padding=True</code> to ensure that the lists of prompts are padded to the same length, with extra data being taken
from the last sample. This way all GPUs will have the same number of prompts, and you can then gather the results.`,lt,g,st,se,tl="For instance:",at,ae,nt,ne,ll=`On the first GPU, the prompts will be <code>[&quot;a dog&quot;, &quot;a cat&quot;]</code>, and on the second GPU it will be <code>[&quot;a chicken&quot;, &quot;a chicken&quot;]</code>.
Make sure to drop the final sample, as it will be a duplicate of the previous one.`,it,ie,sl='You can find more complex examples <a href="https://github.com/huggingface/accelerate/tree/main/examples/inference/distributed" rel="nofollow">here</a> such as how to use it with LLMs.',pt,pe,ot,oe,al='This next part will discuss using <em>pipeline parallelism</em>. This is an <strong>experimental</strong> API that utilizes <a href="https://pytorch.org/docs/stable/distributed.pipelining.html#" rel="nofollow">torch.distributed.pipelining</a> as a native solution.',rt,re,nl="The general idea with pipeline parallelism is: say you have 4 GPUs and a model big enough it can be <em>split</em> on four GPUs using <code>device_map=&quot;auto&quot;</code>. With this method you can send in 4 inputs at a time (for example here, any amount works) and each model chunk will work on an input, then receive the next input once the prior chunk finished, making it <em>much</em> more efficient <strong>and faster</strong> than the method described earlier. Here’s a visual taken from the PyTorch repository:",ct,ce,il='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/accelerate/pipeline_parallel.png" alt="Pipeline parallelism example"/>',mt,me,pl='To illustrate how you can use this with Accelerate, we have created an <a href="https://github.com/huggingface/accelerate/tree/main/examples/inference" rel="nofollow">example zoo</a> showcasing a number of different models and situations. In this tutorial, we’ll show this method for GPT2 across two GPUs.',ut,ue,ol="Before you proceed, please make sure you have the latest PyTorch version installed by running the following:",ft,fe,dt,de,rl="Start by creating the model on the CPU:",ht,he,yt,ye,cl="Next you’ll need to create some example inputs to use. These help <code>torch.distributed.pipelining</code> trace the model.",wt,J,Mt,we,bt,Me,ml='Next we need to actually perform the tracing and get the model ready. 
To do so, use the <a href="/docs/accelerate/pr_4021/en/package_reference/inference#accelerate.prepare_pippy">inference.prepare_pippy()</a> function and it will fully wrap the model for pipeline parallelism automatically:',$t,be,Tt,U,jt,$e,ul="From here, all that’s left is to actually perform the distributed inference!",vt,C,gt,Te,Jt,je,fl="When finished all the data will be on the last process only:",Ut,ve,Ct,k,kt,ge,dl='And that’s it! To explore more, please check out the inference examples in the <a href="https://github.com/huggingface/accelerate/tree/main/examples/inference/pippy" rel="nofollow">Accelerate repo</a> and our <a href="../package_reference/inference">documentation</a> as we work to improving this integration.',xt,Je,_t,Ue,Zt;return v=new _l({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),j=new Gt({props:{title:"Distributed inference",local:"distributed-inference",headingTag:"h1"}}),B=new Gt({props:{title:"Sending chunks of a batch automatically to each loaded model",local:"sending-chunks-of-a-batch-automatically-to-each-loaded-model",headingTag:"h2"}}),X=new $({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjB0b3JjaC5kaXN0cmlidXRlZCUyMGFzJTIwZGlzdCUwQWZyb20lMjBkaWZmdXNlcnMlMjBpbXBvcnQlMjBEaWZmdXNpb25QaXBlbGluZSUwQSUwQXBpcGUlMjAlM0QlMjBEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTIycnVud2F5bWwlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYp",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> torch.distributed <span class="hljs-keyword">as</span> dist
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline
pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>, torch_dtype=torch.float16)`,wrap:!1}}),N=new $({props:{code:"ZGVmJTIwcnVuX2luZmVyZW5jZShyYW5rJTJDJTIwd29ybGRfc2l6ZSklM0ElMEElMjAlMjAlMjAlMjBkaXN0LmluaXRfcHJvY2Vzc19ncm91cCglMjJuY2NsJTIyJTJDJTIwcmFuayUzRHJhbmslMkMlMjB3b3JsZF9zaXplJTNEd29ybGRfc2l6ZSklMEElMjAlMjAlMjAlMjBwaXBlLnRvKHJhbmspJTBBJTBBJTIwJTIwJTIwJTIwaWYlMjB0b3JjaC5kaXN0cmlidXRlZC5nZXRfcmFuaygpJTIwJTNEJTNEJTIwMCUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHByb21wdCUyMCUzRCUyMCUyMmElMjBkb2clMjIlMEElMjAlMjAlMjAlMjBlbGlmJTIwdG9yY2guZGlzdHJpYnV0ZWQuZ2V0X3JhbmsoKSUyMCUzRCUzRCUyMDElM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9tcHQlMjAlM0QlMjAlMjJhJTIwY2F0JTIyJTBBJTBBJTIwJTIwJTIwJTIwcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQpLmltYWdlcyU1QjAlNUQlMEElMjAlMjAlMjAlMjByZXN1bHQuc2F2ZShmJTIycmVzdWx0XyU3QnJhbmslN0QucG5nJTIyKQ==",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">run_inference</span>(<span class="hljs-params">rank, world_size</span>):
dist.init_process_group(<span class="hljs-string">&quot;nccl&quot;</span>, rank=rank, world_size=world_size)
pipe.to(rank)
<span class="hljs-keyword">if</span> torch.distributed.get_rank() == <span class="hljs-number">0</span>:
prompt = <span class="hljs-string">&quot;a dog&quot;</span>
<span class="hljs-keyword">elif</span> torch.distributed.get_rank() == <span class="hljs-number">1</span>:
prompt = <span class="hljs-string">&quot;a cat&quot;</span>
result = pipe(prompt).images[<span class="hljs-number">0</span>]
result.save(<span class="hljs-string">f&quot;result_<span class="hljs-subst">{rank}</span>.png&quot;</span>)`,wrap:!1}}),E=new $({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMFBhcnRpYWxTdGF0ZSUyMCUyMCUyMyUyMENhbiUyMGFsc28lMjBiZSUyMEFjY2VsZXJhdG9yJTIwb3IlMjBBY2NlbGVyYXRvclN0YXRlJTBBZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMERpZmZ1c2lvblBpcGVsaW5lJTBBJTBBcGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJydW53YXltbCUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiklMEFkaXN0cmlidXRlZF9zdGF0ZSUyMCUzRCUyMFBhcnRpYWxTdGF0ZSgpJTBBcGlwZS50byhkaXN0cmlidXRlZF9zdGF0ZS5kZXZpY2UpJTBBJTBBJTIzJTIwQXNzdW1lJTIwdHdvJTIwcHJvY2Vzc2VzJTBBd2l0aCUyMGRpc3RyaWJ1dGVkX3N0YXRlLnNwbGl0X2JldHdlZW5fcHJvY2Vzc2VzKCU1QiUyMmElMjBkb2clMjIlMkMlMjAlMjJhJTIwY2F0JTIyJTVEKSUyMGFzJTIwcHJvbXB0JTNBJTBBJTIwJTIwJTIwJTIwcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQpLmltYWdlcyU1QjAlNUQlMEElMjAlMjAlMjAlMjByZXN1bHQuc2F2ZShmJTIycmVzdWx0XyU3QmRpc3RyaWJ1dGVkX3N0YXRlLnByb2Nlc3NfaW5kZXglN0QucG5nJTIyKQ==",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> PartialState <span class="hljs-comment"># Can also be Accelerator or AcceleratorState</span>
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline
pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>, torch_dtype=torch.float16)
distributed_state = PartialState()
pipe.to(distributed_state.device)
<span class="hljs-comment"># Assume two processes</span>
<span class="hljs-keyword">with</span> distributed_state.split_between_processes([<span class="hljs-string">&quot;a dog&quot;</span>, <span class="hljs-string">&quot;a cat&quot;</span>]) <span class="hljs-keyword">as</span> prompt:
result = pipe(prompt).images[<span class="hljs-number">0</span>]
result.save(<span class="hljs-string">f&quot;result_<span class="hljs-subst">{distributed_state.process_index}</span>.png&quot;</span>)`,wrap:!1}}),Q=new $({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMGRpc3RyaWJ1dGVkX2luZmVyZW5jZS5weQ==",highlighted:"accelerate launch distributed_inference.py",wrap:!1}}),V=new $({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMC0tY29uZmlnX2ZpbGUlMjBteV9jb25maWcuanNvbiUyMGRpc3RyaWJ1dGVkX2luZmVyZW5jZS5weQ==",highlighted:"accelerate launch --config_file my_config.json distributed_inference.py",wrap:!1}}),K=new $({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMC0tbnVtX3Byb2Nlc3NlcyUyMDIlMjBkaXN0cmlidXRlZF9pbmZlcmVuY2UucHk=",highlighted:"accelerate launch --num_processes 2 distributed_inference.py",wrap:!1}}),g=new Ce({props:{$$slots:{default:[Bl]},$$scope:{ctx:T}}}),ae=new $({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMFBhcnRpYWxTdGF0ZSUyMCUyMCUyMyUyMENhbiUyMGFsc28lMjBiZSUyMEFjY2VsZXJhdG9yJTIwb3IlMjBBY2NlbGVyYXRvclN0YXRlJTBBZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMERpZmZ1c2lvblBpcGVsaW5lJTBBJTBBcGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJydW53YXltbCUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiklMEFkaXN0cmlidXRlZF9zdGF0ZSUyMCUzRCUyMFBhcnRpYWxTdGF0ZSgpJTBBcGlwZS50byhkaXN0cmlidXRlZF9zdGF0ZS5kZXZpY2UpJTBBJTBBJTIzJTIwQXNzdW1lJTIwdHdvJTIwcHJvY2Vzc2VzJTBBd2l0aCUyMGRpc3RyaWJ1dGVkX3N0YXRlLnNwbGl0X2JldHdlZW5fcHJvY2Vzc2VzKCU1QiUyMmElMjBkb2clMjIlMkMlMjAlMjJhJTIwY2F0JTIyJTJDJTIwJTIyYSUyMGNoaWNrZW4lMjIlNUQlMkMlMjBhcHBseV9wYWRkaW5nJTNEVHJ1ZSklMjBhcyUyMHByb21wdCUzQSUwQSUyMCUyMCUyMCUyMHJlc3VsdCUyMCUzRCUyMHBpcGUocHJvbXB0KS5pbWFnZXM=",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> PartialState <span class="hljs-comment"># Can also be Accelerator or AcceleratorState</span>
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline
pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>, torch_dtype=torch.float16)
distributed_state = PartialState()
pipe.to(distributed_state.device)
<span class="hljs-comment"># Assume two processes</span>
<span class="hljs-keyword">with</span> distributed_state.split_between_processes([<span class="hljs-string">&quot;a dog&quot;</span>, <span class="hljs-string">&quot;a cat&quot;</span>, <span class="hljs-string">&quot;a chicken&quot;</span>], apply_padding=<span class="hljs-literal">True</span>) <span class="hljs-keyword">as</span> prompt:
result = pipe(prompt).images`,wrap:!1}}),pe=new Gt({props:{title:"Memory-efficient pipeline parallelism (experimental)",local:"memory-efficient-pipeline-parallelism-experimental",headingTag:"h2"}}),fe=new $({props:{code:"cGlwJTIwaW5zdGFsbCUyMHRvcmNo",highlighted:"pip install torch",wrap:!1}}),he=new $({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEdQVDJGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uJTJDJTIwR1BUMkNvbmZpZyUwQSUwQWNvbmZpZyUyMCUzRCUyMEdQVDJDb25maWcoKSUwQW1vZGVsJTIwJTNEJTIwR1BUMkZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24oY29uZmlnKSUwQW1vZGVsLmV2YWwoKQ==",highlighted:`from transformers <span class="hljs-keyword">import</span> GPT2ForSequenceClassification, GPT2Config
config = <span class="hljs-built_in">GPT2Config</span>()
model = <span class="hljs-built_in">GPT2ForSequenceClassification</span>(config)
model.<span class="hljs-built_in">eval</span>()`,wrap:!1}}),J=new Ce({props:{warning:!0,$$slots:{default:[Gl]},$$scope:{ctx:T}}}),we=new $({props:{code:"aW5wdXQlMjAlM0QlMjB0b3JjaC5yYW5kaW50KCUwQSUyMCUyMCUyMCUyMGxvdyUzRDAlMkMlMEElMjAlMjAlMjAlMjBoaWdoJTNEY29uZmlnLnZvY2FiX3NpemUlMkMlMEElMjAlMjAlMjAlMjBzaXplJTNEKDIlMkMlMjAxMDI0KSUyQyUyMCUyMCUyMyUyMGJzJTIweCUyMHNlcV9sZW4lMEElMjAlMjAlMjAlMjBkZXZpY2UlM0QlMjJjcHUlMjIlMkMlMEElMjAlMjAlMjAlMjBkdHlwZSUzRHRvcmNoLmludDY0JTJDJTBBJTIwJTIwJTIwJTIwcmVxdWlyZXNfZ3JhZCUzREZhbHNlJTJDJTBBKQ==",highlighted:`input = torch.randint(
<span class="hljs-attribute">low</span>=0,
<span class="hljs-attribute">high</span>=config.vocab_size,
size=(2, 1024), # bs x seq_len
<span class="hljs-attribute">device</span>=<span class="hljs-string">&quot;cpu&quot;</span>,
<span class="hljs-attribute">dtype</span>=torch.int64,
<span class="hljs-attribute">requires_grad</span>=<span class="hljs-literal">False</span>,
)`,wrap:!1}}),be=new $({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUuaW5mZXJlbmNlJTIwaW1wb3J0JTIwcHJlcGFyZV9waXBweSUwQWV4YW1wbGVfaW5wdXRzJTIwJTNEJTIwJTdCJTIyaW5wdXRfaWRzJTIyJTNBJTIwaW5wdXQlN0QlMEFtb2RlbCUyMCUzRCUyMHByZXBhcmVfcGlwcHkobW9kZWwlMkMlMjBleGFtcGxlX2FyZ3MlM0QoaW5wdXQlMkMpKQ==",highlighted:`<span class="hljs-keyword">from</span> accelerate.inference <span class="hljs-keyword">import</span> prepare_pippy
example_inputs = {&quot;input_ids&quot;: <span class="hljs-keyword">input</span>}
model = prepare_pippy(model, example_args=(<span class="hljs-keyword">input</span>,))`,wrap:!1}}),U=new Ce({props:{$$slots:{default:[Wl]},$$scope:{ctx:T}}}),C=new Ce({props:{warning:!0,$$slots:{default:[Hl]},$$scope:{ctx:T}}}),Te=new $({props:{code:"YXJncyUyMCUzRCUyMHNvbWVfbW9yZV9hcmd1bWVudHMlMEF3aXRoJTIwdG9yY2gubm9fZ3JhZCgpJTNBJTBBJTIwJTIwJTIwJTIwb3V0cHV0JTIwJTNEJTIwbW9kZWwoKmFyZ3Mp",highlighted:`<span class="hljs-variable">args</span> = <span class="hljs-variable">some_more_arguments</span>
<span class="hljs-variable">with</span> <span class="hljs-variable">torch.no_grad</span>():
<span class="hljs-variable">output</span> = <span class="hljs-function"><span class="hljs-title">model</span>(*<span class="hljs-variable">args</span>)</span>`,wrap:!1}}),ve=new $({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBQYXJ0aWFsU3RhdGUlMEFpZiUyMFBhcnRpYWxTdGF0ZSgpLmlzX2xhc3RfcHJvY2VzcyUzQSUwQSUyMCUyMCUyMCUyMHByaW50KG91dHB1dCk=",highlighted:`<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> PartialState
<span class="hljs-keyword">if</span> PartialState().is_last_process:
<span class="hljs-built_in">print</span>(output)`,wrap:!1}}),k=new Ce({props:{$$slots:{default:[Xl]},$$scope:{ctx:T}}}),Je=new Zl({props:{source:"https://github.com/huggingface/accelerate/blob/main/docs/source/usage_guides/distributed_inference.md"}}),{c(){i=p("meta"),w=a(),c=p("p"),M=a(),m(v.$$.fragment),b=a(),m(j.$$.fragment),ke=a(),x=p("p"),x.textContent=Wt,xe=a(),_=p("ol"),_.innerHTML=Ht,_e=a(),Z=p("p"),Z.textContent=Xt,Ze=a(),m(B.$$.fragment),Be=a(),G=p("p"),G.textContent=It,Ge=a(),W=p("p"),W.textContent=Nt,We=a(),H=p("p"),H.innerHTML=Pt,He=a(),m(X.$$.fragment),Xe=a(),I=p("p"),I.textContent=St,Ie=a(),m(N.$$.fragment),Ne=a(),P=p("p"),P.textContent=Rt,Pe=a(),S=p("p"),S.innerHTML=Yt,Se=a(),R=p("p"),R.textContent=zt,Re=a(),Y=p("p"),Y.innerHTML=Et,Ye=a(),z=p("p"),z.textContent=Ft,ze=a(),m(E.$$.fragment),Ee=a(),F=p("p"),F.textContent=Lt,Fe=a(),L=p("p"),L.innerHTML=Qt,Le=a(),m(Q.$$.fragment),Qe=a(),A=p("p"),A.textContent=At,Ae=a(),m(V.$$.fragment),Ve=a(),q=p("p"),q.textContent=Vt,qe=a(),D=p("blockquote"),D.innerHTML=qt,De=a(),m(K.$$.fragment),Ke=a(),O=p("p"),O.textContent=Dt,Oe=a(),ee=p("p"),ee.textContent=Kt,et=a(),te=p("p"),te.textContent=Ot,tt=a(),le=p("p"),le.innerHTML=el,lt=a(),m(g.$$.fragment),st=a(),se=p("p"),se.textContent=tl,at=a(),m(ae.$$.fragment),nt=a(),ne=p("p"),ne.innerHTML=ll,it=a(),ie=p("p"),ie.innerHTML=sl,pt=a(),m(pe.$$.fragment),ot=a(),oe=p("p"),oe.innerHTML=al,rt=a(),re=p("p"),re.innerHTML=nl,ct=a(),ce=p("p"),ce.innerHTML=il,mt=a(),me=p("p"),me.innerHTML=pl,ut=a(),ue=p("p"),ue.textContent=ol,ft=a(),m(fe.$$.fragment),dt=a(),de=p("p"),de.textContent=rl,ht=a(),m(he.$$.fragment),yt=a(),ye=p("p"),ye.innerHTML=cl,wt=a(),m(J.$$.fragment),Mt=a(),m(we.$$.fragment),bt=a(),Me=p("p"),Me.innerHTML=ml,$t=a(),m(be.$$.fragment),Tt=a(),m(U.$$.fragment),jt=a(),$e=p("p"),$e.textContent=ul,vt=a(),m(C.$$.fragment),gt=a(),m(Te.$$.fragment),Jt=a(),je=p("p"),je.textContent=fl,Ut=a(),m(ve.$$.fragment),Ct=a(),m(k.$$.fragment),kt=a(),ge=p("p"),ge.innerHTML=dl,xt=a(),m(Je.$$.
fragment),_t=a(),Ue=p("p"),this.h()},l(e){const t=Ul("svelte-u9bgzb",document.head);i=o(t,"META",{name:!0,content:!0}),t.forEach(l),w=n(e),c=o(e,"P",{}),$l(c).forEach(l),M=n(e),u(v.$$.fragment,e),b=n(e),u(j.$$.fragment,e),ke=n(e),x=o(e,"P",{"data-svelte-h":!0}),r(x)!=="svelte-pzys4w"&&(x.textContent=Wt),xe=n(e),_=o(e,"OL",{"data-svelte-h":!0}),r(_)!=="svelte-auhvyh"&&(_.innerHTML=Ht),_e=n(e),Z=o(e,"P",{"data-svelte-h":!0}),r(Z)!=="svelte-105ktyk"&&(Z.textContent=Xt),Ze=n(e),u(B.$$.fragment,e),Be=n(e),G=o(e,"P",{"data-svelte-h":!0}),r(G)!=="svelte-1ojc10y"&&(G.textContent=It),Ge=n(e),W=o(e,"P",{"data-svelte-h":!0}),r(W)!=="svelte-1qi053h"&&(W.textContent=Nt),We=n(e),H=o(e,"P",{"data-svelte-h":!0}),r(H)!=="svelte-rhvic4"&&(H.innerHTML=Pt),He=n(e),u(X.$$.fragment,e),Xe=n(e),I=o(e,"P",{"data-svelte-h":!0}),r(I)!=="svelte-1dzspg1"&&(I.textContent=St),Ie=n(e),u(N.$$.fragment,e),Ne=n(e),P=o(e,"P",{"data-svelte-h":!0}),r(P)!=="svelte-1ov43u6"&&(P.textContent=Rt),Pe=n(e),S=o(e,"P",{"data-svelte-h":!0}),r(S)!=="svelte-1n5a8vc"&&(S.innerHTML=Yt),Se=n(e),R=o(e,"P",{"data-svelte-h":!0}),r(R)!=="svelte-12rt8sz"&&(R.textContent=zt),Re=n(e),Y=o(e,"P",{"data-svelte-h":!0}),r(Y)!=="svelte-gw34xi"&&(Y.innerHTML=Et),Ye=n(e),z=o(e,"P",{"data-svelte-h":!0}),r(z)!=="svelte-8uk323"&&(z.textContent=Ft),ze=n(e),u(E.$$.fragment,e),Ee=n(e),F=o(e,"P",{"data-svelte-h":!0}),r(F)!=="svelte-k1tgc3"&&(F.textContent=Lt),Fe=n(e),L=o(e,"P",{"data-svelte-h":!0}),r(L)!=="svelte-glszdf"&&(L.innerHTML=Qt),Le=n(e),u(Q.$$.fragment,e),Qe=n(e),A=o(e,"P",{"data-svelte-h":!0}),r(A)!=="svelte-1beq8se"&&(A.textContent=At),Ae=n(e),u(V.$$.fragment,e),Ve=n(e),q=o(e,"P",{"data-svelte-h":!0}),r(q)!=="svelte-52p69u"&&(q.textContent=Vt),qe=n(e),D=o(e,"BLOCKQUOTE",{"data-svelte-h":!0}),r(D)!=="svelte-ix7ij8"&&(D.innerHTML=qt),De=n(e),u(K.$$.fragment,e),Ke=n(e),O=o(e,"P",{"data-svelte-h":!0}),r(O)!=="svelte-ui4crb"&&(O.textContent=Dt),Oe=n(e),ee=o(e,"P",{"data-svelte-h":!0}),r(ee)!=="svelte-dyqt5a"&&(ee.textContent=Kt),et=
n(e),te=o(e,"P",{"data-svelte-h":!0}),r(te)!=="svelte-1o56krx"&&(te.textContent=Ot),tt=n(e),le=o(e,"P",{"data-svelte-h":!0}),r(le)!=="svelte-1y9mak3"&&(le.innerHTML=el),lt=n(e),u(g.$$.fragment,e),st=n(e),se=o(e,"P",{"data-svelte-h":!0}),r(se)!=="svelte-4vay6o"&&(se.textContent=tl),at=n(e),u(ae.$$.fragment,e),nt=n(e),ne=o(e,"P",{"data-svelte-h":!0}),r(ne)!=="svelte-42h08l"&&(ne.innerHTML=ll),it=n(e),ie=o(e,"P",{"data-svelte-h":!0}),r(ie)!=="svelte-13go2fo"&&(ie.innerHTML=sl),pt=n(e),u(pe.$$.fragment,e),ot=n(e),oe=o(e,"P",{"data-svelte-h":!0}),r(oe)!=="svelte-ynzvto"&&(oe.innerHTML=al),rt=n(e),re=o(e,"P",{"data-svelte-h":!0}),r(re)!=="svelte-1l3sfxh"&&(re.innerHTML=nl),ct=n(e),ce=o(e,"P",{"data-svelte-h":!0}),r(ce)!=="svelte-l2zgqj"&&(ce.innerHTML=il),mt=n(e),me=o(e,"P",{"data-svelte-h":!0}),r(me)!=="svelte-heqe2k"&&(me.innerHTML=pl),ut=n(e),ue=o(e,"P",{"data-svelte-h":!0}),r(ue)!=="svelte-1pr52sj"&&(ue.textContent=ol),ft=n(e),u(fe.$$.fragment,e),dt=n(e),de=o(e,"P",{"data-svelte-h":!0}),r(de)!=="svelte-1mviugk"&&(de.textContent=rl),ht=n(e),u(he.$$.fragment,e),yt=n(e),ye=o(e,"P",{"data-svelte-h":!0}),r(ye)!=="svelte-15xaqzv"&&(ye.innerHTML=cl),wt=n(e),u(J.$$.fragment,e),Mt=n(e),u(we.$$.fragment,e),bt=n(e),Me=o(e,"P",{"data-svelte-h":!0}),r(Me)!=="svelte-11mjb3e"&&(Me.innerHTML=ml),$t=n(e),u(be.$$.fragment,e),Tt=n(e),u(U.$$.fragment,e),jt=n(e),$e=o(e,"P",{"data-svelte-h":!0}),r($e)!=="svelte-t45mca"&&($e.textContent=ul),vt=n(e),u(C.$$.fragment,e),gt=n(e),u(Te.$$.fragment,e),Jt=n(e),je=o(e,"P",{"data-svelte-h":!0}),r(je)!=="svelte-1f6tfb4"&&(je.textContent=fl),Ut=n(e),u(ve.$$.fragment,e),Ct=n(e),u(k.$$.fragment,e),kt=n(e),ge=o(e,"P",{"data-svelte-h":!0}),r(ge)!=="svelte-bz2a3m"&&(ge.innerHTML=dl),xt=n(e),u(Je.$$.fragment,e),_t=n(e),Ue=o(e,"P",{}),$l(Ue).forEach(l),this.h()},h(){Tl(i,"name","hf:doc:metadata"),Tl(i,"content",Nl)},m(e,t){Cl(document.head,i),s(e,w,t),s(e,c,t),s(e,M,t),f(v,e,t),s(e,b,t),f(j,e,t),s(e,ke,t),s(e,x,t),s(e,xe,t),s(e,_,t),s(e,_e,t),s(e,Z,t),s(e,Ze,
t),f(B,e,t),s(e,Be,t),s(e,G,t),s(e,Ge,t),s(e,W,t),s(e,We,t),s(e,H,t),s(e,He,t),f(X,e,t),s(e,Xe,t),s(e,I,t),s(e,Ie,t),f(N,e,t),s(e,Ne,t),s(e,P,t),s(e,Pe,t),s(e,S,t),s(e,Se,t),s(e,R,t),s(e,Re,t),s(e,Y,t),s(e,Ye,t),s(e,z,t),s(e,ze,t),f(E,e,t),s(e,Ee,t),s(e,F,t),s(e,Fe,t),s(e,L,t),s(e,Le,t),f(Q,e,t),s(e,Qe,t),s(e,A,t),s(e,Ae,t),f(V,e,t),s(e,Ve,t),s(e,q,t),s(e,qe,t),s(e,D,t),s(e,De,t),f(K,e,t),s(e,Ke,t),s(e,O,t),s(e,Oe,t),s(e,ee,t),s(e,et,t),s(e,te,t),s(e,tt,t),s(e,le,t),s(e,lt,t),f(g,e,t),s(e,st,t),s(e,se,t),s(e,at,t),f(ae,e,t),s(e,nt,t),s(e,ne,t),s(e,it,t),s(e,ie,t),s(e,pt,t),f(pe,e,t),s(e,ot,t),s(e,oe,t),s(e,rt,t),s(e,re,t),s(e,ct,t),s(e,ce,t),s(e,mt,t),s(e,me,t),s(e,ut,t),s(e,ue,t),s(e,ft,t),f(fe,e,t),s(e,dt,t),s(e,de,t),s(e,ht,t),f(he,e,t),s(e,yt,t),s(e,ye,t),s(e,wt,t),f(J,e,t),s(e,Mt,t),f(we,e,t),s(e,bt,t),s(e,Me,t),s(e,$t,t),f(be,e,t),s(e,Tt,t),f(U,e,t),s(e,jt,t),s(e,$e,t),s(e,vt,t),f(C,e,t),s(e,gt,t),f(Te,e,t),s(e,Jt,t),s(e,je,t),s(e,Ut,t),f(ve,e,t),s(e,Ct,t),f(k,e,t),s(e,kt,t),s(e,ge,t),s(e,xt,t),f(Je,e,t),s(e,_t,t),s(e,Ue,t),Zt=!0},p(e,[t]){const hl={};t&2&&(hl.$$scope={dirty:t,ctx:e}),g.$set(hl);const yl={};t&2&&(yl.$$scope={dirty:t,ctx:e}),J.$set(yl);const wl={};t&2&&(wl.$$scope={dirty:t,ctx:e}),U.$set(wl);const Ml={};t&2&&(Ml.$$scope={dirty:t,ctx:e}),C.$set(Ml);const 
bl={};t&2&&(bl.$$scope={dirty:t,ctx:e}),k.$set(bl)},i(e){Zt||(d(v.$$.fragment,e),d(j.$$.fragment,e),d(B.$$.fragment,e),d(X.$$.fragment,e),d(N.$$.fragment,e),d(E.$$.fragment,e),d(Q.$$.fragment,e),d(V.$$.fragment,e),d(K.$$.fragment,e),d(g.$$.fragment,e),d(ae.$$.fragment,e),d(pe.$$.fragment,e),d(fe.$$.fragment,e),d(he.$$.fragment,e),d(J.$$.fragment,e),d(we.$$.fragment,e),d(be.$$.fragment,e),d(U.$$.fragment,e),d(C.$$.fragment,e),d(Te.$$.fragment,e),d(ve.$$.fragment,e),d(k.$$.fragment,e),d(Je.$$.fragment,e),Zt=!0)},o(e){h(v.$$.fragment,e),h(j.$$.fragment,e),h(B.$$.fragment,e),h(X.$$.fragment,e),h(N.$$.fragment,e),h(E.$$.fragment,e),h(Q.$$.fragment,e),h(V.$$.fragment,e),h(K.$$.fragment,e),h(g.$$.fragment,e),h(ae.$$.fragment,e),h(pe.$$.fragment,e),h(fe.$$.fragment,e),h(he.$$.fragment,e),h(J.$$.fragment,e),h(we.$$.fragment,e),h(be.$$.fragment,e),h(U.$$.fragment,e),h(C.$$.fragment,e),h(Te.$$.fragment,e),h(ve.$$.fragment,e),h(k.$$.fragment,e),h(Je.$$.fragment,e),Zt=!1},d(e){e&&(l(w),l(c),l(M),l(b),l(ke),l(x),l(xe),l(_),l(_e),l(Z),l(Ze),l(Be),l(G),l(Ge),l(W),l(We),l(H),l(He),l(Xe),l(I),l(Ie),l(Ne),l(P),l(Pe),l(S),l(Se),l(R),l(Re),l(Y),l(Ye),l(z),l(ze),l(Ee),l(F),l(Fe),l(L),l(Le),l(Qe),l(A),l(Ae),l(Ve),l(q),l(qe),l(D),l(De),l(Ke),l(O),l(Oe),l(ee),l(et),l(te),l(tt),l(le),l(lt),l(st),l(se),l(at),l(nt),l(ne),l(it),l(ie),l(pt),l(ot),l(oe),l(rt),l(re),l(ct),l(ce),l(mt),l(me),l(ut),l(ue),l(ft),l(dt),l(de),l(ht),l(yt),l(ye),l(wt),l(Mt),l(bt),l(Me),l($t),l(Tt),l(jt),l($e),l(vt),l(gt),l(Jt),l(je),l(Ut),l(Ct),l(kt),l(ge),l(xt),l(_t),l(Ue)),l(i),y(v,e),y(j,e),y(B,e),y(X,e),y(N,e),y(E,e),y(Q,e),y(V,e),y(K,e),y(g,e),y(ae,e),y(pe,e),y(fe,e),y(he,e),y(J,e),y(we,e),y(be,e),y(U,e),y(C,e),y(Te,e),y(ve,e),y(k,e),y(Je,e)}}}const Nl='{"title":"Distributed inference","local":"distributed-inference","sections":[{"title":"Sending chunks of a batch automatically to each loaded 
model","local":"sending-chunks-of-a-batch-automatically-to-each-loaded-model","sections":[],"depth":2},{"title":"Memory-efficient pipeline parallelism (experimental)","local":"memory-efficient-pipeline-parallelism-experimental","sections":[],"depth":2}],"depth":1}';function Pl(T){return vl(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Fl extends gl{constructor(i){super(),Jl(this,i,Pl,Il,jl,{})}}export{Fl as component};

Xet Storage Details

Size:
29.8 kB
·
Xet hash:
9ee872bbc44600c55f05cfe0ca4ec1d91ecddb36b28bb9233da988a052384e92

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.