Buckets:
| import{s as jl,o as vl,n as Bt}from"../chunks/scheduler.b9285784.js";import{S as gl,i as Jl,e as p,s as a,c as m,h as Ul,a as o,d as l,b as n,f as $l,g as u,j as r,k as Tl,l as Cl,m as s,n as f,t as d,o as h,p as y,q as kl,r as xl}from"../chunks/index.26bc89a1.js";import{T as Ce}from"../chunks/Tip.e4eba3d6.js";import{C as _l,H as Gt,E as Zl}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.7a0ae628.js";import{C as $}from"../chunks/CodeBlock.844ff9c3.js";function Bl(T){let i,w=`This is only needed when trying to perform an action such as gathering the results, where the data on each device | |
| needs to be the same length. Basic inference does not require this.`;return{c(){i=p("p"),i.textContent=w},l(c){i=o(c,"P",{"data-svelte-h":!0}),r(i)!=="svelte-rtowy2"&&(i.textContent=w)},m(c,M){s(c,i,M)},p:Bt,d(c){c&&l(i)}}}function Gl(T){let i;return{c(){i=kl(`However you make this example will determine the relative batch size that will be used/passed | |
| through the model at a given time, so make sure to remember how many items there are!`)},l(w){i=xl(w,`However you make this example will determine the relative batch size that will be used/passed | |
| through the model at a given time, so make sure to remember how many items there are!`)},m(w,c){s(w,i,c)},d(w){w&&l(i)}}}function Wl(T){let i,w="There are a variety of parameters you can pass through to <code>prepare_pippy</code>:",c,M,v="<li><p><code>split_points</code> lets you determine what layers to split the model at. By default we use wherever <code>device_map="auto" declares, such as </code>fc<code>or</code>conv1`.</p></li> <li><p><code>num_chunks</code> determines how the batch will be split and sent to the model itself (so <code>num_chunks=1</code> with four split points/four GPUs will have a naive MP where a single input gets passed between the four layer split points)</p></li>";return{c(){i=p("p"),i.innerHTML=w,c=a(),M=p("ul"),M.innerHTML=v},l(b){i=o(b,"P",{"data-svelte-h":!0}),r(i)!=="svelte-123k9fd"&&(i.innerHTML=w),c=n(b),M=o(b,"UL",{"data-svelte-h":!0}),r(M)!=="svelte-1bax0io"&&(M.innerHTML=v)},m(b,j){s(b,i,j),s(b,c,j),s(b,M,j)},p:Bt,d(b){b&&(l(i),l(c),l(M))}}}function Hl(T){let i,w="When passing inputs, we highly recommend to pass them in as a tuple of arguments. Using <code>kwargs</code> is supported, however, this approach is experimental.";return{c(){i=p("p"),i.innerHTML=w},l(c){i=o(c,"P",{"data-svelte-h":!0}),r(i)!=="svelte-1cjitly"&&(i.innerHTML=w)},m(c,M){s(c,i,M)},p:Bt,d(c){c&&l(i)}}}function Xl(T){let i,w=`If you pass in <code>gather_output=True</code> to <a href="/docs/accelerate/pr_4021/en/package_reference/inference#accelerate.prepare_pippy">inference.prepare_pippy()</a>, the output will be sent | |
| across to all the GPUs afterwards without needing the <code>is_last_process</code> check. This is | |
| <code>False</code> by default as it incurs a communication call.`;return{c(){i=p("p"),i.innerHTML=w},l(c){i=o(c,"P",{"data-svelte-h":!0}),r(i)!=="svelte-fqz35p"&&(i.innerHTML=w)},m(c,M){s(c,i,M)},p:Bt,d(c){c&&l(i)}}}function Il(T){let i,w,c,M,v,b,j,ke,x,Wt="Distributed inference can fall into three brackets:",xe,_,Ht="<li>Loading an entire model onto each GPU and sending chunks of a batch through each GPU’s model copy at a time</li> <li>Loading parts of a model onto each GPU and processing a single input at one time</li> <li>Loading parts of a model onto each GPU and using what is called scheduled Pipeline Parallelism to combine the two prior techniques.</li>",_e,Z,Xt="We’re going to go through the first and the last bracket, showcasing how to do each as they are more realistic scenarios.",Ze,B,Be,G,It="This is the most memory-intensive solution, as it requires each GPU to keep a full copy of the model in memory at a given time.",Ge,W,Nt="Normally when doing this, users send the model to a specific device to load it from the CPU, and then move each prompt to a different device.",We,H,Pt="A basic pipeline using the <code>diffusers</code> library might look something like so:",He,X,Xe,I,St="Followed then by performing inference based on the specific prompt:",Ie,N,Ne,P,Rt="One will notice how we have to check the rank to know what prompt to send, which can be a bit tedious.",Pe,S,Yt=`A user might then also think that with Accelerate, using the <code>Accelerator</code> to prepare a dataloader for such a task might also be | |
| a simple way to manage this. (To learn more, check out the relevant section in the <a href="../quicktour#distributed-evaluation">Quick Tour</a>)`,Se,R,zt="Can it manage it? Yes. Does it add unneeded extra code however: also yes.",Re,Y,Et=`With Accelerate, we can simplify this process by using the <a href="/docs/accelerate/pr_4021/en/package_reference/accelerator#accelerate.Accelerator.split_between_processes">Accelerator.split_between_processes()</a> context manager (which also exists in <code>PartialState</code> and <code>AcceleratorState</code>). | |
| This function will automatically split whatever data you pass to it (be it a prompt, a set of tensors, a dictionary of the prior data, etc.) across all the processes (with a potential | |
| to be padded) for you to use right away.`,Ye,z,Ft="Let’s rewrite the above example using this context manager:",ze,E,Ee,F,Lt="And then to launch the code, we can use the Accelerate:",Fe,L,Qt="If you have generated a config file to be used using <code>accelerate config</code>:",Le,Q,Qe,A,At="If you have a specific config file you want to use:",Ae,V,Ve,q,Vt="Or if don’t want to make any config files and launch on two GPUs:",qe,D,qt="<p>Note: You will get some warnings about values being guessed based on your system. To remove these you can do <code>accelerate config default</code> or go through <code>accelerate config</code> to create a config file.</p>",De,K,Ke,O,Dt="We’ve now reduced the boilerplate code needed to split this data to a few lines of code quite easily.",Oe,ee,Kt="But what if we have an odd distribution of prompts to GPUs? For example, what if we have 3 prompts, but only 2 GPUs?",et,te,Ot=`Under the context manager, the first GPU would receive the first two prompts and the second GPU the third, ensuring that | |
| all prompts are split and no overhead is needed.`,tt,le,el=`<em>However</em>, what if we then wanted to do something with the results of <em>all the GPUs</em>? (Say gather them all and perform some kind of post processing) | |
| You can pass in <code>apply_padding=True</code> to ensure that the lists of prompts are padded to the same length, with extra data being taken | |
| from the last sample. This way all GPUs will have the same number of prompts, and you can then gather the results.`,lt,g,st,se,tl="For instance:",at,ae,nt,ne,ll=`On the first GPU, the prompts will be <code>["a dog", "a cat"]</code>, and on the second GPU it will be <code>["a chicken", "a chicken"]</code>. | |
| Make sure to drop the final sample, as it will be a duplicate of the previous one.`,it,ie,sl='You can find more complex examples <a href="https://github.com/huggingface/accelerate/tree/main/examples/inference/distributed" rel="nofollow">here</a> such as how to use it with LLMs.',pt,pe,ot,oe,al='This next part will discuss using <em>pipeline parallelism</em>. This is an <strong>experimental</strong> API that utilizes <a href="https://pytorch.org/docs/stable/distributed.pipelining.html#" rel="nofollow">torch.distributed.pipelining</a> as a native solution.',rt,re,nl="The general idea with pipeline parallelism is: say you have 4 GPUs and a model big enough it can be <em>split</em> on four GPUs using <code>device_map="auto"</code>. With this method you can send in 4 inputs at a time (for example here, any amount works) and each model chunk will work on an input, then receive the next input once the prior chunk finished, making it <em>much</em> more efficient <strong>and faster</strong> than the method described earlier. Here’s a visual taken from the PyTorch repository:",ct,ce,il='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/accelerate/pipeline_parallel.png" alt="Pipeline parallelism example"/>',mt,me,pl='To illustrate how you can use this with Accelerate, we have created an <a href="https://github.com/huggingface/accelerate/tree/main/examples/inference" rel="nofollow">example zoo</a> showcasing a number of different models and situations. In this tutorial, we’ll show this method for GPT2 across two GPUs.',ut,ue,ol="Before you proceed, please make sure you have the latest PyTorch version installed by running the following:",ft,fe,dt,de,rl="Start by creating the model on the CPU:",ht,he,yt,ye,cl="Next you’ll need to create some example inputs to use. These help <code>torch.distributed.pipelining</code> trace the model.",wt,J,Mt,we,bt,Me,ml='Next we need to actually perform the tracing and get the model ready. To do so, use the <a href="/docs/accelerate/pr_4021/en/package_reference/inference#accelerate.prepare_pippy">inference.prepare_pippy()</a> function and it will fully wrap the model for pipeline parallelism automatically:',$t,be,Tt,U,jt,$e,ul="From here, all that’s left is to actually perform the distributed inference!",vt,C,gt,Te,Jt,je,fl="When finished all the data will be on the last process only:",Ut,ve,Ct,k,kt,ge,dl='And that’s it! To explore more, please check out the inference examples in the <a href="https://github.com/huggingface/accelerate/tree/main/examples/inference/pippy" rel="nofollow">Accelerate repo</a> and our <a href="../package_reference/inference">documentation</a> as we work to improving this integration.',xt,Je,_t,Ue,Zt;return v=new _l({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),j=new Gt({props:{title:"Distributed inference",local:"distributed-inference",headingTag:"h1"}}),B=new Gt({props:{title:"Sending chunks of a batch automatically to each loaded model",local:"sending-chunks-of-a-batch-automatically-to-each-loaded-model",headingTag:"h2"}}),X=new $({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjB0b3JjaC5kaXN0cmlidXRlZCUyMGFzJTIwZGlzdCUwQWZyb20lMjBkaWZmdXNlcnMlMjBpbXBvcnQlMjBEaWZmdXNpb25QaXBlbGluZSUwQSUwQXBpcGUlMjAlM0QlMjBEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTIycnVud2F5bWwlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> torch.distributed <span class="hljs-keyword">as</span> dist | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, torch_dtype=torch.float16)`,wrap:!1}}),N=new $({props:{code:"ZGVmJTIwcnVuX2luZmVyZW5jZShyYW5rJTJDJTIwd29ybGRfc2l6ZSklM0ElMEElMjAlMjAlMjAlMjBkaXN0LmluaXRfcHJvY2Vzc19ncm91cCglMjJuY2NsJTIyJTJDJTIwcmFuayUzRHJhbmslMkMlMjB3b3JsZF9zaXplJTNEd29ybGRfc2l6ZSklMEElMjAlMjAlMjAlMjBwaXBlLnRvKHJhbmspJTBBJTBBJTIwJTIwJTIwJTIwaWYlMjB0b3JjaC5kaXN0cmlidXRlZC5nZXRfcmFuaygpJTIwJTNEJTNEJTIwMCUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHByb21wdCUyMCUzRCUyMCUyMmElMjBkb2clMjIlMEElMjAlMjAlMjAlMjBlbGlmJTIwdG9yY2guZGlzdHJpYnV0ZWQuZ2V0X3JhbmsoKSUyMCUzRCUzRCUyMDElM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9tcHQlMjAlM0QlMjAlMjJhJTIwY2F0JTIyJTBBJTBBJTIwJTIwJTIwJTIwcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQpLmltYWdlcyU1QjAlNUQlMEElMjAlMjAlMjAlMjByZXN1bHQuc2F2ZShmJTIycmVzdWx0XyU3QnJhbmslN0QucG5nJTIyKQ==",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">run_inference</span>(<span class="hljs-params">rank, world_size</span>): | |
| dist.init_process_group(<span class="hljs-string">"nccl"</span>, rank=rank, world_size=world_size) | |
| pipe.to(rank) | |
| <span class="hljs-keyword">if</span> torch.distributed.get_rank() == <span class="hljs-number">0</span>: | |
| prompt = <span class="hljs-string">"a dog"</span> | |
| <span class="hljs-keyword">elif</span> torch.distributed.get_rank() == <span class="hljs-number">1</span>: | |
| prompt = <span class="hljs-string">"a cat"</span> | |
| result = pipe(prompt).images[<span class="hljs-number">0</span>] | |
| result.save(<span class="hljs-string">f"result_<span class="hljs-subst">{rank}</span>.png"</span>)`,wrap:!1}}),E=new $({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMFBhcnRpYWxTdGF0ZSUyMCUyMCUyMyUyMENhbiUyMGFsc28lMjBiZSUyMEFjY2VsZXJhdG9yJTIwb3IlMjBBY2NlbGVyYXRvclN0YXRlJTBBZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMERpZmZ1c2lvblBpcGVsaW5lJTBBJTBBcGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJydW53YXltbCUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiklMEFkaXN0cmlidXRlZF9zdGF0ZSUyMCUzRCUyMFBhcnRpYWxTdGF0ZSgpJTBBcGlwZS50byhkaXN0cmlidXRlZF9zdGF0ZS5kZXZpY2UpJTBBJTBBJTIzJTIwQXNzdW1lJTIwdHdvJTIwcHJvY2Vzc2VzJTBBd2l0aCUyMGRpc3RyaWJ1dGVkX3N0YXRlLnNwbGl0X2JldHdlZW5fcHJvY2Vzc2VzKCU1QiUyMmElMjBkb2clMjIlMkMlMjAlMjJhJTIwY2F0JTIyJTVEKSUyMGFzJTIwcHJvbXB0JTNBJTBBJTIwJTIwJTIwJTIwcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQpLmltYWdlcyU1QjAlNUQlMEElMjAlMjAlMjAlMjByZXN1bHQuc2F2ZShmJTIycmVzdWx0XyU3QmRpc3RyaWJ1dGVkX3N0YXRlLnByb2Nlc3NfaW5kZXglN0QucG5nJTIyKQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> PartialState <span class="hljs-comment"># Can also be Accelerator or AcceleratorState</span> | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, torch_dtype=torch.float16) | |
| distributed_state = PartialState() | |
| pipe.to(distributed_state.device) | |
| <span class="hljs-comment"># Assume two processes</span> | |
| <span class="hljs-keyword">with</span> distributed_state.split_between_processes([<span class="hljs-string">"a dog"</span>, <span class="hljs-string">"a cat"</span>]) <span class="hljs-keyword">as</span> prompt: | |
| result = pipe(prompt).images[<span class="hljs-number">0</span>] | |
| result.save(<span class="hljs-string">f"result_<span class="hljs-subst">{distributed_state.process_index}</span>.png"</span>)`,wrap:!1}}),Q=new $({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMGRpc3RyaWJ1dGVkX2luZmVyZW5jZS5weQ==",highlighted:"accelerate launch distributed_inference.py",wrap:!1}}),V=new $({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMC0tY29uZmlnX2ZpbGUlMjBteV9jb25maWcuanNvbiUyMGRpc3RyaWJ1dGVkX2luZmVyZW5jZS5weQ==",highlighted:"accelerate launch --config_file my_config.json distributed_inference.py",wrap:!1}}),K=new $({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMC0tbnVtX3Byb2Nlc3NlcyUyMDIlMjBkaXN0cmlidXRlZF9pbmZlcmVuY2UucHk=",highlighted:"accelerate launch --num_processes 2 distributed_inference.py",wrap:!1}}),g=new Ce({props:{$$slots:{default:[Bl]},$$scope:{ctx:T}}}),ae=new $({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMFBhcnRpYWxTdGF0ZSUyMCUyMCUyMyUyMENhbiUyMGFsc28lMjBiZSUyMEFjY2VsZXJhdG9yJTIwb3IlMjBBY2NlbGVyYXRvclN0YXRlJTBBZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMERpZmZ1c2lvblBpcGVsaW5lJTBBJTBBcGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJydW53YXltbCUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiklMEFkaXN0cmlidXRlZF9zdGF0ZSUyMCUzRCUyMFBhcnRpYWxTdGF0ZSgpJTBBcGlwZS50byhkaXN0cmlidXRlZF9zdGF0ZS5kZXZpY2UpJTBBJTBBJTIzJTIwQXNzdW1lJTIwdHdvJTIwcHJvY2Vzc2VzJTBBd2l0aCUyMGRpc3RyaWJ1dGVkX3N0YXRlLnNwbGl0X2JldHdlZW5fcHJvY2Vzc2VzKCU1QiUyMmElMjBkb2clMjIlMkMlMjAlMjJhJTIwY2F0JTIyJTJDJTIwJTIyYSUyMGNoaWNrZW4lMjIlNUQlMkMlMjBhcHBseV9wYWRkaW5nJTNEVHJ1ZSklMjBhcyUyMHByb21wdCUzQSUwQSUyMCUyMCUyMCUyMHJlc3VsdCUyMCUzRCUyMHBpcGUocHJvbXB0KS5pbWFnZXM=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> PartialState <span class="hljs-comment"># Can also be Accelerator or AcceleratorState</span> | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, torch_dtype=torch.float16) | |
| distributed_state = PartialState() | |
| pipe.to(distributed_state.device) | |
| <span class="hljs-comment"># Assume two processes</span> | |
| <span class="hljs-keyword">with</span> distributed_state.split_between_processes([<span class="hljs-string">"a dog"</span>, <span class="hljs-string">"a cat"</span>, <span class="hljs-string">"a chicken"</span>], apply_padding=<span class="hljs-literal">True</span>) <span class="hljs-keyword">as</span> prompt: | |
| result = pipe(prompt).images`,wrap:!1}}),pe=new Gt({props:{title:"Memory-efficient pipeline parallelism (experimental)",local:"memory-efficient-pipeline-parallelism-experimental",headingTag:"h2"}}),fe=new $({props:{code:"cGlwJTIwaW5zdGFsbCUyMHRvcmNo",highlighted:"pip install torch",wrap:!1}}),he=new $({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEdQVDJGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uJTJDJTIwR1BUMkNvbmZpZyUwQSUwQWNvbmZpZyUyMCUzRCUyMEdQVDJDb25maWcoKSUwQW1vZGVsJTIwJTNEJTIwR1BUMkZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24oY29uZmlnKSUwQW1vZGVsLmV2YWwoKQ==",highlighted:`from transformers <span class="hljs-keyword">import</span> GPT2ForSequenceClassification, GPT2Config | |
| config = <span class="hljs-built_in">GPT2Config</span>() | |
| model = <span class="hljs-built_in">GPT2ForSequenceClassification</span>(config) | |
| model.<span class="hljs-built_in">eval</span>()`,wrap:!1}}),J=new Ce({props:{warning:!0,$$slots:{default:[Gl]},$$scope:{ctx:T}}}),we=new $({props:{code:"aW5wdXQlMjAlM0QlMjB0b3JjaC5yYW5kaW50KCUwQSUyMCUyMCUyMCUyMGxvdyUzRDAlMkMlMEElMjAlMjAlMjAlMjBoaWdoJTNEY29uZmlnLnZvY2FiX3NpemUlMkMlMEElMjAlMjAlMjAlMjBzaXplJTNEKDIlMkMlMjAxMDI0KSUyQyUyMCUyMCUyMyUyMGJzJTIweCUyMHNlcV9sZW4lMEElMjAlMjAlMjAlMjBkZXZpY2UlM0QlMjJjcHUlMjIlMkMlMEElMjAlMjAlMjAlMjBkdHlwZSUzRHRvcmNoLmludDY0JTJDJTBBJTIwJTIwJTIwJTIwcmVxdWlyZXNfZ3JhZCUzREZhbHNlJTJDJTBBKQ==",highlighted:`input = torch.randint( | |
| <span class="hljs-attribute">low</span>=0, | |
| <span class="hljs-attribute">high</span>=config.vocab_size, | |
| size=(2, 1024), # bs x seq_len | |
| <span class="hljs-attribute">device</span>=<span class="hljs-string">"cpu"</span>, | |
| <span class="hljs-attribute">dtype</span>=torch.int64, | |
| <span class="hljs-attribute">requires_grad</span>=<span class="hljs-literal">False</span>, | |
| )`,wrap:!1}}),be=new $({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUuaW5mZXJlbmNlJTIwaW1wb3J0JTIwcHJlcGFyZV9waXBweSUwQWV4YW1wbGVfaW5wdXRzJTIwJTNEJTIwJTdCJTIyaW5wdXRfaWRzJTIyJTNBJTIwaW5wdXQlN0QlMEFtb2RlbCUyMCUzRCUyMHByZXBhcmVfcGlwcHkobW9kZWwlMkMlMjBleGFtcGxlX2FyZ3MlM0QoaW5wdXQlMkMpKQ==",highlighted:`<span class="hljs-keyword">from</span> accelerate.inference <span class="hljs-keyword">import</span> prepare_pippy | |
| example_inputs = {"input_ids": <span class="hljs-keyword">input</span>} | |
| model = prepare_pippy(model, example_args=(<span class="hljs-keyword">input</span>,))`,wrap:!1}}),U=new Ce({props:{$$slots:{default:[Wl]},$$scope:{ctx:T}}}),C=new Ce({props:{warning:!0,$$slots:{default:[Hl]},$$scope:{ctx:T}}}),Te=new $({props:{code:"YXJncyUyMCUzRCUyMHNvbWVfbW9yZV9hcmd1bWVudHMlMEF3aXRoJTIwdG9yY2gubm9fZ3JhZCgpJTNBJTBBJTIwJTIwJTIwJTIwb3V0cHV0JTIwJTNEJTIwbW9kZWwoKmFyZ3Mp",highlighted:`<span class="hljs-variable">args</span> = <span class="hljs-variable">some_more_arguments</span> | |
| <span class="hljs-variable">with</span> <span class="hljs-variable">torch.no_grad</span>(): | |
| <span class="hljs-variable">output</span> = <span class="hljs-function"><span class="hljs-title">model</span>(*<span class="hljs-variable">args</span>)</span>`,wrap:!1}}),ve=new $({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBQYXJ0aWFsU3RhdGUlMEFpZiUyMFBhcnRpYWxTdGF0ZSgpLmlzX2xhc3RfcHJvY2VzcyUzQSUwQSUyMCUyMCUyMCUyMHByaW50KG91dHB1dCk=",highlighted:`<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> PartialState | |
| <span class="hljs-keyword">if</span> PartialState().is_last_process: | |
| <span class="hljs-built_in">print</span>(output)`,wrap:!1}}),k=new Ce({props:{$$slots:{default:[Xl]},$$scope:{ctx:T}}}),Je=new Zl({props:{source:"https://github.com/huggingface/accelerate/blob/main/docs/source/usage_guides/distributed_inference.md"}}),{c(){i=p("meta"),w=a(),c=p("p"),M=a(),m(v.$$.fragment),b=a(),m(j.$$.fragment),ke=a(),x=p("p"),x.textContent=Wt,xe=a(),_=p("ol"),_.innerHTML=Ht,_e=a(),Z=p("p"),Z.textContent=Xt,Ze=a(),m(B.$$.fragment),Be=a(),G=p("p"),G.textContent=It,Ge=a(),W=p("p"),W.textContent=Nt,We=a(),H=p("p"),H.innerHTML=Pt,He=a(),m(X.$$.fragment),Xe=a(),I=p("p"),I.textContent=St,Ie=a(),m(N.$$.fragment),Ne=a(),P=p("p"),P.textContent=Rt,Pe=a(),S=p("p"),S.innerHTML=Yt,Se=a(),R=p("p"),R.textContent=zt,Re=a(),Y=p("p"),Y.innerHTML=Et,Ye=a(),z=p("p"),z.textContent=Ft,ze=a(),m(E.$$.fragment),Ee=a(),F=p("p"),F.textContent=Lt,Fe=a(),L=p("p"),L.innerHTML=Qt,Le=a(),m(Q.$$.fragment),Qe=a(),A=p("p"),A.textContent=At,Ae=a(),m(V.$$.fragment),Ve=a(),q=p("p"),q.textContent=Vt,qe=a(),D=p("blockquote"),D.innerHTML=qt,De=a(),m(K.$$.fragment),Ke=a(),O=p("p"),O.textContent=Dt,Oe=a(),ee=p("p"),ee.textContent=Kt,et=a(),te=p("p"),te.textContent=Ot,tt=a(),le=p("p"),le.innerHTML=el,lt=a(),m(g.$$.fragment),st=a(),se=p("p"),se.textContent=tl,at=a(),m(ae.$$.fragment),nt=a(),ne=p("p"),ne.innerHTML=ll,it=a(),ie=p("p"),ie.innerHTML=sl,pt=a(),m(pe.$$.fragment),ot=a(),oe=p("p"),oe.innerHTML=al,rt=a(),re=p("p"),re.innerHTML=nl,ct=a(),ce=p("p"),ce.innerHTML=il,mt=a(),me=p("p"),me.innerHTML=pl,ut=a(),ue=p("p"),ue.textContent=ol,ft=a(),m(fe.$$.fragment),dt=a(),de=p("p"),de.textContent=rl,ht=a(),m(he.$$.fragment),yt=a(),ye=p("p"),ye.innerHTML=cl,wt=a(),m(J.$$.fragment),Mt=a(),m(we.$$.fragment),bt=a(),Me=p("p"),Me.innerHTML=ml,$t=a(),m(be.$$.fragment),Tt=a(),m(U.$$.fragment),jt=a(),$e=p("p"),$e.textContent=ul,vt=a(),m(C.$$.fragment),gt=a(),m(Te.$$.fragment),Jt=a(),je=p("p"),je.textContent=fl,Ut=a(),m(ve.$$.fragment),Ct=a(),m(k.$$.fragment),kt=a(),ge=p("p"),ge.innerHTML=dl,xt=a(),m(Je.$$.fragment),_t=a(),Ue=p("p"),this.h()},l(e){const t=Ul("svelte-u9bgzb",document.head);i=o(t,"META",{name:!0,content:!0}),t.forEach(l),w=n(e),c=o(e,"P",{}),$l(c).forEach(l),M=n(e),u(v.$$.fragment,e),b=n(e),u(j.$$.fragment,e),ke=n(e),x=o(e,"P",{"data-svelte-h":!0}),r(x)!=="svelte-pzys4w"&&(x.textContent=Wt),xe=n(e),_=o(e,"OL",{"data-svelte-h":!0}),r(_)!=="svelte-auhvyh"&&(_.innerHTML=Ht),_e=n(e),Z=o(e,"P",{"data-svelte-h":!0}),r(Z)!=="svelte-105ktyk"&&(Z.textContent=Xt),Ze=n(e),u(B.$$.fragment,e),Be=n(e),G=o(e,"P",{"data-svelte-h":!0}),r(G)!=="svelte-1ojc10y"&&(G.textContent=It),Ge=n(e),W=o(e,"P",{"data-svelte-h":!0}),r(W)!=="svelte-1qi053h"&&(W.textContent=Nt),We=n(e),H=o(e,"P",{"data-svelte-h":!0}),r(H)!=="svelte-rhvic4"&&(H.innerHTML=Pt),He=n(e),u(X.$$.fragment,e),Xe=n(e),I=o(e,"P",{"data-svelte-h":!0}),r(I)!=="svelte-1dzspg1"&&(I.textContent=St),Ie=n(e),u(N.$$.fragment,e),Ne=n(e),P=o(e,"P",{"data-svelte-h":!0}),r(P)!=="svelte-1ov43u6"&&(P.textContent=Rt),Pe=n(e),S=o(e,"P",{"data-svelte-h":!0}),r(S)!=="svelte-1n5a8vc"&&(S.innerHTML=Yt),Se=n(e),R=o(e,"P",{"data-svelte-h":!0}),r(R)!=="svelte-12rt8sz"&&(R.textContent=zt),Re=n(e),Y=o(e,"P",{"data-svelte-h":!0}),r(Y)!=="svelte-gw34xi"&&(Y.innerHTML=Et),Ye=n(e),z=o(e,"P",{"data-svelte-h":!0}),r(z)!=="svelte-8uk323"&&(z.textContent=Ft),ze=n(e),u(E.$$.fragment,e),Ee=n(e),F=o(e,"P",{"data-svelte-h":!0}),r(F)!=="svelte-k1tgc3"&&(F.textContent=Lt),Fe=n(e),L=o(e,"P",{"data-svelte-h":!0}),r(L)!=="svelte-glszdf"&&(L.innerHTML=Qt),Le=n(e),u(Q.$$.fragment,e),Qe=n(e),A=o(e,"P",{"data-svelte-h":!0}),r(A)!=="svelte-1beq8se"&&(A.textContent=At),Ae=n(e),u(V.$$.fragment,e),Ve=n(e),q=o(e,"P",{"data-svelte-h":!0}),r(q)!=="svelte-52p69u"&&(q.textContent=Vt),qe=n(e),D=o(e,"BLOCKQUOTE",{"data-svelte-h":!0}),r(D)!=="svelte-ix7ij8"&&(D.innerHTML=qt),De=n(e),u(K.$$.fragment,e),Ke=n(e),O=o(e,"P",{"data-svelte-h":!0}),r(O)!=="svelte-ui4crb"&&(O.textContent=Dt),Oe=n(e),ee=o(e,"P",{"data-svelte-h":!0}),r(ee)!=="svelte-dyqt5a"&&(ee.textContent=Kt),et=n(e),te=o(e,"P",{"data-svelte-h":!0}),r(te)!=="svelte-1o56krx"&&(te.textContent=Ot),tt=n(e),le=o(e,"P",{"data-svelte-h":!0}),r(le)!=="svelte-1y9mak3"&&(le.innerHTML=el),lt=n(e),u(g.$$.fragment,e),st=n(e),se=o(e,"P",{"data-svelte-h":!0}),r(se)!=="svelte-4vay6o"&&(se.textContent=tl),at=n(e),u(ae.$$.fragment,e),nt=n(e),ne=o(e,"P",{"data-svelte-h":!0}),r(ne)!=="svelte-42h08l"&&(ne.innerHTML=ll),it=n(e),ie=o(e,"P",{"data-svelte-h":!0}),r(ie)!=="svelte-13go2fo"&&(ie.innerHTML=sl),pt=n(e),u(pe.$$.fragment,e),ot=n(e),oe=o(e,"P",{"data-svelte-h":!0}),r(oe)!=="svelte-ynzvto"&&(oe.innerHTML=al),rt=n(e),re=o(e,"P",{"data-svelte-h":!0}),r(re)!=="svelte-1l3sfxh"&&(re.innerHTML=nl),ct=n(e),ce=o(e,"P",{"data-svelte-h":!0}),r(ce)!=="svelte-l2zgqj"&&(ce.innerHTML=il),mt=n(e),me=o(e,"P",{"data-svelte-h":!0}),r(me)!=="svelte-heqe2k"&&(me.innerHTML=pl),ut=n(e),ue=o(e,"P",{"data-svelte-h":!0}),r(ue)!=="svelte-1pr52sj"&&(ue.textContent=ol),ft=n(e),u(fe.$$.fragment,e),dt=n(e),de=o(e,"P",{"data-svelte-h":!0}),r(de)!=="svelte-1mviugk"&&(de.textContent=rl),ht=n(e),u(he.$$.fragment,e),yt=n(e),ye=o(e,"P",{"data-svelte-h":!0}),r(ye)!=="svelte-15xaqzv"&&(ye.innerHTML=cl),wt=n(e),u(J.$$.fragment,e),Mt=n(e),u(we.$$.fragment,e),bt=n(e),Me=o(e,"P",{"data-svelte-h":!0}),r(Me)!=="svelte-11mjb3e"&&(Me.innerHTML=ml),$t=n(e),u(be.$$.fragment,e),Tt=n(e),u(U.$$.fragment,e),jt=n(e),$e=o(e,"P",{"data-svelte-h":!0}),r($e)!=="svelte-t45mca"&&($e.textContent=ul),vt=n(e),u(C.$$.fragment,e),gt=n(e),u(Te.$$.fragment,e),Jt=n(e),je=o(e,"P",{"data-svelte-h":!0}),r(je)!=="svelte-1f6tfb4"&&(je.textContent=fl),Ut=n(e),u(ve.$$.fragment,e),Ct=n(e),u(k.$$.fragment,e),kt=n(e),ge=o(e,"P",{"data-svelte-h":!0}),r(ge)!=="svelte-bz2a3m"&&(ge.innerHTML=dl),xt=n(e),u(Je.$$.fragment,e),_t=n(e),Ue=o(e,"P",{}),$l(Ue).forEach(l),this.h()},h(){Tl(i,"name","hf:doc:metadata"),Tl(i,"content",Nl)},m(e,t){Cl(document.head,i),s(e,w,t),s(e,c,t),s(e,M,t),f(v,e,t),s(e,b,t),f(j,e,t),s(e,ke,t),s(e,x,t),s(e,xe,t),s(e,_,t),s(e,_e,t),s(e,Z,t),s(e,Ze,t),f(B,e,t),s(e,Be,t),s(e,G,t),s(e,Ge,t),s(e,W,t),s(e,We,t),s(e,H,t),s(e,He,t),f(X,e,t),s(e,Xe,t),s(e,I,t),s(e,Ie,t),f(N,e,t),s(e,Ne,t),s(e,P,t),s(e,Pe,t),s(e,S,t),s(e,Se,t),s(e,R,t),s(e,Re,t),s(e,Y,t),s(e,Ye,t),s(e,z,t),s(e,ze,t),f(E,e,t),s(e,Ee,t),s(e,F,t),s(e,Fe,t),s(e,L,t),s(e,Le,t),f(Q,e,t),s(e,Qe,t),s(e,A,t),s(e,Ae,t),f(V,e,t),s(e,Ve,t),s(e,q,t),s(e,qe,t),s(e,D,t),s(e,De,t),f(K,e,t),s(e,Ke,t),s(e,O,t),s(e,Oe,t),s(e,ee,t),s(e,et,t),s(e,te,t),s(e,tt,t),s(e,le,t),s(e,lt,t),f(g,e,t),s(e,st,t),s(e,se,t),s(e,at,t),f(ae,e,t),s(e,nt,t),s(e,ne,t),s(e,it,t),s(e,ie,t),s(e,pt,t),f(pe,e,t),s(e,ot,t),s(e,oe,t),s(e,rt,t),s(e,re,t),s(e,ct,t),s(e,ce,t),s(e,mt,t),s(e,me,t),s(e,ut,t),s(e,ue,t),s(e,ft,t),f(fe,e,t),s(e,dt,t),s(e,de,t),s(e,ht,t),f(he,e,t),s(e,yt,t),s(e,ye,t),s(e,wt,t),f(J,e,t),s(e,Mt,t),f(we,e,t),s(e,bt,t),s(e,Me,t),s(e,$t,t),f(be,e,t),s(e,Tt,t),f(U,e,t),s(e,jt,t),s(e,$e,t),s(e,vt,t),f(C,e,t),s(e,gt,t),f(Te,e,t),s(e,Jt,t),s(e,je,t),s(e,Ut,t),f(ve,e,t),s(e,Ct,t),f(k,e,t),s(e,kt,t),s(e,ge,t),s(e,xt,t),f(Je,e,t),s(e,_t,t),s(e,Ue,t),Zt=!0},p(e,[t]){const hl={};t&2&&(hl.$$scope={dirty:t,ctx:e}),g.$set(hl);const yl={};t&2&&(yl.$$scope={dirty:t,ctx:e}),J.$set(yl);const wl={};t&2&&(wl.$$scope={dirty:t,ctx:e}),U.$set(wl);const Ml={};t&2&&(Ml.$$scope={dirty:t,ctx:e}),C.$set(Ml);const bl={};t&2&&(bl.$$scope={dirty:t,ctx:e}),k.$set(bl)},i(e){Zt||(d(v.$$.fragment,e),d(j.$$.fragment,e),d(B.$$.fragment,e),d(X.$$.fragment,e),d(N.$$.fragment,e),d(E.$$.fragment,e),d(Q.$$.fragment,e),d(V.$$.fragment,e),d(K.$$.fragment,e),d(g.$$.fragment,e),d(ae.$$.fragment,e),d(pe.$$.fragment,e),d(fe.$$.fragment,e),d(he.$$.fragment,e),d(J.$$.fragment,e),d(we.$$.fragment,e),d(be.$$.fragment,e),d(U.$$.fragment,e),d(C.$$.fragment,e),d(Te.$$.fragment,e),d(ve.$$.fragment,e),d(k.$$.fragment,e),d(Je.$$.fragment,e),Zt=!0)},o(e){h(v.$$.fragment,e),h(j.$$.fragment,e),h(B.$$.fragment,e),h(X.$$.fragment,e),h(N.$$.fragment,e),h(E.$$.fragment,e),h(Q.$$.fragment,e),h(V.$$.fragment,e),h(K.$$.fragment,e),h(g.$$.fragment,e),h(ae.$$.fragment,e),h(pe.$$.fragment,e),h(fe.$$.fragment,e),h(he.$$.fragment,e),h(J.$$.fragment,e),h(we.$$.fragment,e),h(be.$$.fragment,e),h(U.$$.fragment,e),h(C.$$.fragment,e),h(Te.$$.fragment,e),h(ve.$$.fragment,e),h(k.$$.fragment,e),h(Je.$$.fragment,e),Zt=!1},d(e){e&&(l(w),l(c),l(M),l(b),l(ke),l(x),l(xe),l(_),l(_e),l(Z),l(Ze),l(Be),l(G),l(Ge),l(W),l(We),l(H),l(He),l(Xe),l(I),l(Ie),l(Ne),l(P),l(Pe),l(S),l(Se),l(R),l(Re),l(Y),l(Ye),l(z),l(ze),l(Ee),l(F),l(Fe),l(L),l(Le),l(Qe),l(A),l(Ae),l(Ve),l(q),l(qe),l(D),l(De),l(Ke),l(O),l(Oe),l(ee),l(et),l(te),l(tt),l(le),l(lt),l(st),l(se),l(at),l(nt),l(ne),l(it),l(ie),l(pt),l(ot),l(oe),l(rt),l(re),l(ct),l(ce),l(mt),l(me),l(ut),l(ue),l(ft),l(dt),l(de),l(ht),l(yt),l(ye),l(wt),l(Mt),l(bt),l(Me),l($t),l(Tt),l(jt),l($e),l(vt),l(gt),l(Jt),l(je),l(Ut),l(Ct),l(kt),l(ge),l(xt),l(_t),l(Ue)),l(i),y(v,e),y(j,e),y(B,e),y(X,e),y(N,e),y(E,e),y(Q,e),y(V,e),y(K,e),y(g,e),y(ae,e),y(pe,e),y(fe,e),y(he,e),y(J,e),y(we,e),y(be,e),y(U,e),y(C,e),y(Te,e),y(ve,e),y(k,e),y(Je,e)}}}const Nl='{"title":"Distributed inference","local":"distributed-inference","sections":[{"title":"Sending chunks of a batch automatically to each loaded model","local":"sending-chunks-of-a-batch-automatically-to-each-loaded-model","sections":[],"depth":2},{"title":"Memory-efficient pipeline parallelism (experimental)","local":"memory-efficient-pipeline-parallelism-experimental","sections":[],"depth":2}],"depth":1}';function Pl(T){return vl(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Fl extends gl{constructor(i){super(),Jl(this,i,Pl,Il,jl,{})}}export{Fl as component}; | |
Xet Storage Details
- Size:
- 29.8 kB
- Xet hash:
- 9ee872bbc44600c55f05cfe0ca4ec1d91ecddb36b28bb9233da988a052384e92
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.