Buckets:

rtrm's picture
download
raw
9.56 kB
/**
 * Page module for the "Parallelism" API reference (the `ge` component below is
 * given the source link docs/source/en/api/parallel.md).
 *
 * NOTE(review): this looks like minified Svelte compiler output (`$$.fragment`
 * accesses, `data-svelte-h` attributes, `svelte-...` hashes). Treat it as a build
 * artifact: content changes presumably belong in the upstream markdown/docstrings,
 * because the `svelte-...` hash checks in `l()` are compared against markup that is
 * expected to carry the same text - TODO confirm with the doc build pipeline.
 *
 * Definitions in this module:
 *  - `pe(ne)`: builds the page fragment. It instantiates one `ue` (from the
 *    CopyLLMTxtMenu chunk), three `te` headings ("Parallelism" h1, "ParallelConfig" h2,
 *    "ContextParallelConfig" h2), three `ee` components from the Docstring chunk
 *    (`diffusers.ParallelConfig`, `diffusers.ContextParallelConfig`,
 *    `diffusers.hooks.apply_context_parallel`) and one `ge` component, then returns an
 *    object with the methods `c`, `l`, `h`, `m`, `p`, `i`, `o`, `d`. `c` and `l` both
 *    finish by calling `this.h()`; `p` is the imported `ie`; `i`/`o` toggle the local
 *    flag `B`. Presumably these are Svelte's create / claim / hydrate / mount /
 *    update / intro / outro / destroy hooks - confirm against ../chunks/index.
 *    The parameter `ne` is never read.
 *  - `he`: JSON string holding the page title and its two h2 sections; `h()` passes it
 *    to `S(i,"content",he)` for the meta node whose name is "hf:doc:metadata".
 *  - `_e(ne)`: registers a callback through `me` (from the scheduler chunk) that reads
 *    the `fw` query parameter from `window.location.search` and discards the value;
 *    returns an empty array.
 *  - `ye`: class extending `de`; its constructor calls `super()` and then
 *    `fe(this,i,_e,pe,re,{})`. Exported under the name `component`.
 *
 * NOTE(review): text issues visible in the embedded strings, to be fixed upstream:
 *  - `oe` contains the link text "Distributed inferece" (typo for "inference").
 *  - `ring_degree` / `ulysses_degree` are shown with `= None` in the signature while
 *    their descriptions say "defaults to 1".
 *  - `rotate_method` is typed as Literal['allgather', 'alltoall'] while its description
 *    says only "allgather" is supported.
 *  - The `ring_degree` description states "Number of devices to use for ring attention"
 *    twice.
 */
import{s as re,n as ie,o as me}from"../chunks/scheduler.53228c21.js";import{S as de,i as fe,e as s,s as a,c,h as ce,a as r,d as n,b as l,f as T,g as u,j as J,k as S,l as q,m as o,n as g,t as p,o as h,p as _}from"../chunks/index.100fac89.js";import{C as ue}from"../chunks/CopyLLMTxtMenu.2bfe8872.js";import{D as ee}from"../chunks/Docstring.65b2998b.js";import{H as te,E as ge}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.e20ef58b.js";function pe(ne){let i,V,E,A,v,z,$,H,x,oe='Parallelism strategies help speed up diffusion transformers by distributing computations across multiple devices, allowing for faster inference/training times. Refer to the <a href="../training/distributed_inference">Distributed inferece</a> guide to learn more.',K,b,F,m,C,X,D,ae="Configuration for applying different parallelisms.",I,y,U,d,P,Y,M,le="Configuration for context parallelism.",j,f,w,Z,k,se="Apply context parallel on a model.",R,N,W,L,B;return v=new ue({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),$=new te({props:{title:"Parallelism",local:"parallelism",headingTag:"h1"}}),b=new te({props:{title:"ParallelConfig",local:"diffusers.ParallelConfig",headingTag:"h2"}}),C=new ee({props:{name:"class diffusers.ParallelConfig",anchor:"diffusers.ParallelConfig",parameters:[{name:"context_parallel_config",val:": diffusers.models._modeling_parallel.ContextParallelConfig | None = None"},{name:"_rank",val:": int = None"},{name:"_world_size",val:": int = None"},{name:"_device",val:": device = None"},{name:"_mesh",val:": DeviceMesh = None"}],parametersDescription:[{anchor:"diffusers.ParallelConfig.context_parallel_config",description:`<strong>context_parallel_config</strong> (<code>ContextParallelConfig</code>, <em>optional</em>) &#x2014;
Configuration for context parallelism.`,name:"context_parallel_config"}],source:"https://github.com/huggingface/diffusers/blob/vr_13360/src/diffusers/models/_modeling_parallel.py#L146"}}),y=new te({props:{title:"ContextParallelConfig",local:"diffusers.ContextParallelConfig",headingTag:"h2"}}),P=new ee({props:{name:"class diffusers.ContextParallelConfig",anchor:"diffusers.ContextParallelConfig",parameters:[{name:"ring_degree",val:": int | None = None"},{name:"ulysses_degree",val:": int | None = None"},{name:"convert_to_fp32",val:": bool = True"},{name:"rotate_method",val:": typing.Literal['allgather', 'alltoall'] = 'allgather'"},{name:"mesh",val:": torch.distributed.device_mesh.DeviceMesh | None = None"},{name:"ulysses_anything",val:": bool = False"},{name:"_rank",val:": int = None"},{name:"_world_size",val:": int = None"},{name:"_device",val:": device = None"},{name:"_mesh",val:": DeviceMesh = None"},{name:"_flattened_mesh",val:": DeviceMesh = None"},{name:"_ring_mesh",val:": DeviceMesh = None"},{name:"_ulysses_mesh",val:": DeviceMesh = None"},{name:"_ring_local_rank",val:": int = None"},{name:"_ulysses_local_rank",val:": int = None"}],parametersDescription:[{anchor:"diffusers.ContextParallelConfig.ring_degree",description:`<strong>ring_degree</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) &#x2014;
Number of devices to use for Ring Attention. Sequence is split across devices. Each device computes
attention between its local Q and KV chunks passed sequentially around ring. Lower memory (only holds 1/N
of KV at a time), overlaps compute with communication, but requires N iterations to see all tokens. Best
for long sequences with limited memory/bandwidth. Number of devices to use for ring attention within a
context parallel region. Must be a divisor of the total number of devices in the context parallel mesh.`,name:"ring_degree"},{anchor:"diffusers.ContextParallelConfig.ulysses_degree",description:`<strong>ulysses_degree</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) &#x2014;
Number of devices to use for Ulysses Attention. Sequence split is across devices. Each device computes
local QKV, then all-gathers all KV chunks to compute full attention in one pass. Higher memory (stores all
KV), requires high-bandwidth all-to-all communication, but lower latency. Best for moderate sequences with
good interconnect bandwidth.`,name:"ulysses_degree"},{anchor:"diffusers.ContextParallelConfig.convert_to_fp32",description:`<strong>convert_to_fp32</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to convert output and LSE to float32 for ring attention numerical stability.`,name:"convert_to_fp32"},{anchor:"diffusers.ContextParallelConfig.rotate_method",description:`<strong>rotate_method</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;allgather&quot;</code>) &#x2014;
Method to use for rotating key/value states across devices in ring attention. Currently, only <code>&quot;allgather&quot;</code>
is supported.`,name:"rotate_method"},{anchor:"diffusers.ContextParallelConfig.ulysses_anything",description:`<strong>ulysses_anything</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
Whether to enable &#x201C;Ulysses Anything&#x201D; mode, which supports arbitrary sequence lengths and head counts that
are not evenly divisible by <code>ulysses_degree</code>. When enabled, <code>ulysses_degree</code> must be greater than 1 and
<code>ring_degree</code> must be 1.`,name:"ulysses_anything"},{anchor:"diffusers.ContextParallelConfig.mesh",description:`<strong>mesh</strong> (<code>torch.distributed.device_mesh.DeviceMesh</code>, <em>optional</em>) &#x2014;
A custom device mesh to use for context parallelism. If provided, this mesh will be used instead of
creating a new one. This is useful when combining context parallelism with other parallelism strategies
(e.g., FSDP, tensor parallelism) that share the same device mesh. The mesh must have both &#x201C;ring&#x201D; and
&#x201C;ulysses&#x201D; dimensions. Use size 1 for dimensions not being used (e.g., <code>mesh_shape=(2, 1, 4)</code> with
<code>mesh_dim_names=(&quot;ring&quot;, &quot;ulysses&quot;, &quot;fsdp&quot;)</code> for ring attention only with FSDP).`,name:"mesh"}],source:"https://github.com/huggingface/diffusers/blob/vr_13360/src/diffusers/models/_modeling_parallel.py#L42"}}),w=new ee({props:{name:"diffusers.hooks.apply_context_parallel",anchor:"diffusers.hooks.apply_context_parallel",parameters:[{name:"module",val:": Module"},{name:"parallel_config",val:": ContextParallelConfig"},{name:"plan",val:": dict"}],source:"https://github.com/huggingface/diffusers/blob/vr_13360/src/diffusers/hooks/context_parallel.py#L80"}}),N=new ge({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/parallel.md"}}),{c(){i=s("meta"),V=a(),E=s("p"),A=a(),c(v.$$.fragment),z=a(),c($.$$.fragment),H=a(),x=s("p"),x.innerHTML=oe,K=a(),c(b.$$.fragment),F=a(),m=s("div"),c(C.$$.fragment),X=a(),D=s("p"),D.textContent=ae,I=a(),c(y.$$.fragment),U=a(),d=s("div"),c(P.$$.fragment),Y=a(),M=s("p"),M.textContent=le,j=a(),f=s("div"),c(w.$$.fragment),Z=a(),k=s("p"),k.textContent=se,R=a(),c(N.$$.fragment),W=a(),L=s("p"),this.h()},l(e){const t=ce("svelte-u9bgzb",document.head);i=r(t,"META",{name:!0,content:!0}),t.forEach(n),V=l(e),E=r(e,"P",{}),T(E).forEach(n),A=l(e),u(v.$$.fragment,e),z=l(e),u($.$$.fragment,e),H=l(e),x=r(e,"P",{"data-svelte-h":!0}),J(x)!=="svelte-cblplr"&&(x.innerHTML=oe),K=l(e),u(b.$$.fragment,e),F=l(e),m=r(e,"DIV",{class:!0});var Q=T(m);u(C.$$.fragment,Q),X=l(Q),D=r(Q,"P",{"data-svelte-h":!0}),J(D)!=="svelte-1xt9qrn"&&(D.textContent=ae),Q.forEach(n),I=l(e),u(y.$$.fragment,e),U=l(e),d=r(e,"DIV",{class:!0});var G=T(d);u(P.$$.fragment,G),Y=l(G),M=r(G,"P",{"data-svelte-h":!0}),J(M)!=="svelte-1ng6p0q"&&(M.textContent=le),G.forEach(n),j=l(e),f=r(e,"DIV",{class:!0});var 
O=T(f);u(w.$$.fragment,O),Z=l(O),k=r(O,"P",{"data-svelte-h":!0}),J(k)!=="svelte-jjn757"&&(k.textContent=se),O.forEach(n),R=l(e),u(N.$$.fragment,e),W=l(e),L=r(e,"P",{}),T(L).forEach(n),this.h()},h(){S(i,"name","hf:doc:metadata"),S(i,"content",he),S(m,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),S(d,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),S(f,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){q(document.head,i),o(e,V,t),o(e,E,t),o(e,A,t),g(v,e,t),o(e,z,t),g($,e,t),o(e,H,t),o(e,x,t),o(e,K,t),g(b,e,t),o(e,F,t),o(e,m,t),g(C,m,null),q(m,X),q(m,D),o(e,I,t),g(y,e,t),o(e,U,t),o(e,d,t),g(P,d,null),q(d,Y),q(d,M),o(e,j,t),o(e,f,t),g(w,f,null),q(f,Z),q(f,k),o(e,R,t),g(N,e,t),o(e,W,t),o(e,L,t),B=!0},p:ie,i(e){B||(p(v.$$.fragment,e),p($.$$.fragment,e),p(b.$$.fragment,e),p(C.$$.fragment,e),p(y.$$.fragment,e),p(P.$$.fragment,e),p(w.$$.fragment,e),p(N.$$.fragment,e),B=!0)},o(e){h(v.$$.fragment,e),h($.$$.fragment,e),h(b.$$.fragment,e),h(C.$$.fragment,e),h(y.$$.fragment,e),h(P.$$.fragment,e),h(w.$$.fragment,e),h(N.$$.fragment,e),B=!1},d(e){e&&(n(V),n(E),n(A),n(z),n(H),n(x),n(K),n(F),n(m),n(I),n(U),n(d),n(j),n(f),n(R),n(W),n(L)),n(i),_(v,e),_($,e),_(b,e),_(C),_(y,e),_(P),_(w),_(N,e)}}}const he='{"title":"Parallelism","local":"parallelism","sections":[{"title":"ParallelConfig","local":"diffusers.ParallelConfig","sections":[],"depth":2},{"title":"ContextParallelConfig","local":"diffusers.ContextParallelConfig","sections":[],"depth":2}],"depth":1}';function _e(ne){return me(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class ye extends de{constructor(i){super(),fe(this,i,_e,pe,re,{})}}export{ye as component};

Xet Storage Details

Size:
9.56 kB
·
Xet hash:
b674e23c888026ddbad8b602c0cd2ef235467762aa201d5be72a94def863a466

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.