Buckets:

HuggingFaceDocBuilder's picture
download
raw
10 kB
/*
 * Minified build output of a Svelte-style page component for the "Parallelism" API page
 * (ParallelConfig, ContextParallelConfig, apply_context_parallel). The code is generated;
 * the order of statements inside the fragment methods presumably matters for hydration,
 * so nothing below is edited by hand -- this header only documents it.
 *
 * Imports (minified aliases):
 *   re, ie, de     - from the scheduler chunk. `ie` is used as the fragment's `p` member,
 *                    `de` receives the callback registered in `_e`, `re` is handed to `ce`.
 *   me, ce, l..._  - from the index chunk. `me` is the base class of `Ce`, `ce` is called in
 *                    its constructor; the one-letter names are the DOM / component helpers
 *                    used by the fragment (their implementations are not visible here).
 *   ge             - `C` export of the CopyLLMTxtMenu chunk.
 *   ee             - `D` export of the Docstring chunk.
 *   te, ue         - `H` and `E` exports of the MermaidChart style chunk; `te` is
 *                    instantiated with title/local/headingTag props (headings), `ue` with a
 *                    `source` URL pointing at docs/source/en/api/parallel.md.
 *
 * pe(ne) - fragment factory. Instantiates:
 *   v = ge (floating menu, styled via `containerStyle`),
 *   b = te heading "Parallelism" (h1),
 *   $ = te heading "ParallelConfig" (h2)          + y = ee docstring for diffusers.ParallelConfig,
 *   C = te heading "ContextParallelConfig" (h2)   + P = ee docstring for diffusers.ContextParallelConfig,
 *   w = ee docstring for diffusers.hooks.apply_context_parallel (no heading component precedes it),
 *   N = ue (link back to the markdown source),
 * and returns an object with the members c, l, h, m, p, i, o, d. These look like the usual
 * compiled-Svelte fragment hooks (create / claim / hydrate / mount / update / intro / outro /
 * destroy) -- TODO confirm against the index chunk.
 *   - c() builds the elements from scratch; l(e) takes an existing parent `e` instead and
 *     only overwrites innerHTML/textContent when the value returned by J(...) differs from
 *     the hard-coded "svelte-..." id.
 *   - h() sets name="hf:doc:metadata" and content=he on the <meta> element `i`, and the
 *     shared "docstring ..." class string on the three wrapper <div>s (d, m, c).
 *   - m(e,t) appends `i` to document.head and inserts everything else into `e`; y, P and w
 *     are mounted inside their wrapper divs d, m and c respectively.
 *   - p is the imported `ie`, so this fragment has no update logic of its own.
 *   - i()/o() forward to every child component and use the flag B so that i() only does
 *     its work once until o() resets it.
 *   - d(e) removes the top-level nodes only when `e` is truthy, always removes `i`, and
 *     tears down all child components.
 *
 * he - JSON string used as the content of the hf:doc:metadata <meta> tag: title
 *   "Parallelism" with two depth-2 sections (ParallelConfig, ContextParallelConfig).
 *
 * _e(ne) - instance function. Registers a callback through `de` that reads the `fw` query
 *   parameter from window.location.search and discards the result; returns an empty array.
 *
 * Ce - exported as `component`. Its constructor calls super() and then
 *   ce(this, i, _e, pe, re, {}).
 *
 * NOTE(review): content issues visible in the embedded strings. They should be fixed in the
 * upstream markdown / Python docstrings and the page regenerated, not patched here, because
 * the claim path compares against precomputed "svelte-..." ids:
 *   - the intro paragraph (`oe`) links to "Distributed inferece" (typo for "inference");
 *   - ring_degree and ulysses_degree are described as "defaults to 1" while the rendered
 *     signature shows "int | None = None";
 *   - the ring_degree description contains two overlapping sentences that both start with
 *     "Number of devices to use for ring attention";
 *   - rotate_method is described as supporting only "allgather" while the signature lists
 *     Literal['allgather', 'alltoall'];
 *   - the docstring `source` links point at the `vr_13769` ref, the edit link at `main`;
 *   - apply_context_parallel has neither a heading nor an entry in `he`.
 */
import{s as re,n as ie,o as de}from"../chunks/scheduler.53228c21.js";import{S as me,i as ce,e as l,s as a,c as f,h as fe,a as r,d as n,b as s,f as T,g,j as J,k as S,l as q,m as o,n as u,t as p,o as h,p as _}from"../chunks/index.100fac89.js";import{C as ge}from"../chunks/CopyLLMTxtMenu.f7e332d5.js";import{D as ee}from"../chunks/Docstring.8934f3ee.js";import{H as te,E as ue}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.b70fb789.js";function pe(ne){let i,V,E,A,v,F,b,z,x,oe='Parallelism strategies help speed up diffusion transformers by distributing computations across multiple devices, allowing for faster inference/training times. Refer to the <a href="../training/distributed_inference">Distributed inferece</a> guide to learn more.',H,$,K,d,y,X,D,ae="Configuration for applying different parallelisms.",W,C,I,m,P,Y,M,se="Configuration for context parallelism.",R,c,w,Z,k,le="Apply context parallel on a model.",U,N,j,L,B;return v=new ge({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),b=new te({props:{title:"Parallelism",local:"parallelism",headingTag:"h1"}}),$=new te({props:{title:"ParallelConfig",local:"diffusers.ParallelConfig",headingTag:"h2"}}),y=new ee({props:{name:"class diffusers.ParallelConfig",anchor:"diffusers.ParallelConfig",parameters:[{name:"context_parallel_config",val:": diffusers.models._modeling_parallel.ContextParallelConfig | None = None"},{name:"_rank",val:": int = None"},{name:"_world_size",val:": int = None"},{name:"_device",val:": device = None"},{name:"_mesh",val:": DeviceMesh = None"}],parametersDescription:[{anchor:"diffusers.ParallelConfig.context_parallel_config",description:`<strong>context_parallel_config</strong> (<code>ContextParallelConfig</code>, <em>optional</em>) &#x2014;
Configuration for context parallelism.`,name:"context_parallel_config"}],source:"https://github.com/huggingface/diffusers/blob/vr_13769/src/diffusers/models/_modeling_parallel.py#L158"}}),C=new te({props:{title:"ContextParallelConfig",local:"diffusers.ContextParallelConfig",headingTag:"h2"}}),P=new ee({props:{name:"class diffusers.ContextParallelConfig",anchor:"diffusers.ContextParallelConfig",parameters:[{name:"ring_degree",val:": int | None = None"},{name:"ulysses_degree",val:": int | None = None"},{name:"convert_to_fp32",val:": bool = True"},{name:"rotate_method",val:": typing.Literal['allgather', 'alltoall'] = 'allgather'"},{name:"mesh",val:": torch.distributed.device_mesh.DeviceMesh | None = None"},{name:"ulysses_anything",val:": bool = False"},{name:"ring_anything",val:": bool = False"},{name:"_rank",val:": int = None"},{name:"_world_size",val:": int = None"},{name:"_device",val:": device = None"},{name:"_mesh",val:": DeviceMesh = None"},{name:"_flattened_mesh",val:": DeviceMesh = None"},{name:"_ring_mesh",val:": DeviceMesh = None"},{name:"_ulysses_mesh",val:": DeviceMesh = None"},{name:"_ring_local_rank",val:": int = None"},{name:"_ulysses_local_rank",val:": int = None"}],parametersDescription:[{anchor:"diffusers.ContextParallelConfig.ring_degree",description:`<strong>ring_degree</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) &#x2014;
Number of devices to use for Ring Attention. Sequence is split across devices. Each device computes
attention between its local Q and KV chunks passed sequentially around ring. Lower memory (only holds 1/N
of KV at a time), overlaps compute with communication, but requires N iterations to see all tokens. Best
for long sequences with limited memory/bandwidth. Number of devices to use for ring attention within a
context parallel region. Must be a divisor of the total number of devices in the context parallel mesh.`,name:"ring_degree"},{anchor:"diffusers.ContextParallelConfig.ulysses_degree",description:`<strong>ulysses_degree</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) &#x2014;
Number of devices to use for Ulysses Attention. Sequence split is across devices. Each device computes
local QKV, then all-gathers all KV chunks to compute full attention in one pass. Higher memory (stores all
KV), requires high-bandwidth all-to-all communication, but lower latency. Best for moderate sequences with
good interconnect bandwidth.`,name:"ulysses_degree"},{anchor:"diffusers.ContextParallelConfig.convert_to_fp32",description:`<strong>convert_to_fp32</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to convert output and LSE to float32 for ring attention numerical stability.`,name:"convert_to_fp32"},{anchor:"diffusers.ContextParallelConfig.rotate_method",description:`<strong>rotate_method</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;allgather&quot;</code>) &#x2014;
Method to use for rotating key/value states across devices in ring attention. Currently, only <code>&quot;allgather&quot;</code>
is supported.`,name:"rotate_method"},{anchor:"diffusers.ContextParallelConfig.ulysses_anything",description:`<strong>ulysses_anything</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
Whether to enable &#x201C;Ulysses Anything&#x201D; mode, which supports arbitrary sequence lengths and head counts that
are not evenly divisible by <code>ulysses_degree</code>. When enabled, <code>ulysses_degree</code> must be greater than 1 and
<code>ring_degree</code> must be 1.`,name:"ulysses_anything"},{anchor:"diffusers.ContextParallelConfig.ring_anything",description:`<strong>ring_anything</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
Whether to enable &#x201C;Ring Anything&#x201D; mode, which supports arbitrary sequence lengths. When enabled,
<code>ring_degree</code> must be greater than 1 and <code>ulysses_degree</code> must be 1.`,name:"ring_anything"},{anchor:"diffusers.ContextParallelConfig.mesh",description:`<strong>mesh</strong> (<code>torch.distributed.device_mesh.DeviceMesh</code>, <em>optional</em>) &#x2014;
A custom device mesh to use for context parallelism. If provided, this mesh will be used instead of
creating a new one. This is useful when combining context parallelism with other parallelism strategies
(e.g., FSDP, tensor parallelism) that share the same device mesh. The mesh must have both &#x201C;ring&#x201D; and
&#x201C;ulysses&#x201D; dimensions. Use size 1 for dimensions not being used (e.g., <code>mesh_shape=(2, 1, 4)</code> with
<code>mesh_dim_names=(&quot;ring&quot;, &quot;ulysses&quot;, &quot;fsdp&quot;)</code> for ring attention only with FSDP).`,name:"mesh"}],source:"https://github.com/huggingface/diffusers/blob/vr_13769/src/diffusers/models/_modeling_parallel.py#L42"}}),w=new ee({props:{name:"diffusers.hooks.apply_context_parallel",anchor:"diffusers.hooks.apply_context_parallel",parameters:[{name:"module",val:": Module"},{name:"parallel_config",val:": ContextParallelConfig"},{name:"plan",val:": dict"}],source:"https://github.com/huggingface/diffusers/blob/vr_13769/src/diffusers/hooks/context_parallel.py#L80"}}),N=new ue({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/parallel.md"}}),{c(){i=l("meta"),V=a(),E=l("p"),A=a(),f(v.$$.fragment),F=a(),f(b.$$.fragment),z=a(),x=l("p"),x.innerHTML=oe,H=a(),f($.$$.fragment),K=a(),d=l("div"),f(y.$$.fragment),X=a(),D=l("p"),D.textContent=ae,W=a(),f(C.$$.fragment),I=a(),m=l("div"),f(P.$$.fragment),Y=a(),M=l("p"),M.textContent=se,R=a(),c=l("div"),f(w.$$.fragment),Z=a(),k=l("p"),k.textContent=le,U=a(),f(N.$$.fragment),j=a(),L=l("p"),this.h()},l(e){const t=fe("svelte-u9bgzb",document.head);i=r(t,"META",{name:!0,content:!0}),t.forEach(n),V=s(e),E=r(e,"P",{}),T(E).forEach(n),A=s(e),g(v.$$.fragment,e),F=s(e),g(b.$$.fragment,e),z=s(e),x=r(e,"P",{"data-svelte-h":!0}),J(x)!=="svelte-cblplr"&&(x.innerHTML=oe),H=s(e),g($.$$.fragment,e),K=s(e),d=r(e,"DIV",{class:!0});var Q=T(d);g(y.$$.fragment,Q),X=s(Q),D=r(Q,"P",{"data-svelte-h":!0}),J(D)!=="svelte-1xt9qrn"&&(D.textContent=ae),Q.forEach(n),W=s(e),g(C.$$.fragment,e),I=s(e),m=r(e,"DIV",{class:!0});var G=T(m);g(P.$$.fragment,G),Y=s(G),M=r(G,"P",{"data-svelte-h":!0}),J(M)!=="svelte-1ng6p0q"&&(M.textContent=se),G.forEach(n),R=s(e),c=r(e,"DIV",{class:!0});var 
O=T(c);g(w.$$.fragment,O),Z=s(O),k=r(O,"P",{"data-svelte-h":!0}),J(k)!=="svelte-jjn757"&&(k.textContent=le),O.forEach(n),U=s(e),g(N.$$.fragment,e),j=s(e),L=r(e,"P",{}),T(L).forEach(n),this.h()},h(){S(i,"name","hf:doc:metadata"),S(i,"content",he),S(d,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),S(m,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),S(c,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){q(document.head,i),o(e,V,t),o(e,E,t),o(e,A,t),u(v,e,t),o(e,F,t),u(b,e,t),o(e,z,t),o(e,x,t),o(e,H,t),u($,e,t),o(e,K,t),o(e,d,t),u(y,d,null),q(d,X),q(d,D),o(e,W,t),u(C,e,t),o(e,I,t),o(e,m,t),u(P,m,null),q(m,Y),q(m,M),o(e,R,t),o(e,c,t),u(w,c,null),q(c,Z),q(c,k),o(e,U,t),u(N,e,t),o(e,j,t),o(e,L,t),B=!0},p:ie,i(e){B||(p(v.$$.fragment,e),p(b.$$.fragment,e),p($.$$.fragment,e),p(y.$$.fragment,e),p(C.$$.fragment,e),p(P.$$.fragment,e),p(w.$$.fragment,e),p(N.$$.fragment,e),B=!0)},o(e){h(v.$$.fragment,e),h(b.$$.fragment,e),h($.$$.fragment,e),h(y.$$.fragment,e),h(C.$$.fragment,e),h(P.$$.fragment,e),h(w.$$.fragment,e),h(N.$$.fragment,e),B=!1},d(e){e&&(n(V),n(E),n(A),n(F),n(z),n(x),n(H),n(K),n(d),n(W),n(I),n(m),n(R),n(c),n(U),n(j),n(L)),n(i),_(v,e),_(b,e),_($,e),_(y),_(C,e),_(P),_(w),_(N,e)}}}const he='{"title":"Parallelism","local":"parallelism","sections":[{"title":"ParallelConfig","local":"diffusers.ParallelConfig","sections":[],"depth":2},{"title":"ContextParallelConfig","local":"diffusers.ContextParallelConfig","sections":[],"depth":2}],"depth":1}';function _e(ne){return de(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ce extends me{constructor(i){super(),ce(this,i,_e,pe,re,{})}}export{Ce as component};

Xet Storage Details

Size:
10 kB
·
Xet hash:
ca835b6c6340276df5ff4c2c0ece7925ec1556eb51dec07adccccbee2c2be166

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.