Buckets:
/*
 * Compiled page component for the "Parallelism" API reference
 * (ParallelConfig, ContextParallelConfig, apply_context_parallel).
 *
 * This file is build output: the single-letter names are minified aliases of
 * helpers exported by the shared chunks. They look like the Svelte runtime
 * helpers (create / claim / mount / detach) — confirm against ../chunks/index
 * before relying on that reading. Prefer changing the docs source and
 * rebuilding over hand-editing this module.
 *
 * The table-cell pipes ("| " / " | |") that a page export had wrapped around
 * every row have been removed: outside string literals they made the module a
 * syntax error, and inside the multi-line descriptions they would have been
 * rendered verbatim.
 */
import { s as ie, n as se, o as me } from "../chunks/scheduler.53228c21.js";
import {
  S as fe,
  i as de,
  e as r,
  s as a,
  c as p,
  h as pe,
  a as i,
  d as n,
  b as o,
  f as T,
  g as c,
  j as J,
  k as V,
  l as D,
  m as l,
  n as u,
  t as g,
  o as _,
  p as h,
} from "../chunks/index.100fac89.js";
import { C as ce } from "../chunks/CopyLLMTxtMenu.ed0e3681.js";
import { D as ee } from "../chunks/Docstring.1305e0ff.js";
import { H as te, E as ue } from "../chunks/MermaidChart.svelte_svelte_type_style_lang.dd42f483.js";

// Serialized table of contents; written to the <meta name="hf:doc:metadata"> tag in h().
const _e =
  '{"title":"Parallelism","local":"parallelism","sections":[{"title":"ParallelConfig","local":"diffusers.ParallelConfig","sections":[],"depth":2},{"title":"ContextParallelConfig","local":"diffusers.ContextParallelConfig","sections":[],"depth":2}],"depth":1}';

/**
 * Builds the fragment object for the page.
 *
 * @param {*} ne - Unused by this function body.
 * @returns {object} An object with the lifecycle methods c, l, h, m, p, i, o and d.
 */
function ge(ne) {
  // Static text content.
  // NOTE(review): "inferece" is a typo carried over from the docs source. It is
  // left untouched here because l() compares a content marker ("svelte-cblplr")
  // that presumably was derived from this exact string — fix the source and rebuild.
  const le =
    'Parallelism strategies help speed up diffusion transformers by distributing computations across multiple devices, allowing for faster inference/training times. Refer to the <a href="../training/distributed_inference">Distributed inferece</a> guide to learn more.';
  const ae = "Configuration for applying different parallelisms.";
  const oe = "Configuration for context parallelism.";
  const re = "Apply context parallel on a model.";

  // Class list shared by the three docstring wrapper <div>s.
  const docstringClass =
    "docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8";

  // DOM nodes (assigned in c() or l()) and the "already transitioned in" flag Q.
  let s, O, k, S, I, H, x, K, U, m, X, q, z, A, f, Y, M, j, d, Z, L, R, B, E;
  let Q;

  // Child components.
  const v = new ce({
    props: {
      containerStyle:
        "float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;",
    },
  });

  const $ = new te({
    props: { title: "Parallelism", local: "parallelism", headingTag: "h1" },
  });

  const C = new te({
    props: { title: "ParallelConfig", local: "diffusers.ParallelConfig", headingTag: "h2" },
  });

  const y = new ee({
    props: {
      name: "class diffusers.ParallelConfig",
      anchor: "diffusers.ParallelConfig",
      parameters: [
        {
          name: "context_parallel_config",
          val: ": typing.Optional[diffusers.models._modeling_parallel.ContextParallelConfig] = None",
        },
        { name: "_rank", val: ": int = None" },
        { name: "_world_size", val: ": int = None" },
        { name: "_device", val: ": device = None" },
        { name: "_mesh", val: ": DeviceMesh = None" },
      ],
      parametersDescription: [
        {
          anchor: "diffusers.ParallelConfig.context_parallel_config",
          description: [
            "<strong>context_parallel_config</strong> (<code>ContextParallelConfig</code>, <em>optional</em>) —",
            "Configuration for context parallelism.",
          ].join("\n"),
          name: "context_parallel_config",
        },
      ],
      source:
        "https://github.com/huggingface/diffusers/blob/vr_11739/src/diffusers/models/_modeling_parallel.py#L130",
    },
  });

  const b = new te({
    props: {
      title: "ContextParallelConfig",
      local: "diffusers.ContextParallelConfig",
      headingTag: "h2",
    },
  });

  const P = new ee({
    props: {
      name: "class diffusers.ContextParallelConfig",
      anchor: "diffusers.ContextParallelConfig",
      parameters: [
        { name: "ring_degree", val: ": typing.Optional[int] = None" },
        { name: "ulysses_degree", val: ": typing.Optional[int] = None" },
        { name: "convert_to_fp32", val: ": bool = True" },
        { name: "rotate_method", val: ": typing.Literal['allgather', 'alltoall'] = 'allgather'" },
        { name: "_rank", val: ": int = None" },
        { name: "_world_size", val: ": int = None" },
        { name: "_device", val: ": device = None" },
        { name: "_mesh", val: ": DeviceMesh = None" },
        { name: "_flattened_mesh", val: ": DeviceMesh = None" },
        { name: "_ring_mesh", val: ": DeviceMesh = None" },
        { name: "_ulysses_mesh", val: ": DeviceMesh = None" },
        { name: "_ring_local_rank", val: ": int = None" },
        { name: "_ulysses_local_rank", val: ": int = None" },
      ],
      parametersDescription: [
        {
          anchor: "diffusers.ContextParallelConfig.ring_degree",
          description: [
            "<strong>ring_degree</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) —",
            "Number of devices to use for Ring Attention. Sequence is split across devices. Each device computes",
            "attention between its local Q and KV chunks passed sequentially around ring. Lower memory (only holds 1/N",
            "of KV at a time), overlaps compute with communication, but requires N iterations to see all tokens. Best",
            "for long sequences with limited memory/bandwidth. Number of devices to use for ring attention within a",
            "context parallel region. Must be a divisor of the total number of devices in the context parallel mesh.",
          ].join("\n"),
          name: "ring_degree",
        },
        {
          anchor: "diffusers.ContextParallelConfig.ulysses_degree",
          description: [
            "<strong>ulysses_degree</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) —",
            "Number of devices to use for Ulysses Attention. Sequence split is across devices. Each device computes",
            "local QKV, then all-gathers all KV chunks to compute full attention in one pass. Higher memory (stores all",
            "KV), requires high-bandwidth all-to-all communication, but lower latency. Best for moderate sequences with",
            "good interconnect bandwidth.",
          ].join("\n"),
          name: "ulysses_degree",
        },
        {
          anchor: "diffusers.ContextParallelConfig.convert_to_fp32",
          description: [
            "<strong>convert_to_fp32</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —",
            "Whether to convert output and LSE to float32 for ring attention numerical stability.",
          ].join("\n"),
          name: "convert_to_fp32",
        },
        {
          anchor: "diffusers.ContextParallelConfig.rotate_method",
          description: [
            '<strong>rotate_method</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"allgather"</code>) —',
            'Method to use for rotating key/value states across devices in ring attention. Currently, only <code>"allgather"</code>',
            "is supported.",
          ].join("\n"),
          name: "rotate_method",
        },
      ],
      source:
        "https://github.com/huggingface/diffusers/blob/vr_11739/src/diffusers/models/_modeling_parallel.py#L41",
    },
  });

  const w = new ee({
    props: {
      name: "diffusers.hooks.apply_context_parallel",
      anchor: "diffusers.hooks.apply_context_parallel",
      parameters: [
        { name: "module", val: ": Module" },
        { name: "parallel_config", val: ": ContextParallelConfig" },
        {
          name: "plan",
          val: ": typing.Dict[str, typing.Dict[str, typing.Union[typing.Dict[typing.Union[str, int], typing.Union[diffusers.models._modeling_parallel.ContextParallelInput, typing.List[diffusers.models._modeling_parallel.ContextParallelInput], typing.Tuple[diffusers.models._modeling_parallel.ContextParallelInput, ...]]], diffusers.models._modeling_parallel.ContextParallelOutput, typing.List[diffusers.models._modeling_parallel.ContextParallelOutput], typing.Tuple[diffusers.models._modeling_parallel.ContextParallelOutput, ...]]]]",
        },
      ],
      source:
        "https://github.com/huggingface/diffusers/blob/vr_11739/src/diffusers/hooks/context_parallel.py#L78",
    },
  });

  const N = new ue({
    props: {
      source: "https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/parallel.md",
    },
  });

  return {
    // Create every node and child fragment from scratch, then apply attributes.
    c() {
      s = r("meta");
      O = a();
      k = r("p");
      S = a();
      p(v.$$.fragment);
      I = a();
      p($.$$.fragment);
      H = a();
      x = r("p");
      x.innerHTML = le;
      K = a();
      p(C.$$.fragment);
      U = a();
      m = r("div");
      p(y.$$.fragment);
      X = a();
      q = r("p");
      q.textContent = ae;
      z = a();
      p(b.$$.fragment);
      A = a();
      f = r("div");
      p(P.$$.fragment);
      Y = a();
      M = r("p");
      M.textContent = oe;
      j = a();
      d = r("div");
      p(w.$$.fragment);
      Z = a();
      L = r("p");
      L.textContent = re;
      R = a();
      p(N.$$.fragment);
      B = a();
      E = r("p");
      this.h();
    },

    // Same node sequence as c(), but obtained from the existing node list `e`
    // (and from document.head for the <meta> tag) instead of being created.
    l(e) {
      const headNodes = pe("svelte-u9bgzb", document.head);
      s = i(headNodes, "META", { name: true, content: true });
      headNodes.forEach(n);
      O = o(e);
      k = i(e, "P", {});
      T(k).forEach(n);
      S = o(e);
      c(v.$$.fragment, e);
      I = o(e);
      c($.$$.fragment, e);
      H = o(e);
      x = i(e, "P", { "data-svelte-h": true });
      // Static content is only rewritten when the node's marker differs.
      if (J(x) !== "svelte-cblplr") {
        x.innerHTML = le;
      }
      K = o(e);
      c(C.$$.fragment, e);
      U = o(e);

      m = i(e, "DIV", { class: true });
      const parallelConfigNodes = T(m);
      c(y.$$.fragment, parallelConfigNodes);
      X = o(parallelConfigNodes);
      q = i(parallelConfigNodes, "P", { "data-svelte-h": true });
      if (J(q) !== "svelte-1xt9qrn") {
        q.textContent = ae;
      }
      parallelConfigNodes.forEach(n);
      z = o(e);
      c(b.$$.fragment, e);
      A = o(e);

      f = i(e, "DIV", { class: true });
      const contextConfigNodes = T(f);
      c(P.$$.fragment, contextConfigNodes);
      Y = o(contextConfigNodes);
      M = i(contextConfigNodes, "P", { "data-svelte-h": true });
      if (J(M) !== "svelte-1ng6p0q") {
        M.textContent = oe;
      }
      contextConfigNodes.forEach(n);
      j = o(e);

      d = i(e, "DIV", { class: true });
      const applyHookNodes = T(d);
      c(w.$$.fragment, applyHookNodes);
      Z = o(applyHookNodes);
      L = i(applyHookNodes, "P", { "data-svelte-h": true });
      if (J(L) !== "svelte-jjn757") {
        L.textContent = re;
      }
      applyHookNodes.forEach(n);
      R = o(e);
      c(N.$$.fragment, e);
      B = o(e);
      E = i(e, "P", {});
      T(E).forEach(n);
      this.h();
    },

    // Attributes shared by the c() and l() paths.
    h() {
      V(s, "name", "hf:doc:metadata");
      V(s, "content", _e);
      V(m, "class", docstringClass);
      V(f, "class", docstringClass);
      V(d, "class", docstringClass);
    },

    // Insert everything into target `e` before anchor `t`; the <meta> goes to document.head.
    m(e, t) {
      D(document.head, s);
      l(e, O, t);
      l(e, k, t);
      l(e, S, t);
      u(v, e, t);
      l(e, I, t);
      u($, e, t);
      l(e, H, t);
      l(e, x, t);
      l(e, K, t);
      u(C, e, t);
      l(e, U, t);
      l(e, m, t);
      u(y, m, null);
      D(m, X);
      D(m, q);
      l(e, z, t);
      u(b, e, t);
      l(e, A, t);
      l(e, f, t);
      u(P, f, null);
      D(f, Y);
      D(f, M);
      l(e, j, t);
      l(e, d, t);
      u(w, d, null);
      D(d, Z);
      D(d, L);
      l(e, R, t);
      u(N, e, t);
      l(e, B, t);
      l(e, E, t);
      Q = true;
    },

    // Nothing on this page is reactive, so the update hook is the imported no-op `se`.
    p: se,

    // Run the child fragments' intro step once; Q guards against repeats.
    i(e) {
      if (Q) {
        return;
      }
      g(v.$$.fragment, e);
      g($.$$.fragment, e);
      g(C.$$.fragment, e);
      g(y.$$.fragment, e);
      g(b.$$.fragment, e);
      g(P.$$.fragment, e);
      g(w.$$.fragment, e);
      g(N.$$.fragment, e);
      Q = true;
    },

    // Run the child fragments' outro step and re-arm i().
    o(e) {
      _(v.$$.fragment, e);
      _($.$$.fragment, e);
      _(C.$$.fragment, e);
      _(y.$$.fragment, e);
      _(b.$$.fragment, e);
      _(P.$$.fragment, e);
      _(w.$$.fragment, e);
      _(N.$$.fragment, e);
      Q = false;
    },

    // Tear down: top-level nodes are removed only when `e` is truthy; the <meta>
    // tag and the child components are always cleaned up.
    d(e) {
      if (e) {
        n(O);
        n(k);
        n(S);
        n(I);
        n(H);
        n(x);
        n(K);
        n(U);
        n(m);
        n(z);
        n(A);
        n(f);
        n(j);
        n(d);
        n(R);
        n(B);
        n(E);
      }
      n(s);
      h(v, e);
      h($, e);
      h(C, e);
      h(y);
      h(b, e);
      h(P);
      h(w);
      h(N, e);
    },
  };
}

/**
 * Instance initializer passed to `de` by the component class.
 *
 * Registers a callback with `me` that reads the "fw" query parameter and
 * discards the value (kept as emitted by the build), then returns an empty
 * array.
 *
 * @param {*} ne - Unused by this function body.
 * @returns {Array} Always an empty array.
 */
function he(ne) {
  me(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component; wires the initializer `he` and the fragment factory `ge` into the base class. */
class be extends fe {
  constructor(s) {
    super();
    de(this, s, he, ge, ie, {});
  }
}

export { be as component };
Xet Storage Details
- Size:
- 8.75 kB
- Xet hash:
- 3d51be150f773ad1dc051004410ec99f959485deae00e9ad850d30b57b7b4aba
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.