Buckets:

rtrm's picture
download
raw
8.25 kB
// Compiler-generated Svelte page chunk for the "Parallelism" API documentation page.
// This is build output: prefer regenerating from the source markdown over editing by hand.
// All single-letter helpers are minified aliases imported from ../chunks/*; the roles
// described in the comments below are inferred from how they are called here —
// confirm against those chunks before relying on them.
import{s as se,n as ie,o as me}from"../chunks/scheduler.53228c21.js";
import{S as fe,i as de,e as r,s as l,c,h as ce,a as s,d as n,b as o,f as T,g as p,j as J,k as V,l as q,m as a,n as u,t as g,o as _,p as h}from"../chunks/index.100fac89.js";
import{C as pe}from"../chunks/CopyLLMTxtMenu.50ab6782.js";
import{D as ee}from"../chunks/Docstring.d95185c4.js";
import{H as te,E as ue}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.720a8c3c.js";

/**
 * Fragment factory for the page.
 *
 * Instantiates the child components (copy menu `pe`, headings `te`, docstring
 * cards `ee`, and the `ue` component that receives the markdown source URL) and
 * returns an object with the fragment lifecycle methods c/l/h/m/p/i/o/d.
 * The method letters follow the layout Svelte's compiler emits (create, claim,
 * hydrate, mount, update, intro, outro, destroy) — presumably; verify against the
 * Svelte runtime version in ../chunks/index.
 *
 * @param {*} ne Unused in this fragment.
 * @returns {object} Fragment object consumed by `de` in the `ye` constructor.
 */
function ge(ne){
  // NOTE(review): the link text in `ae` contains the typo "inferece". It is kept
  // as-is because it is paired with the hydration hash "svelte-cblplr" checked in
  // l(e); fix it in the upstream markdown and regenerate.
  let i,S,E,H,v,K,$,z,x,ae='Parallelism strategies help speed up diffusion transformers by distributing computations across multiple devices, allowing for faster inference/training times. Refer to the <a href="../training/distributed_inference">Distributed inferece</a> guide to learn more.',A,C,j,m,b,X,M,le="Configuration for applying different parallelisms.",I,y,R,f,P,Y,D,oe="Configuration for context parallelism.",B,d,w,Z,k,re="Apply context parallel on a model.",Q,N,U,L,F;
  // The `return` expression is one comma sequence: component construction first,
  // the fragment object last. Line breaks are only placed after commas so that
  // automatic semicolon insertion cannot cut the `return` short.
  return v=new pe({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),
  $=new te({props:{title:"Parallelism",local:"parallelism",headingTag:"h1"}}),
  C=new te({props:{title:"ParallelConfig",local:"diffusers.ParallelConfig",headingTag:"h2"}}),
  // Template literals below contain significant newlines; their continuation
  // lines must stay unindented.
  b=new ee({props:{name:"class diffusers.ParallelConfig",anchor:"diffusers.ParallelConfig",parameters:[{name:"context_parallel_config",val:": diffusers.models._modeling_parallel.ContextParallelConfig | None = None"},{name:"_rank",val:": int = None"},{name:"_world_size",val:": int = None"},{name:"_device",val:": device = None"},{name:"_mesh",val:": DeviceMesh = None"}],parametersDescription:[{anchor:"diffusers.ParallelConfig.context_parallel_config",description:`<strong>context_parallel_config</strong> (<code>ContextParallelConfig</code>, <em>optional</em>) &#x2014;
Configuration for context parallelism.`,name:"context_parallel_config"}],source:"https://github.com/huggingface/diffusers/blob/vr_12652/src/diffusers/models/_modeling_parallel.py#L135"}}),
  y=new te({props:{title:"ContextParallelConfig",local:"diffusers.ContextParallelConfig",headingTag:"h2"}}),
  P=new ee({props:{name:"class diffusers.ContextParallelConfig",anchor:"diffusers.ContextParallelConfig",parameters:[{name:"ring_degree",val:": int | None = None"},{name:"ulysses_degree",val:": int | None = None"},{name:"convert_to_fp32",val:": bool = True"},{name:"rotate_method",val:": typing.Literal['allgather', 'alltoall'] = 'allgather'"},{name:"ulysses_anything",val:": bool = False"},{name:"_rank",val:": int = None"},{name:"_world_size",val:": int = None"},{name:"_device",val:": device = None"},{name:"_mesh",val:": DeviceMesh = None"},{name:"_flattened_mesh",val:": DeviceMesh = None"},{name:"_ring_mesh",val:": DeviceMesh = None"},{name:"_ulysses_mesh",val:": DeviceMesh = None"},{name:"_ring_local_rank",val:": int = None"},{name:"_ulysses_local_rank",val:": int = None"}],parametersDescription:[{anchor:"diffusers.ContextParallelConfig.ring_degree",description:`<strong>ring_degree</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) &#x2014;
Number of devices to use for Ring Attention. Sequence is split across devices. Each device computes
attention between its local Q and KV chunks passed sequentially around ring. Lower memory (only holds 1/N
of KV at a time), overlaps compute with communication, but requires N iterations to see all tokens. Best
for long sequences with limited memory/bandwidth. Number of devices to use for ring attention within a
context parallel region. Must be a divisor of the total number of devices in the context parallel mesh.`,name:"ring_degree"},{anchor:"diffusers.ContextParallelConfig.ulysses_degree",description:`<strong>ulysses_degree</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) &#x2014;
Number of devices to use for Ulysses Attention. Sequence split is across devices. Each device computes
local QKV, then all-gathers all KV chunks to compute full attention in one pass. Higher memory (stores all
KV), requires high-bandwidth all-to-all communication, but lower latency. Best for moderate sequences with
good interconnect bandwidth.`,name:"ulysses_degree"},{anchor:"diffusers.ContextParallelConfig.convert_to_fp32",description:`<strong>convert_to_fp32</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to convert output and LSE to float32 for ring attention numerical stability.`,name:"convert_to_fp32"},{anchor:"diffusers.ContextParallelConfig.rotate_method",description:`<strong>rotate_method</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;allgather&quot;</code>) &#x2014;
Method to use for rotating key/value states across devices in ring attention. Currently, only <code>&quot;allgather&quot;</code>
is supported.`,name:"rotate_method"}],source:"https://github.com/huggingface/diffusers/blob/vr_12652/src/diffusers/models/_modeling_parallel.py#L42"}}),
  w=new ee({props:{name:"diffusers.hooks.apply_context_parallel",anchor:"diffusers.hooks.apply_context_parallel",parameters:[{name:"module",val:": Module"},{name:"parallel_config",val:": ContextParallelConfig"},{name:"plan",val:": dict"}],source:"https://github.com/huggingface/diffusers/blob/vr_12652/src/diffusers/hooks/context_parallel.py#L81"}}),
  N=new ue({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/parallel.md"}}),
  {
    // c: build the nodes from scratch (meta tag, paragraphs, three wrapper divs)
    // and the child component fragments, then run h() to set attributes.
    c(){i=r("meta"),S=l(),E=r("p"),H=l(),c(v.$$.fragment),K=l(),c($.$$.fragment),z=l(),x=r("p"),x.innerHTML=ae,A=l(),c(C.$$.fragment),j=l(),m=r("div"),c(b.$$.fragment),X=l(),M=r("p"),M.textContent=le,I=l(),c(y.$$.fragment),R=l(),f=r("div"),c(P.$$.fragment),Y=l(),D=r("p"),D.textContent=oe,B=l(),d=r("div"),c(w.$$.fragment),Z=l(),k=r("p"),k.textContent=re,Q=l(),c(N.$$.fragment),U=l(),L=r("p"),this.h()},
    // l: same structure as c(), but built from the existing nodes passed in `e`
    // (and from document.head for the meta tag). Static text is only rewritten
    // when the value returned by J(...) differs from the expected "svelte-…" hash.
    l(e){const t=ce("svelte-u9bgzb",document.head);i=s(t,"META",{name:!0,content:!0}),t.forEach(n),S=o(e),E=s(e,"P",{}),T(E).forEach(n),H=o(e),p(v.$$.fragment,e),K=o(e),p($.$$.fragment,e),z=o(e),x=s(e,"P",{"data-svelte-h":!0}),J(x)!=="svelte-cblplr"&&(x.innerHTML=ae),A=o(e),p(C.$$.fragment,e),j=o(e),m=s(e,"DIV",{class:!0});var G=T(m);p(b.$$.fragment,G),X=o(G),M=s(G,"P",{"data-svelte-h":!0}),J(M)!=="svelte-1xt9qrn"&&(M.textContent=le),G.forEach(n),I=o(e),p(y.$$.fragment,e),R=o(e),f=s(e,"DIV",{class:!0});var O=T(f);p(P.$$.fragment,O),Y=o(O),D=s(O,"P",{"data-svelte-h":!0}),J(D)!=="svelte-1ng6p0q"&&(D.textContent=oe),O.forEach(n),B=o(e),d=s(e,"DIV",{class:!0});var W=T(d);p(w.$$.fragment,W),Z=o(W),k=s(W,"P",{"data-svelte-h":!0}),J(k)!=="svelte-jjn757"&&(k.textContent=re),W.forEach(n),Q=o(e),p(N.$$.fragment,e),U=o(e),L=s(e,"P",{}),T(L).forEach(n),this.h()},
    // h: set the meta tag's name/content (content is the `_e` JSON string) and
    // the shared CSS class on the three docstring wrapper divs. The class string
    // for `m` must stay on one line: a raw newline inside a double-quoted string
    // literal is a SyntaxError, and the value must match the ones for `f` and `d`.
    h(){V(i,"name","hf:doc:metadata"),V(i,"content",_e),V(m,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),V(f,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),V(d,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},
    // m: place the meta tag in document.head and everything else at (e, t),
    // nesting each docstring component and its description inside its wrapper div.
    m(e,t){q(document.head,i),a(e,S,t),a(e,E,t),a(e,H,t),u(v,e,t),a(e,K,t),u($,e,t),a(e,z,t),a(e,x,t),a(e,A,t),u(C,e,t),a(e,j,t),a(e,m,t),u(b,m,null),q(m,X),q(m,M),a(e,I,t),u(y,e,t),a(e,R,t),a(e,f,t),u(P,f,null),q(f,Y),q(f,D),a(e,B,t),a(e,d,t),u(w,d,null),q(d,Z),q(d,k),a(e,Q,t),u(N,e,t),a(e,U,t),a(e,L,t),F=!0},
    // p: uses the imported `ie` helper; no update logic is defined in this file.
    p:ie,
    // i / o: forward to every child component fragment; `F` guards against
    // running i() twice in a row.
    i(e){F||(g(v.$$.fragment,e),g($.$$.fragment,e),g(C.$$.fragment,e),g(b.$$.fragment,e),g(y.$$.fragment,e),g(P.$$.fragment,e),g(w.$$.fragment,e),g(N.$$.fragment,e),F=!0)},
    o(e){_(v.$$.fragment,e),_($.$$.fragment,e),_(C.$$.fragment,e),_(b.$$.fragment,e),_(y.$$.fragment,e),_(P.$$.fragment,e),_(w.$$.fragment,e),_(N.$$.fragment,e),F=!1},
    // d: when `e` is truthy remove the top-level nodes; always remove the meta
    // tag and tear down every child component.
    d(e){e&&(n(S),n(E),n(H),n(K),n(z),n(x),n(A),n(j),n(m),n(I),n(R),n(f),n(B),n(d),n(Q),n(U),n(L)),n(i),h(v,e),h($,e),h(C,e),h(b),h(y,e),h(P),h(w),h(N,e)}
  }
}

// JSON string written to the <meta name="hf:doc:metadata"> tag by h(): the page
// title plus its two listed sections.
const _e='{"title":"Parallelism","local":"parallelism","sections":[{"title":"ParallelConfig","local":"diffusers.ParallelConfig","sections":[],"depth":2},{"title":"ContextParallelConfig","local":"diffusers.ContextParallelConfig","sections":[],"depth":2}],"depth":1}';

/**
 * Instance function passed to `de`. Registers a callback through `me` that reads
 * the `fw` query parameter from the current URL and discards the value, then
 * returns an empty array.
 *
 * @param {*} ne Unused.
 * @returns {Array} Always an empty array.
 */
function he(ne){return me(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}

/**
 * Page component: extends the imported base class `fe` and wires the instance
 * function `he` and fragment factory `ge` together via `de`.
 */
class ye extends fe{constructor(i){super(),de(this,i,he,ge,se,{})}}

export{ye as component};

Xet Storage Details

Size:
8.25 kB
·
Xet hash:
b3b46eb3ad15b8f02b9dd19e0b6b748719f305bddb68bf8e2d1f0956e7b11369

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.