Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev/diffusers/pr_13769/en/_app/immutable/nodes/112.b68bd07f.js

HuggingFaceDocBuilder's picture

HuggingFaceDocBuilder

about 1 month ago

10 kB

	/**
	 * Page module for the diffusers "Parallelism" API reference.
	 *
	 * This file looks like compiler output (Svelte-style fragment with
	 * create/claim/hydrate/mount/destroy hooks); it has been de-minified for
	 * readability. The single-letter helpers come from the shared runtime chunks
	 * imported below; their exact semantics live in those chunks, so the inline
	 * comments only describe how they are used here.
	 *
	 * Exports: `component` (the `Ce` class).
	 */
	import { s as re, n as ie, o as de } from "../chunks/scheduler.53228c21.js";
	import {
	  S as me,
	  i as ce,
	  e as l,
	  s as a,
	  c as f,
	  h as fe,
	  a as r,
	  d as n,
	  b as s,
	  f as T,
	  g,
	  j as J,
	  k as S,
	  l as q,
	  m as o,
	  n as u,
	  t as p,
	  o as h,
	  p as _,
	} from "../chunks/index.100fac89.js";
	import { C as ge } from "../chunks/CopyLLMTxtMenu.f7e332d5.js";
	import { D as ee } from "../chunks/Docstring.8934f3ee.js";
	import { H as te, E as ue } from "../chunks/MermaidChart.svelte_svelte_type_style_lang.b70fb789.js";

	/**
	 * Builds the fragment object for the page.
	 *
	 * @param {*} ne - Context argument supplied by the runtime `ce` initializer; not read here.
	 * @returns {object} Fragment with the hooks `c`, `l`, `h`, `m`, `p`, `i`, `o` and `d`.
	 */
	function pe(ne) {
	  // DOM nodes / text anchors, assigned in c() or l().
	  let i, V, E, A, F, z, x, H, K, d, X, D, W, I, m, Y, M, R, c, Z, k, U, j, L;
	  // Child component instances, assigned below in the original construction order.
	  let v, b, $, y, C, P, w, N;
	  // Guards i()/o() so the intro pass runs once per mount.
	  let B;

	  // Static text/HTML payloads.
	  // NOTE(review): "inferece" is a typo for "inference". It is left untouched because l()
	  // compares the node against the fixed hash "svelte-cblplr", which presumably derives from
	  // this text — fix it in the upstream markdown and regenerate instead.
	  const oe = 'Parallelism strategies help speed up diffusion transformers by distributing computations across multiple devices, allowing for faster inference/training times. Refer to the <a href="../training/distributed_inference">Distributed inferece</a> guide to learn more.';
	  const ae = "Configuration for applying different parallelisms.";
	  const se = "Configuration for context parallelism.";
	  const le = "Apply context parallel on a model.";

	  v = new ge({
	    props: {
	      containerStyle: "float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;",
	    },
	  });

	  b = new te({
	    props: { title: "Parallelism", local: "parallelism", headingTag: "h1" },
	  });

	  $ = new te({
	    props: { title: "ParallelConfig", local: "diffusers.ParallelConfig", headingTag: "h2" },
	  });

	  y = new ee({
	    props: {
	      name: "class diffusers.ParallelConfig",
	      anchor: "diffusers.ParallelConfig",
	      parameters: [
	        { name: "context_parallel_config", val: ": diffusers.models._modeling_parallel.ContextParallelConfig | None = None" },
	        { name: "_rank", val: ": int = None" },
	        { name: "_world_size", val: ": int = None" },
	        { name: "_device", val: ": device = None" },
	        { name: "_mesh", val: ": DeviceMesh = None" },
	      ],
	      parametersDescription: [
	        {
	          anchor: "diffusers.ParallelConfig.context_parallel_config",
	          description: `<strong>context_parallel_config</strong> (<code>ContextParallelConfig</code>, <em>optional</em>) —
	Configuration for context parallelism.`,
	          name: "context_parallel_config",
	        },
	      ],
	      source: "https://github.com/huggingface/diffusers/blob/vr_13769/src/diffusers/models/_modeling_parallel.py#L158",
	    },
	  });

	  C = new te({
	    props: { title: "ContextParallelConfig", local: "diffusers.ContextParallelConfig", headingTag: "h2" },
	  });

	  P = new ee({
	    props: {
	      name: "class diffusers.ContextParallelConfig",
	      anchor: "diffusers.ContextParallelConfig",
	      parameters: [
	        { name: "ring_degree", val: ": int | None = None" },
	        { name: "ulysses_degree", val: ": int | None = None" },
	        { name: "convert_to_fp32", val: ": bool = True" },
	        { name: "rotate_method", val: ": typing.Literal['allgather', 'alltoall'] = 'allgather'" },
	        { name: "mesh", val: ": torch.distributed.device_mesh.DeviceMesh | None = None" },
	        { name: "ulysses_anything", val: ": bool = False" },
	        { name: "ring_anything", val: ": bool = False" },
	        { name: "_rank", val: ": int = None" },
	        { name: "_world_size", val: ": int = None" },
	        { name: "_device", val: ": device = None" },
	        { name: "_mesh", val: ": DeviceMesh = None" },
	        { name: "_flattened_mesh", val: ": DeviceMesh = None" },
	        { name: "_ring_mesh", val: ": DeviceMesh = None" },
	        { name: "_ulysses_mesh", val: ": DeviceMesh = None" },
	        { name: "_ring_local_rank", val: ": int = None" },
	        { name: "_ulysses_local_rank", val: ": int = None" },
	      ],
	      parametersDescription: [
	        {
	          anchor: "diffusers.ContextParallelConfig.ring_degree",
	          description: `<strong>ring_degree</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) —
	Number of devices to use for Ring Attention. Sequence is split across devices. Each device computes
	attention between its local Q and KV chunks passed sequentially around ring. Lower memory (only holds 1/N
	of KV at a time), overlaps compute with communication, but requires N iterations to see all tokens. Best
	for long sequences with limited memory/bandwidth. Number of devices to use for ring attention within a
	context parallel region. Must be a divisor of the total number of devices in the context parallel mesh.`,
	          name: "ring_degree",
	        },
	        {
	          anchor: "diffusers.ContextParallelConfig.ulysses_degree",
	          description: `<strong>ulysses_degree</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) —
	Number of devices to use for Ulysses Attention. Sequence split is across devices. Each device computes
	local QKV, then all-gathers all KV chunks to compute full attention in one pass. Higher memory (stores all
	KV), requires high-bandwidth all-to-all communication, but lower latency. Best for moderate sequences with
	good interconnect bandwidth.`,
	          name: "ulysses_degree",
	        },
	        {
	          anchor: "diffusers.ContextParallelConfig.convert_to_fp32",
	          description: `<strong>convert_to_fp32</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether to convert output and LSE to float32 for ring attention numerical stability.`,
	          name: "convert_to_fp32",
	        },
	        {
	          anchor: "diffusers.ContextParallelConfig.rotate_method",
	          description: `<strong>rotate_method</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"allgather"</code>) —
	Method to use for rotating key/value states across devices in ring attention. Currently, only <code>"allgather"</code>
	is supported.`,
	          name: "rotate_method",
	        },
	        {
	          anchor: "diffusers.ContextParallelConfig.ulysses_anything",
	          description: `<strong>ulysses_anything</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether to enable “Ulysses Anything” mode, which supports arbitrary sequence lengths and head counts that
	are not evenly divisible by <code>ulysses_degree</code>. When enabled, <code>ulysses_degree</code> must be greater than 1 and
	<code>ring_degree</code> must be 1.`,
	          name: "ulysses_anything",
	        },
	        {
	          anchor: "diffusers.ContextParallelConfig.ring_anything",
	          description: `<strong>ring_anything</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether to enable “Ring Anything” mode, which supports arbitrary sequence lengths. When enabled,
	<code>ring_degree</code> must be greater than 1 and <code>ulysses_degree</code> must be 1.`,
	          name: "ring_anything",
	        },
	        {
	          anchor: "diffusers.ContextParallelConfig.mesh",
	          description: `<strong>mesh</strong> (<code>torch.distributed.device_mesh.DeviceMesh</code>, <em>optional</em>) —
	A custom device mesh to use for context parallelism. If provided, this mesh will be used instead of
	creating a new one. This is useful when combining context parallelism with other parallelism strategies
	(e.g., FSDP, tensor parallelism) that share the same device mesh. The mesh must have both “ring” and
	“ulysses” dimensions. Use size 1 for dimensions not being used (e.g., <code>mesh_shape=(2, 1, 4)</code> with
	<code>mesh_dim_names=("ring", "ulysses", "fsdp")</code> for ring attention only with FSDP).`,
	          name: "mesh",
	        },
	      ],
	      source: "https://github.com/huggingface/diffusers/blob/vr_13769/src/diffusers/models/_modeling_parallel.py#L42",
	    },
	  });

	  w = new ee({
	    props: {
	      name: "diffusers.hooks.apply_context_parallel",
	      anchor: "diffusers.hooks.apply_context_parallel",
	      parameters: [
	        { name: "module", val: ": Module" },
	        { name: "parallel_config", val: ": ContextParallelConfig" },
	        { name: "plan", val: ": dict" },
	      ],
	      source: "https://github.com/huggingface/diffusers/blob/vr_13769/src/diffusers/hooks/context_parallel.py#L80",
	    },
	  });

	  N = new ue({
	    props: { source: "https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/parallel.md" },
	  });

	  return {
	    // Create: build every node and child fragment from scratch, then apply attributes via h().
	    c() {
	      i = l("meta");
	      V = a();
	      E = l("p");
	      A = a();
	      f(v.$$.fragment);
	      F = a();
	      f(b.$$.fragment);
	      z = a();
	      x = l("p");
	      x.innerHTML = oe;
	      H = a();
	      f($.$$.fragment);
	      K = a();
	      d = l("div");
	      f(y.$$.fragment);
	      X = a();
	      D = l("p");
	      D.textContent = ae;
	      W = a();
	      f(C.$$.fragment);
	      I = a();
	      m = l("div");
	      f(P.$$.fragment);
	      Y = a();
	      M = l("p");
	      M.textContent = se;
	      R = a();
	      c = l("div");
	      f(w.$$.fragment);
	      Z = a();
	      k = l("p");
	      k.textContent = le;
	      U = a();
	      f(N.$$.fragment);
	      j = a();
	      L = l("p");
	      this.h();
	    },

	    // Claim: same node sequence as c(), but obtained from the existing nodes in `e`
	    // (presumably server-rendered markup being hydrated — confirm against the runtime chunk).
	    l(e) {
	      const t = fe("svelte-u9bgzb", document.head);
	      i = r(t, "META", { name: true, content: true });
	      t.forEach(n);
	      V = s(e);
	      E = r(e, "P", {});
	      T(E).forEach(n);
	      A = s(e);
	      g(v.$$.fragment, e);
	      F = s(e);
	      g(b.$$.fragment, e);
	      z = s(e);
	      x = r(e, "P", { "data-svelte-h": true });
	      // Only overwrite the content when the claimed node does not carry the expected hash.
	      if (J(x) !== "svelte-cblplr") {
	        x.innerHTML = oe;
	      }
	      H = s(e);
	      g($.$$.fragment, e);
	      K = s(e);
	      d = r(e, "DIV", { class: true });
	      const Q = T(d);
	      g(y.$$.fragment, Q);
	      X = s(Q);
	      D = r(Q, "P", { "data-svelte-h": true });
	      if (J(D) !== "svelte-1xt9qrn") {
	        D.textContent = ae;
	      }
	      Q.forEach(n);
	      W = s(e);
	      g(C.$$.fragment, e);
	      I = s(e);
	      m = r(e, "DIV", { class: true });
	      const G = T(m);
	      g(P.$$.fragment, G);
	      Y = s(G);
	      M = r(G, "P", { "data-svelte-h": true });
	      if (J(M) !== "svelte-1ng6p0q") {
	        M.textContent = se;
	      }
	      G.forEach(n);
	      R = s(e);
	      c = r(e, "DIV", { class: true });
	      const O = T(c);
	      g(w.$$.fragment, O);
	      Z = s(O);
	      k = r(O, "P", { "data-svelte-h": true });
	      if (J(k) !== "svelte-jjn757") {
	        k.textContent = le;
	      }
	      O.forEach(n);
	      U = s(e);
	      g(N.$$.fragment, e);
	      j = s(e);
	      L = r(e, "P", {});
	      T(L).forEach(n);
	      this.h();
	    },

	    // Hydrate: set attributes shared by the create and claim paths.
	    h() {
	      const docstringClass = "docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8";
	      S(i, "name", "hf:doc:metadata");
	      S(i, "content", he);
	      S(d, "class", docstringClass);
	      S(m, "class", docstringClass);
	      S(c, "class", docstringClass);
	    },

	    // Mount: the <meta> goes into document.head; everything else goes into target `e` before anchor `t`.
	    m(e, t) {
	      q(document.head, i);
	      o(e, V, t);
	      o(e, E, t);
	      o(e, A, t);
	      u(v, e, t);
	      o(e, F, t);
	      u(b, e, t);
	      o(e, z, t);
	      o(e, x, t);
	      o(e, H, t);
	      u($, e, t);
	      o(e, K, t);
	      o(e, d, t);
	      u(y, d, null);
	      q(d, X);
	      q(d, D);
	      o(e, W, t);
	      u(C, e, t);
	      o(e, I, t);
	      o(e, m, t);
	      u(P, m, null);
	      q(m, Y);
	      q(m, M);
	      o(e, R, t);
	      o(e, c, t);
	      u(w, c, null);
	      q(c, Z);
	      q(c, k);
	      o(e, U, t);
	      u(N, e, t);
	      o(e, j, t);
	      o(e, L, t);
	      B = true;
	    },

	    // Update: nothing on this page changes after mount, so the imported `ie` is used as-is.
	    p: ie,

	    // Intro: forwarded to each child fragment once; B prevents a repeat pass.
	    // The source text read `B\|\|(...)`, which is not valid JavaScript outside a string
	    // literal; the intended operator is `||`, expressed here as a guard.
	    i(e) {
	      if (B) {
	        return;
	      }
	      p(v.$$.fragment, e);
	      p(b.$$.fragment, e);
	      p($.$$.fragment, e);
	      p(y.$$.fragment, e);
	      p(C.$$.fragment, e);
	      p(P.$$.fragment, e);
	      p(w.$$.fragment, e);
	      p(N.$$.fragment, e);
	      B = true;
	    },

	    // Outro: forwarded to each child fragment, then the intro guard is cleared.
	    o(e) {
	      h(v.$$.fragment, e);
	      h(b.$$.fragment, e);
	      h($.$$.fragment, e);
	      h(y.$$.fragment, e);
	      h(C.$$.fragment, e);
	      h(P.$$.fragment, e);
	      h(w.$$.fragment, e);
	      h(N.$$.fragment, e);
	      B = false;
	    },

	    // Destroy: top-level nodes are detached only when `e` is truthy; the <meta> and the
	    // child components are always torn down.
	    d(e) {
	      if (e) {
	        n(V);
	        n(E);
	        n(A);
	        n(F);
	        n(z);
	        n(x);
	        n(H);
	        n(K);
	        n(d);
	        n(W);
	        n(I);
	        n(m);
	        n(R);
	        n(c);
	        n(U);
	        n(j);
	        n(L);
	      }
	      n(i);
	      _(v, e);
	      _(b, e);
	      _($, e);
	      // y, P and w are mounted inside d/m/c (see m()), and are destroyed without the flag.
	      _(y);
	      _(C, e);
	      _(P);
	      _(w);
	      _(N, e);
	    },
	  };
	}

	// JSON table of contents written to the <meta name="hf:doc:metadata"> tag in h().
	const he = '{"title":"Parallelism","local":"parallelism","sections":[{"title":"ParallelConfig","local":"diffusers.ParallelConfig","sections":[],"depth":2},{"title":"ContextParallelConfig","local":"diffusers.ContextParallelConfig","sections":[],"depth":2}],"depth":1}';

	/**
	 * Instance function handed to the runtime initializer.
	 *
	 * Registers a callback through the imported `de` that reads the `fw` query parameter
	 * and discards the result, then exposes no instance state.
	 *
	 * @param {*} ne - Supplied by the runtime; not read here.
	 * @returns {Array} Always an empty array.
	 */
	function _e(ne) {
	  de(() => {
	    new URLSearchParams(window.location.search).get("fw");
	  });
	  return [];
	}

	/** Page component: wires `_e` (instance) and `pe` (fragment) into the runtime base class. */
	class Ce extends me {
	  constructor(i) {
	    super();
	    ce(this, i, _e, pe, re, {});
	  }
	}

	export { Ce as component };

Xet Storage Details

Size: 10 kB
Xet hash: ca835b6c6340276df5ff4c2c0ece7925ec1556eb51dec07adccccbee2c2be166

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.