Buckets:
| import{s as Je,n as qe,o as Ee}from"../chunks/scheduler.53228c21.js";import{S as Ie,i as je,e as r,s as o,c as f,h as Ue,a as d,d as n,b as s,f as X,g,j as u,k as J,l as i,m as l,n as h,t as _,o as T,p as $}from"../chunks/index.cac5d66a.js";import{D as de}from"../chunks/Docstring.ae6a0c34.js";import{C as Ze}from"../chunks/CodeBlock.606cbaf4.js";import{H as le,E as Ge}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.5d043803.js";function Qe(ve){let b,F,R,S,x,Y,w,xe='Anima is a text-to-image model that reuses the <a href="/docs/diffusers/pr_13876/en/api/models/cosmos_transformer3d#diffusers.CosmosTransformer3DModel">CosmosTransformer3DModel</a> with a Qwen3 text encoder, a T5-token text conditioner, and the <a href="/docs/diffusers/pr_13876/en/api/models/autoencoderkl_qwenimage#diffusers.AutoencoderKLQwenImage">AutoencoderKLQwenImage</a> VAE.',K,M,O,C,ee,p,y,me,q,we="A ModularPipeline for Anima.",ce,A,Me="<p>> This is an experimental feature and is likely to change in the future.</p>",te,k,ne,a,B,pe,E,Ce="Auto Modular pipeline for text-to-image generation using Anima.",ue,I,ye="Supported workflows:",fe,j,Ae="<li><code>text2image</code>: requires <code>prompt</code></li>",ge,U,ke=`Components: | |
| text_encoder (<code>Qwen3Model</code>) tokenizer (<code>Qwen2Tokenizer</code>) t5_tokenizer (<code>T5TokenizerFast</code>) text_conditioner | |
| (<code>AnimaTextConditioner</code>) guider (<code>ClassifierFreeGuidance</code>) transformer (<code>CosmosTransformer3DModel</code>) scheduler | |
| (<code>FlowMatchEulerDiscreteScheduler</code>) vae (<code>AutoencoderKLQwenImage</code>) image_processor (<code>VaeImageProcessor</code>)`,he,Z,Be=`Inputs: | |
| prompt (<code>str</code>): | |
| The prompt or prompts to guide image generation. | |
| negative_prompt (<code>str</code>, <em>optional</em>): | |
| The prompt or prompts not to guide the image generation. | |
| max_sequence_length (<code>int</code>, <em>optional</em>, defaults to 512): | |
| Maximum sequence length for prompt encoding. | |
| num_images_per_prompt (<code>int</code>, <em>optional</em>, defaults to 1): | |
| The number of images to generate per prompt. | |
| height (<code>int</code>, <em>optional</em>): | |
| The height in pixels of the generated image. | |
| width (<code>int</code>, <em>optional</em>): | |
| The width in pixels of the generated image. | |
| latents (<code>Tensor</code>, <em>optional</em>): | |
| Pre-generated noisy latents for image generation. | |
| generator (<code>Generator</code>, <em>optional</em>): | |
| Torch generator for deterministic generation. | |
| num_inference_steps (<code>int</code>, <em>optional</em>, defaults to 50): | |
| The number of denoising steps. | |
| sigmas (<code>list</code>, <em>optional</em>): | |
| Custom sigmas for the denoising process. | |
| *<em>denoiser_input_fields (<code>None</code>, </em>optional<em>): | |
| The conditional model inputs for the Anima denoiser. | |
| output_type (<code>str</code>, </em>optional*, defaults to pil): | |
| Output format: ‘pil’, ‘np’, ‘pt’.`,_e,G,Le=`Outputs: | |
| images (<code>list</code>): | |
| Generated images.`,oe,L,se,c,P,Te,Q,Pe="Text conditioner used by Anima to map Qwen3 hidden states and T5 token ids to Cosmos text embeddings.",$e,z,He=`Anima reuses the Cosmos Predict2 DiT. The only model-specific conditioning module is this LLM adapter, which | |
| cross-attends from learned T5 token embeddings to Qwen3 text encoder hidden states before the diffusion loop. | |
| <code>target_dim</code> is the conditioner output dimension and must match the transformer’s <code>text_embed_dim</code>.`,be,D,H,ie,N,ae,V,re;return x=new le({props:{title:"Anima",local:"anima",headingTag:"h1"}}),M=new Ze({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTW9kdWxhclBpcGVsaW5lJTBBJTBBcGlwZSUyMCUzRCUyME1vZHVsYXJQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTIyY2lyY2xlc3RvbmUtbGFicyUyRkFuaW1hLUJhc2UtdjEuMC1EaWZmdXNlcnMlMjIpJTBBcGlwZS5sb2FkX2NvbXBvbmVudHModG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMEFwaXBlLnRvKCUyMmN1ZGElMjIpJTBBJTBBaW1hZ2UlMjAlM0QlMjBwaXBlKHByb21wdCUzRCUyMm1hc3RlcnBpZWNlJTJDJTIwYmVzdCUyMHF1YWxpdHklMkMlMjAxZ2lybCUyQyUyMHNvbG8lMkMlMjBjaXR5JTIwbGlnaHRzJTIyKS5pbWFnZXMlNUIwJTVE",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> ModularPipeline | |
| pipe = ModularPipeline.from_pretrained(<span class="hljs-string">"circlestone-labs/Anima-Base-v1.0-Diffusers"</span>) | |
| pipe.load_components(torch_dtype=torch.bfloat16) | |
| pipe.to(<span class="hljs-string">"cuda"</span>) | |
| image = pipe(prompt=<span class="hljs-string">"masterpiece, best quality, 1girl, solo, city lights"</span>).images[<span class="hljs-number">0</span>]`,lang:"python",wrap:!1}}),C=new le({props:{title:"AnimaModularPipeline",local:"diffusers.AnimaModularPipeline",headingTag:"h2"}}),y=new de({props:{name:"class diffusers.AnimaModularPipeline",anchor:"diffusers.AnimaModularPipeline",parameters:[{name:"blocks",val:": diffusers.modular_pipelines.modular_pipeline.ModularPipelineBlocks | None = None"},{name:"pretrained_model_name_or_path",val:": str | os.PathLike | None = None"},{name:"components_manager",val:": diffusers.modular_pipelines.components_manager.ComponentsManager | None = None"},{name:"collection",val:": str | None = None"},{name:"modular_config_dict",val:": dict[str, typing.Any] | None = None"},{name:"config_dict",val:": dict[str, typing.Any] | None = None"},{name:"**kwargs",val:""}],source:"https://github.com/huggingface/diffusers/blob/vr_13876/src/diffusers/modular_pipelines/anima/modular_pipeline.py#L19"}}),k=new le({props:{title:"AnimaAutoBlocks",local:"diffusers.AnimaAutoBlocks",headingTag:"h2"}}),B=new de({props:{name:"class diffusers.AnimaAutoBlocks",anchor:"diffusers.AnimaAutoBlocks",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_13876/src/diffusers/modular_pipelines/anima/modular_blocks_anima.py#L126"}}),L=new le({props:{title:"AnimaTextConditioner",local:"diffusers.AnimaTextConditioner",headingTag:"h2"}}),P=new de({props:{name:"class diffusers.AnimaTextConditioner",anchor:"diffusers.AnimaTextConditioner",parameters:[{name:"source_dim",val:": int = 1024"},{name:"target_dim",val:": int = 1024"},{name:"model_dim",val:": int = 1024"},{name:"num_layers",val:": int = 6"},{name:"num_attention_heads",val:": int = 16"},{name:"mlp_ratio",val:": float = 4.0"},{name:"target_vocab_size",val:": int = 32128"},{name:"use_self_attention",val:": bool = True"},{name:"use_layer_norm",val:": bool = False"},{name:"min_sequence_length",val:": int = 512"}],source:"https://github.com/huggingface/diffusers/blob/vr_13876/src/diffusers/models/condition_embedders/condition_embedder_anima.py#L229"}}),H=new de({props:{name:"forward",anchor:"diffusers.AnimaTextConditioner.forward",parameters:[{name:"source_hidden_states",val:": Tensor"},{name:"target_input_ids",val:": Tensor"},{name:"target_attention_mask",val:": torch.Tensor | None = None"},{name:"source_attention_mask",val:": torch.Tensor | None = None"}],parametersDescription:[{anchor:"diffusers.AnimaTextConditioner.forward.source_hidden_states",description:`<strong>source_hidden_states</strong> (<code>torch.Tensor</code> of shape <code>(batch_size, source_sequence_length, source_dim)</code>) — | |
| Qwen3 text encoder hidden states to condition on.`,name:"source_hidden_states"},{anchor:"diffusers.AnimaTextConditioner.forward.target_input_ids",description:`<strong>target_input_ids</strong> (<code>torch.Tensor</code> of shape <code>(batch_size, target_sequence_length)</code>) — | |
| T5 token ids used as learned query tokens.`,name:"target_input_ids"},{anchor:"diffusers.AnimaTextConditioner.forward.target_attention_mask",description:`<strong>target_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Attention mask for the target T5 token ids.`,name:"target_attention_mask"},{anchor:"diffusers.AnimaTextConditioner.forward.source_attention_mask",description:`<strong>source_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Attention mask for the source Qwen3 hidden states.`,name:"source_attention_mask"}],source:"https://github.com/huggingface/diffusers/blob/vr_13876/src/diffusers/models/condition_embedders/condition_embedder_anima.py#L285",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Text conditioning embeddings for the Cosmos transformer.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>torch.Tensor</code></p> | |
| `}}),N=new Ge({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/anima.md"}}),{c(){b=r("meta"),F=o(),R=r("p"),S=o(),f(x.$$.fragment),Y=o(),w=r("p"),w.innerHTML=xe,K=o(),f(M.$$.fragment),O=o(),f(C.$$.fragment),ee=o(),p=r("div"),f(y.$$.fragment),me=o(),q=r("p"),q.textContent=we,ce=o(),A=r("blockquote"),A.innerHTML=Me,te=o(),f(k.$$.fragment),ne=o(),a=r("div"),f(B.$$.fragment),pe=o(),E=r("p"),E.textContent=Ce,ue=o(),I=r("p"),I.textContent=ye,fe=o(),j=r("ul"),j.innerHTML=Ae,ge=o(),U=r("p"),U.innerHTML=ke,he=o(),Z=r("p"),Z.innerHTML=Be,_e=o(),G=r("p"),G.innerHTML=Le,oe=o(),f(L.$$.fragment),se=o(),c=r("div"),f(P.$$.fragment),Te=o(),Q=r("p"),Q.textContent=Pe,$e=o(),z=r("p"),z.innerHTML=He,be=o(),D=r("div"),f(H.$$.fragment),ie=o(),f(N.$$.fragment),ae=o(),V=r("p"),this.h()},l(e){const t=Ue("svelte-u9bgzb",document.head);b=d(t,"META",{name:!0,content:!0}),t.forEach(n),F=s(e),R=d(e,"P",{}),X(R).forEach(n),S=s(e),g(x.$$.fragment,e),Y=s(e),w=d(e,"P",{"data-svelte-h":!0}),u(w)!=="svelte-40ei1d"&&(w.innerHTML=xe),K=s(e),g(M.$$.fragment,e),O=s(e),g(C.$$.fragment,e),ee=s(e),p=d(e,"DIV",{class:!0});var W=X(p);g(y.$$.fragment,W),me=s(W),q=d(W,"P",{"data-svelte-h":!0}),u(q)!=="svelte-1nwzq6u"&&(q.textContent=we),ce=s(W),A=d(W,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),u(A)!=="svelte-1o9sbyc"&&(A.innerHTML=Me),W.forEach(n),te=s(e),g(k.$$.fragment,e),ne=s(e),a=d(e,"DIV",{class:!0});var m=X(a);g(B.$$.fragment,m),pe=s(m),E=d(m,"P",{"data-svelte-h":!0}),u(E)!=="svelte-169sbp1"&&(E.textContent=Ce),ue=s(m),I=d(m,"P",{"data-svelte-h":!0}),u(I)!=="svelte-ls6ro2"&&(I.textContent=ye),fe=s(m),j=d(m,"UL",{"data-svelte-h":!0}),u(j)!=="svelte-v0h72t"&&(j.innerHTML=Ae),ge=s(m),U=d(m,"P",{"data-svelte-h":!0}),u(U)!=="svelte-1629ysr"&&(U.innerHTML=ke),he=s(m),Z=d(m,"P",{"data-svelte-h":!0}),u(Z)!=="svelte-1de4bkj"&&(Z.innerHTML=Be),_e=s(m),G=d(m,"P",{"data-svelte-h":!0}),u(G)!=="svelte-pgeti7"&&(G.innerHTML=Le),m.forEach(n),oe=s(e),g(L.$$.fragment,e),se=s(e),c=d(e,"DIV",{class:!0});var v=X(c);g(P.$$.fragment,v),Te=s(v),Q=d(v,"P",{"data-svelte-h":!0}),u(Q)!=="svelte-1f5qxsj"&&(Q.textContent=Pe),$e=s(v),z=d(v,"P",{"data-svelte-h":!0}),u(z)!=="svelte-nnrwra"&&(z.innerHTML=He),be=s(v),D=d(v,"DIV",{class:!0});var Ne=X(D);g(H.$$.fragment,Ne),Ne.forEach(n),v.forEach(n),ie=s(e),g(N.$$.fragment,e),ae=s(e),V=d(e,"P",{}),X(V).forEach(n),this.h()},h(){J(b,"name","hf:doc:metadata"),J(b,"content",ze),J(A,"class","warning"),J(p,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),J(a,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),J(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),J(c,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){i(document.head,b),l(e,F,t),l(e,R,t),l(e,S,t),h(x,e,t),l(e,Y,t),l(e,w,t),l(e,K,t),h(M,e,t),l(e,O,t),h(C,e,t),l(e,ee,t),l(e,p,t),h(y,p,null),i(p,me),i(p,q),i(p,ce),i(p,A),l(e,te,t),h(k,e,t),l(e,ne,t),l(e,a,t),h(B,a,null),i(a,pe),i(a,E),i(a,ue),i(a,I),i(a,fe),i(a,j),i(a,ge),i(a,U),i(a,he),i(a,Z),i(a,_e),i(a,G),l(e,oe,t),h(L,e,t),l(e,se,t),l(e,c,t),h(P,c,null),i(c,Te),i(c,Q),i(c,$e),i(c,z),i(c,be),i(c,D),h(H,D,null),l(e,ie,t),h(N,e,t),l(e,ae,t),l(e,V,t),re=!0},p:qe,i(e){re||(_(x.$$.fragment,e),_(M.$$.fragment,e),_(C.$$.fragment,e),_(y.$$.fragment,e),_(k.$$.fragment,e),_(B.$$.fragment,e),_(L.$$.fragment,e),_(P.$$.fragment,e),_(H.$$.fragment,e),_(N.$$.fragment,e),re=!0)},o(e){T(x.$$.fragment,e),T(M.$$.fragment,e),T(C.$$.fragment,e),T(y.$$.fragment,e),T(k.$$.fragment,e),T(B.$$.fragment,e),T(L.$$.fragment,e),T(P.$$.fragment,e),T(H.$$.fragment,e),T(N.$$.fragment,e),re=!1},d(e){e&&(n(F),n(R),n(S),n(Y),n(w),n(K),n(O),n(ee),n(p),n(te),n(ne),n(a),n(oe),n(se),n(c),n(ie),n(ae),n(V)),n(b),$(x,e),$(M,e),$(C,e),$(y),$(k,e),$(B),$(L,e),$(P),$(H),$(N,e)}}}const ze='{"title":"Anima","local":"anima","sections":[{"title":"AnimaModularPipeline","local":"diffusers.AnimaModularPipeline","sections":[],"depth":2},{"title":"AnimaAutoBlocks","local":"diffusers.AnimaAutoBlocks","sections":[],"depth":2},{"title":"AnimaTextConditioner","local":"diffusers.AnimaTextConditioner","sections":[],"depth":2}],"depth":1}';function De(ve){return Ee(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Se extends Ie{constructor(b){super(),je(this,b,De,Qe,Je,{})}}export{Se as component}; | |
Xet Storage Details
- Size:
- 12.9 kB
- Xet hash:
- fafe84768f7feb709ad0e1d80bb5ac7a310df991acbc8236e354f0a4f741a347
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.