Buckets:
| import{s as ut,o as ft,n as je}from"../chunks/scheduler.8c3d61f6.js";import{S as _t,i as ht,g as l,s as i,r as u,A as bt,h as p,f as n,c as s,j as A,u as f,x as g,k as C,y as c,a,v as _,d as h,t as b,w as v}from"../chunks/index.da70eac4.js";import{T as ct}from"../chunks/Tip.1d9b8c37.js";import{D as ie}from"../chunks/Docstring.6b390b9a.js";import{C as gt}from"../chunks/CodeBlock.00a903b3.js";import{E as mt}from"../chunks/ExampleCodeBlock.db12be95.js";import{H as qe,E as vt}from"../chunks/EditOnGithub.1e64e623.js";/** wt: fragment factory passed as the default slot of the Tip (ct) stored in J inside Tt. It owns one <p> element whose innerHTML is the static note x (schedulers guide plus the reuse-components link). The returned c, l, m, p and d members presumably map to the Svelte fragment hooks create, claim, mount, update and destroy - confirm against chunks/index. In l, innerHTML is only rewritten when g(o) differs from the build-time marker svelte-w7r39y. p is the imported je (presumably a no-op, since the content is static). The parameter M is never read. */function wt(M){let o,x='Make sure to check out the Schedulers <a href="../../using-diffusers/schedulers.md">guide</a> to learn how to explore the tradeoff between scheduler speed and quality, and see the <a href="../../using-diffusers/loading.md#reuse-a-pipeline">reuse components across pipelines</a> section to learn how to efficiently load the same components into multiple pipelines.';return{c(){o=l("p"),o.innerHTML=x},l(d){o=p(d,"P",{"data-svelte-h":!0}),g(o)!=="svelte-w7r39y"&&(o.innerHTML=x)},m(d,m){a(d,o,m)},p:je,d(d){d&&n(o)}}}/** xt: fragment factory passed as the default slot of the Tip (ct) stored in q inside Tt. Same single <p> shape as wt; its static HTML x explains the variant and torch_dtype options for checkpoint downloads. The parameter M is never read. */function xt(M){let o,x='Make sure to pass the <code>variant</code> argument for downloaded checkpoints to use lower disk space. Set it to <code>"fp16"</code> for models with recommended dtype as <code>torch.float16</code>, and <code>"bf16"</code> for models with recommended dtype as <code>torch.bfloat16</code>. By default, <code>torch.float32</code> weights are downloaded, which use twice the amount of disk storage. Additionally, <code>torch.float32</code> weights can be downcasted on-the-fly by specifying the <code>torch_dtype</code> argument. 
Read about it in the <a href="https://huggingface.co/docs/diffusers/v0.31.0/en/api/pipelines/overview#diffusers.DiffusionPipeline.from_pretrained" rel="nofollow">docs</a>.';return{c(){o=l("p"),o.innerHTML=x},l(d){o=p(d,"P",{"data-svelte-h":!0}),g(o)!=="svelte-1k3bkk9"&&(o.innerHTML=x)},m(d,m){a(d,o,m)},p:je,d(d){d&&n(o)}}}/** yt: fragment factory passed as the default slot of the ExampleCodeBlock (mt) anchored at diffusers.SanaPipeline.__call__.example inside Tt. It creates a <p> with the text Examples:, a separator node d from the imported i() helper (presumably whitespace - confirm), and a CodeBlock (gt) instance m. The code prop looks base64-encoded (presumably the raw snippet - not decoded here); highlighted is the pre-rendered hljs markup of the SanaPipeline example. w is set to true on mount; the returned i member calls h on the child fragment only while w is false, and the returned o member always calls b and resets w to false. The parameter M is never read. */function yt(M){let o,x="Examples:",d,m,w;return m=new gt({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU2FuYVBpcGVsaW5lJTBBJTBBcGlwZSUyMCUzRCUyMFNhbmFQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyRWZmaWNpZW50LUxhcmdlLU1vZGVsJTJGU2FuYV8xNjAwTV8xMDI0cHhfZGlmZnVzZXJzJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDMyJTBBKSUwQXBpcGUudG8oJTIyY3VkYSUyMiklMEFwaXBlLnRleHRfZW5jb2Rlci50byh0b3JjaC5iZmxvYXQxNiklMEFwaXBlLnRyYW5zZm9ybWVyJTIwJTNEJTIwcGlwZS50cmFuc2Zvcm1lci50byh0b3JjaC5mbG9hdDE2KSUwQSUwQWltYWdlJTIwJTNEJTIwcGlwZShwcm9tcHQlM0QnYSUyMGN5YmVycHVuayUyMGNhdCUyMHdpdGglMjBhJTIwbmVvbiUyMHNpZ24lMjB0aGF0JTIwc2F5cyUyMCUyMlNhbmElMjInKSU1QjAlNUQlMEFpbWFnZSU1QjAlNUQuc2F2ZSglMjJvdXRwdXQucG5nJTIyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> SanaPipeline | |
| <span class="hljs-meta">>>> </span>pipe = SanaPipeline.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"Efficient-Large-Model/Sana_1600M_1024px_diffusers"</span>, torch_dtype=torch.float32 | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-meta">>>> </span>pipe.text_encoder.to(torch.bfloat16) | |
| <span class="hljs-meta">>>> </span>pipe.transformer = pipe.transformer.to(torch.float16) | |
| <span class="hljs-meta">>>> </span>image = pipe(prompt=<span class="hljs-string">'a cyberpunk cat with a neon sign that says "Sana"'</span>)[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>image[<span class="hljs-number">0</span>].save(<span class="hljs-string">"output.png"</span>)`,wrap:!1}}),{c(){o=l("p"),o.textContent=x,d=i(),u(m.$$.fragment)},l(r){o=p(r,"P",{"data-svelte-h":!0}),g(o)!=="svelte-kvfsh7"&&(o.textContent=x),d=s(r),f(m.$$.fragment,r)},m(r,y){a(r,o,y),a(r,d,y),_(m,r,y),w=!0},p:je,i(r){w||(h(m.$$.fragment,r),w=!0)},o(r){b(m.$$.fragment,r),w=!1},d(r){r&&(n(o),n(d)),v(m,r)}}}/** Pt: same fragment shape as yt, but its CodeBlock (gt) carries the SanaPAGPipeline example, which additionally passes pag_applied_layers. Where Pt is mounted is not visible in this part of the file - presumably the SanaPAGPipeline.__call__ example slot; confirm. */function Pt(M){let o,x="Examples:",d,m,w;return m=new gt({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU2FuYVBBR1BpcGVsaW5lJTBBJTBBcGlwZSUyMCUzRCUyMFNhbmFQQUdQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyRWZmaWNpZW50LUxhcmdlLU1vZGVsJTJGU2FuYV8xNjAwTV8xMDI0cHhfZGlmZnVzZXJzJTIyJTJDJTBBJTIwJTIwJTIwJTIwcGFnX2FwcGxpZWRfbGF5ZXJzJTNEJTVCJTIydHJhbnNmb3JtZXJfYmxvY2tzLjglMjIlNUQlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MzIlMkMlMEEpJTBBcGlwZS50byglMjJjdWRhJTIyKSUwQXBpcGUudGV4dF9lbmNvZGVyLnRvKHRvcmNoLmJmbG9hdDE2KSUwQXBpcGUudHJhbnNmb3JtZXIlMjAlM0QlMjBwaXBlLnRyYW5zZm9ybWVyLnRvKHRvcmNoLmZsb2F0MTYpJTBBJTBBaW1hZ2UlMjAlM0QlMjBwaXBlKHByb21wdCUzRCdhJTIwY3liZXJwdW5rJTIwY2F0JTIwd2l0aCUyMGElMjBuZW9uJTIwc2lnbiUyMHRoYXQlMjBzYXlzJTIwJTIyU2FuYSUyMicpJTVCMCU1RCUwQWltYWdlJTVCMCU1RC5zYXZlKCUyMm91dHB1dC5wbmclMjIp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> SanaPAGPipeline | |
| <span class="hljs-meta">>>> </span>pipe = SanaPAGPipeline.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"Efficient-Large-Model/Sana_1600M_1024px_diffusers"</span>, | |
| <span class="hljs-meta">... </span> pag_applied_layers=[<span class="hljs-string">"transformer_blocks.8"</span>], | |
| <span class="hljs-meta">... </span> torch_dtype=torch.float32, | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-meta">>>> </span>pipe.text_encoder.to(torch.bfloat16) | |
| <span class="hljs-meta">>>> </span>pipe.transformer = pipe.transformer.to(torch.float16) | |
| <span class="hljs-meta">>>> </span>image = pipe(prompt=<span class="hljs-string">'a cyberpunk cat with a neon sign that says "Sana"'</span>)[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>image[<span class="hljs-number">0</span>].save(<span class="hljs-string">"output.png"</span>)`,wrap:!1}}),{c(){o=l("p"),o.textContent=x,d=i(),u(m.$$.fragment)},l(r){o=p(r,"P",{"data-svelte-h":!0}),g(o)!=="svelte-kvfsh7"&&(o.textContent=x),d=s(r),f(m.$$.fragment,r)},m(r,y){a(r,o,y),a(r,d,y),_(m,r,y),w=!0},p:je,i(r){w||(h(m.$$.fragment,r),w=!0)},o(r){b(m.$$.fragment,r),w=!1},d(r){r&&(n(o),n(d)),v(m,r)}}}function Tt(M){let o,x,d,m,w,r,y,Ye='<a href="https://huggingface.co/papers/2410.10629" rel="nofollow">SANA: Efficient High-Resolution Image Synthesis with Linear Diffusion Transformers</a> from NVIDIA and MIT HAN Lab, by Enze Xie, Junsong Chen, Junyu Chen, Han Cai, Haotian Tang, Yujun Lin, Zhekai Zhang, Muyang Li, Ligeng Zhu, Yao Lu, Song Han.',fe,B,Xe="The abstract from the paper is:",_e,D,Qe="<em>We introduce Sana, a text-to-image framework that can efficiently generate images up to 4096×4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU. Core designs include: (1) Deep compression autoencoder: unlike traditional AEs, which compress images only 8×, we trained an AE that can compress images 32×, effectively reducing the number of latent tokens. (2) Linear DiT: we replace all vanilla attention in DiT with linear attention, which is more efficient at high resolutions without sacrificing quality. (3) Decoder-only text encoder: we replaced T5 with modern decoder-only small LLM as the text encoder and designed complex human instruction with in-context learning to enhance the image-text alignment. (4) Efficient training and sampling: we propose Flow-DPM-Solver to reduce sampling steps, with efficient caption labeling and selection to accelerate convergence. As a result, Sana-0.6B is very competitive with modern giant diffusion model (e.g. Flux-12B), being 20 times smaller and 100+ times faster in measured throughput. 
Moreover, Sana-0.6B can be deployed on a 16GB laptop GPU, taking less than 1 second to generate a 1024×1024 resolution image. Sana enables content creation at low cost. Code and model will be publicly released.</em>",he,J,be,O,Ke='This pipeline was contributed by <a href="https://github.com/lawrence-cj" rel="nofollow">lawrence-cj</a> and <a href="https://github.com/chenjy2003" rel="nofollow">chenjy2003</a>. The original codebase can be found <a href="https://github.com/NVlabs/Sana" rel="nofollow">here</a>. The original weights can be found under <a href="https://huggingface.co/Efficient-Large-Model" rel="nofollow">hf.co/Efficient-Large-Model</a>.',ve,Z,et="Available models:",we,F,tt='<thead><tr><th align="center">Model</th> <th align="center">Recommended dtype</th></tr></thead> <tbody><tr><td align="center"><a href="https://huggingface.co/Efficient-Large-Model/Sana_1600M_1024px_diffusers" rel="nofollow"><code>Efficient-Large-Model/Sana_1600M_1024px_diffusers</code></a></td> <td align="center"><code>torch.float16</code></td></tr> <tr><td align="center"><a href="https://huggingface.co/Efficient-Large-Model/Sana_1600M_1024px_MultiLing_diffusers" rel="nofollow"><code>Efficient-Large-Model/Sana_1600M_1024px_MultiLing_diffusers</code></a></td> <td align="center"><code>torch.float16</code></td></tr> <tr><td align="center"><a href="https://huggingface.co/Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers" rel="nofollow"><code>Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers</code></a></td> <td align="center"><code>torch.bfloat16</code></td></tr> <tr><td align="center"><a href="https://huggingface.co/Efficient-Large-Model/Sana_1600M_512px_diffusers" rel="nofollow"><code>Efficient-Large-Model/Sana_1600M_512px_diffusers</code></a></td> <td align="center"><code>torch.float16</code></td></tr> <tr><td align="center"><a href="https://huggingface.co/Efficient-Large-Model/Sana_1600M_512px_MultiLing_diffusers" 
rel="nofollow"><code>Efficient-Large-Model/Sana_1600M_512px_MultiLing_diffusers</code></a></td> <td align="center"><code>torch.float16</code></td></tr> <tr><td align="center"><a href="https://huggingface.co/Efficient-Large-Model/Sana_600M_1024px_diffusers" rel="nofollow"><code>Efficient-Large-Model/Sana_600M_1024px_diffusers</code></a></td> <td align="center"><code>torch.float16</code></td></tr> <tr><td align="center"><a href="https://huggingface.co/Efficient-Large-Model/Sana_600M_512px_diffusers" rel="nofollow"><code>Efficient-Large-Model/Sana_600M_512px_diffusers</code></a></td> <td align="center"><code>torch.float16</code></td></tr></tbody>',xe,H,nt='Refer to <a href="https://huggingface.co/collections/Efficient-Large-Model/sana-673efba2a57ed99843f11f9e" rel="nofollow">this</a> collection for more information.',ye,V,ot="Note: The recommended dtype mentioned is for the transformer weights. The text encoder and VAE weights must stay in <code>torch.bfloat16</code> or <code>torch.float32</code> for the model to work correctly. Please refer to the inference example below to see how to load the model with the recommended dtype.",Pe,q,Te,z,Se,P,R,Ee,se,at='Pipeline for text-to-image generation using <a href="https://huggingface.co/papers/2410.10629" rel="nofollow">Sana</a>.',Ne,k,W,Ue,re,it="Function invoked when calling the pipeline for generation.",Be,j,De,E,Y,Oe,le,st="Encodes the prompt into text encoder hidden states.",$e,X,Me,T,Q,Ze,pe,rt=`Pipeline for text-to-image generation using <a href="https://huggingface.co/papers/2410.10629" rel="nofollow">Sana</a>. This pipeline | |
| supports the use of <a href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/pag" rel="nofollow">Perturbed Attention Guidance | |
| (PAG)</a>.`,Fe,I,K,He,de,lt="Function invoked when calling the pipeline for generation.",Ve,N,ze,U,ee,Re,ce,pt="Encodes the prompt into text encoder hidden states.",ke,te,Ie,L,ne,We,me,dt="Output class for Sana pipelines.",Le,oe,Ge,ue,Ae;return w=new qe({props:{title:"SanaPipeline",local:"sanapipeline",headingTag:"h1"}}),J=new ct({props:{$$slots:{default:[wt]},$$scope:{ctx:M}}}),q=new ct({props:{$$slots:{default:[xt]},$$scope:{ctx:M}}}),z=new qe({props:{title:"SanaPipeline",local:"diffusers.SanaPipeline",headingTag:"h2"}}),R=new ie({props:{name:"class diffusers.SanaPipeline",anchor:"diffusers.SanaPipeline",parameters:[{name:"tokenizer",val:": AutoTokenizer"},{name:"text_encoder",val:": AutoModelForCausalLM"},{name:"vae",val:": AutoencoderDC"},{name:"transformer",val:": SanaTransformer2DModel"},{name:"scheduler",val:": DPMSolverMultistepScheduler"}],source:"https://github.com/huggingface/diffusers/blob/vr_10312/src/diffusers/pipelines/sana/pipeline_sana.py#L137"}}),W=new ie({props:{name:"__call__",anchor:"diffusers.SanaPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"negative_prompt",val:": str = ''"},{name:"num_inference_steps",val:": int = 20"},{name:"timesteps",val:": typing.List[int] = None"},{name:"sigmas",val:": typing.List[float] = None"},{name:"guidance_scale",val:": float = 4.5"},{name:"num_images_per_prompt",val:": typing.Optional[int] = 1"},{name:"height",val:": int = 1024"},{name:"width",val:": int = 1024"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask",val:": 
typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"clean_caption",val:": bool = True"},{name:"use_resolution_binning",val:": bool = True"},{name:"attention_kwargs",val:": typing.Optional[typing.Dict[str, typing.Any]] = None"},{name:"callback_on_step_end",val:": typing.Optional[typing.Callable[[int, int, typing.Dict], NoneType]] = None"},{name:"callback_on_step_end_tensor_inputs",val:": typing.List[str] = ['latents']"},{name:"max_sequence_length",val:": int = 300"},{name:"complex_human_instruction",val:`: typing.List[str] = ["Given a user prompt, generate an 'Enhanced prompt' that provides detailed visual descriptions suitable for image generation. Evaluate the level of detail in the user prompt:", '- If the prompt is simple, focus on adding specifics about colors, shapes, sizes, textures, and spatial relationships to create vivid and concrete scenes.', '- If the prompt is already detailed, refine and enhance the existing details slightly without overcomplicating.', 'Here are examples of how to transform or refine prompts:', '- User Prompt: A cat sleeping -> Enhanced: A small, fluffy white cat curled up in a round shape, sleeping peacefully on a warm sunny windowsill, surrounded by pots of blooming red flowers.', '- User Prompt: A busy city street -> Enhanced: A bustling city street scene at dusk, featuring glowing street lamps, a diverse crowd of people in colorful clothing, and a double-decker bus passing by towering glass skyscrapers.', 'Please generate only the enhanced description for the prompt below and avoid including any additional commentary or evaluations:', 'User Prompt: ']`}],parametersDescription:[{anchor:"diffusers.SanaPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>. | |
| instead.`,name:"prompt"},{anchor:"diffusers.SanaPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.SanaPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 20) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.SanaPipeline.__call__.timesteps",description:`<strong>timesteps</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Custom timesteps to use for the denoising process with schedulers which support a <code>timesteps</code> argument | |
| in their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is | |
| passed will be used. Must be in descending order.`,name:"timesteps"},{anchor:"diffusers.SanaPipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>List[float]</code>, <em>optional</em>) — | |
| Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in | |
| their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed | |
| will be used.`,name:"sigmas"},{anchor:"diffusers.SanaPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 4.5) — | |
| Guidance scale as defined in <a href="https://arxiv.org/abs/2207.12598" rel="nofollow">Classifier-Free Diffusion Guidance</a>. | |
| <code>guidance_scale</code> is defined as <code>w</code> of equation 2. of <a href="https://arxiv.org/pdf/2205.11487.pdf" rel="nofollow">Imagen | |
| Paper</a>. Guidance scale is enabled by setting <code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to the text <code>prompt</code>, | |
| usually at the expense of lower image quality.`,name:"guidance_scale"},{anchor:"diffusers.SanaPipeline.__call__.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of images to generate per prompt.`,name:"num_images_per_prompt"},{anchor:"diffusers.SanaPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.SanaPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.SanaPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) in the DDIM paper: <a href="https://arxiv.org/abs/2010.02502" rel="nofollow">https://arxiv.org/abs/2010.02502</a>. Only applies to | |
| <a href="/docs/diffusers/pr_10312/en/api/schedulers/ddim#diffusers.DDIMScheduler">schedulers.DDIMScheduler</a>, will be ignored for others.`,name:"eta"},{anchor:"diffusers.SanaPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a> | |
| to make generation deterministic.`,name:"generator"},{anchor:"diffusers.SanaPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor will ge generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.SanaPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.SanaPipeline.__call__.prompt_attention_mask",description:"<strong>prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — Pre-generated attention mask for text embeddings.",name:"prompt_attention_mask"},{anchor:"diffusers.SanaPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. For PixArt-Sigma this negative prompt should be "". If not | |
| provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.SanaPipeline.__call__.negative_prompt_attention_mask",description:`<strong>negative_prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated attention mask for negative text embeddings.`,name:"negative_prompt_attention_mask"},{anchor:"diffusers.SanaPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) — | |
| The output format of the generate image. Choose between | |
| <a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.SanaPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>~pipelines.stable_diffusion.IFPipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.SanaPipeline.__call__.attention_kwargs",description:`<strong>attention_kwargs</strong> — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under | |
| <code>self.processor</code> in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,name:"attention_kwargs"},{anchor:"diffusers.SanaPipeline.__call__.clean_caption",description:`<strong>clean_caption</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to clean the caption before creating embeddings. Requires <code>beautifulsoup4</code> and <code>ftfy</code> to | |
| be installed. If the dependencies are not installed, the embeddings will be created from the raw | |
| prompt.`,name:"clean_caption"},{anchor:"diffusers.SanaPipeline.__call__.use_resolution_binning",description:`<strong>use_resolution_binning</strong> (<code>bool</code> defaults to <code>True</code>) — | |
| If set to <code>True</code>, the requested height and width are first mapped to the closest resolutions using | |
| <code>ASPECT_RATIO_1024_BIN</code>. After the produced latents are decoded into images, they are resized back to | |
| the requested resolution. Useful for generating non-square images.`,name:"use_resolution_binning"},{anchor:"diffusers.SanaPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls at the end of each denoising steps during the inference. The function is called | |
| with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a list of all tensors as specified by | |
| <code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.SanaPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) — | |
| The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list | |
| will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the | |
| <code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.SanaPipeline.__call__.max_sequence_length",description:`<strong>max_sequence_length</strong> (<code>int</code> defaults to <code>300</code>) — | |
| Maximum sequence length to use with the <code>prompt</code>.`,name:"max_sequence_length"},{anchor:"diffusers.SanaPipeline.__call__.complex_human_instruction",description:`<strong>complex_human_instruction</strong> (<code>List[str]</code>, <em>optional</em>) — | |
| Instructions for complex human attention: | |
| <a href="https://github.com/NVlabs/Sana/blob/main/configs/sana_app_config/Sana_1600M_app.yaml#L55" rel="nofollow">https://github.com/NVlabs/Sana/blob/main/configs/sana_app_config/Sana_1600M_app.yaml#L55</a>.`,name:"complex_human_instruction"}],source:"https://github.com/huggingface/diffusers/blob/vr_10312/src/diffusers/pipelines/sana/pipeline_sana.py#L592",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <a | |
| href="/docs/diffusers/pr_10312/en/api/pipelines/sana#diffusers.pipelines.sana.pipeline_output.SanaPipelineOutput" | |
| >SanaPipelineOutput</a> is returned, | |
| otherwise a <code>tuple</code> is returned where the first element is a list with the generated images</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/diffusers/pr_10312/en/api/pipelines/sana#diffusers.pipelines.sana.pipeline_output.SanaPipelineOutput" | |
| >SanaPipelineOutput</a> or <code>tuple</code></p> | |
| `}}),j=new mt({props:{anchor:"diffusers.SanaPipeline.__call__.example",$$slots:{default:[yt]},$$scope:{ctx:M}}}),Y=new ie({props:{name:"encode_prompt",anchor:"diffusers.SanaPipeline.encode_prompt",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"negative_prompt",val:": str = ''"},{name:"num_images_per_prompt",val:": int = 1"},{name:"device",val:": typing.Optional[torch.device] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"clean_caption",val:": bool = False"},{name:"max_sequence_length",val:": int = 300"},{name:"complex_human_instruction",val:": typing.Optional[typing.List[str]] = None"},{name:"lora_scale",val:": typing.Optional[float] = None"}],parametersDescription:[{anchor:"diffusers.SanaPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.SanaPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt not to guide the image generation. If not defined, one has to pass <code>negative_prompt_embeds</code> | |
| instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is less than <code>1</code>). For | |
| PixArt-Alpha, this should be "".`,name:"negative_prompt"},{anchor:"diffusers.SanaPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.SanaPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.SanaPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>, <em>optional</em>): | |
| torch device to place the resulting embeddings on`,name:"device"},{anchor:"diffusers.SanaPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.SanaPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. For Sana, it’s should be the embeddings of the "" string.`,name:"negative_prompt_embeds"},{anchor:"diffusers.SanaPipeline.encode_prompt.clean_caption",description:`<strong>clean_caption</strong> (<code>bool</code>, defaults to <code>False</code>) — | |
| If <code>True</code>, the function will preprocess and clean the provided caption before encoding.`,name:"clean_caption"},{anchor:"diffusers.SanaPipeline.encode_prompt.max_sequence_length",description:"<strong>max_sequence_length</strong> (<code>int</code>, defaults to 300) — Maximum sequence length to use for the prompt.",name:"max_sequence_length"},{anchor:"diffusers.SanaPipeline.encode_prompt.complex_human_instruction",description:`<strong>complex_human_instruction</strong> (<code>list[str]</code>, defaults to <code>complex_human_instruction</code>) — | |
| If <code>complex_human_instruction</code> is not empty, the function will use the complex Human instruction for | |
| the prompt.`,name:"complex_human_instruction"}],source:"https://github.com/huggingface/diffusers/blob/vr_10312/src/diffusers/pipelines/sana/pipeline_sana.py#L170"}}),X=new qe({props:{title:"SanaPAGPipeline",local:"diffusers.SanaPAGPipeline",headingTag:"h2"}}),Q=new ie({props:{name:"class diffusers.SanaPAGPipeline",anchor:"diffusers.SanaPAGPipeline",parameters:[{name:"tokenizer",val:": AutoTokenizer"},{name:"text_encoder",val:": AutoModelForCausalLM"},{name:"vae",val:": AutoencoderDC"},{name:"transformer",val:": SanaTransformer2DModel"},{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"pag_applied_layers",val:": typing.Union[str, typing.List[str]] = 'transformer_blocks.0'"}],source:"https://github.com/huggingface/diffusers/blob/vr_10312/src/diffusers/pipelines/pag/pipeline_pag_sana.py#L136"}}),K=new ie({props:{name:"__call__",anchor:"diffusers.SanaPAGPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"negative_prompt",val:": str = ''"},{name:"num_inference_steps",val:": int = 20"},{name:"timesteps",val:": typing.List[int] = None"},{name:"sigmas",val:": typing.List[float] = None"},{name:"guidance_scale",val:": float = 4.5"},{name:"num_images_per_prompt",val:": typing.Optional[int] = 1"},{name:"height",val:": int = 1024"},{name:"width",val:": int = 1024"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"clean_caption",val:": bool = 
True"},{name:"use_resolution_binning",val:": bool = True"},{name:"callback_on_step_end",val:": typing.Optional[typing.Callable[[int, int, typing.Dict], NoneType]] = None"},{name:"callback_on_step_end_tensor_inputs",val:": typing.List[str] = ['latents']"},{name:"max_sequence_length",val:": int = 300"},{name:"complex_human_instruction",val:`: typing.List[str] = ["Given a user prompt, generate an 'Enhanced prompt' that provides detailed visual descriptions suitable for image generation. Evaluate the level of detail in the user prompt:", '- If the prompt is simple, focus on adding specifics about colors, shapes, sizes, textures, and spatial relationships to create vivid and concrete scenes.', '- If the prompt is already detailed, refine and enhance the existing details slightly without overcomplicating.', 'Here are examples of how to transform or refine prompts:', '- User Prompt: A cat sleeping -> Enhanced: A small, fluffy white cat curled up in a round shape, sleeping peacefully on a warm sunny windowsill, surrounded by pots of blooming red flowers.', '- User Prompt: A busy city street -> Enhanced: A bustling city street scene at dusk, featuring glowing street lamps, a diverse crowd of people in colorful clothing, and a double-decker bus passing by towering glass skyscrapers.', 'Please generate only the enhanced description for the prompt below and avoid including any additional commentary or evaluations:', 'User Prompt: ']`},{name:"pag_scale",val:": float = 3.0"},{name:"pag_adaptive_scale",val:": float = 0.0"}],parametersDescription:[{anchor:"diffusers.SanaPAGPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>. | |
| instead.`,name:"prompt"},{anchor:"diffusers.SanaPAGPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.SanaPAGPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 20) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.SanaPAGPipeline.__call__.timesteps",description:`<strong>timesteps</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Custom timesteps to use for the denoising process with schedulers which support a <code>timesteps</code> argument | |
| in their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is | |
| passed will be used. Must be in descending order.`,name:"timesteps"},{anchor:"diffusers.SanaPAGPipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>List[float]</code>, <em>optional</em>) — | |
| Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in | |
| their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed | |
| will be used.`,name:"sigmas"},{anchor:"diffusers.SanaPAGPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 4.5) — | |
| Guidance scale as defined in <a href="https://arxiv.org/abs/2207.12598" rel="nofollow">Classifier-Free Diffusion Guidance</a>. | |
| <code>guidance_scale</code> is defined as <code>w</code> of equation 2. of <a href="https://arxiv.org/pdf/2205.11487.pdf" rel="nofollow">Imagen | |
| Paper</a>. Guidance scale is enabled by setting <code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to the text <code>prompt</code>, | |
| usually at the expense of lower image quality.`,name:"guidance_scale"},{anchor:"diffusers.SanaPAGPipeline.__call__.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of images to generate per prompt.`,name:"num_images_per_prompt"},{anchor:"diffusers.SanaPAGPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.SanaPAGPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.SanaPAGPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) in the DDIM paper: <a href="https://arxiv.org/abs/2010.02502" rel="nofollow">https://arxiv.org/abs/2010.02502</a>. Only applies to | |
| <a href="/docs/diffusers/pr_10312/en/api/schedulers/ddim#diffusers.DDIMScheduler">schedulers.DDIMScheduler</a>, will be ignored for others.`,name:"eta"},{anchor:"diffusers.SanaPAGPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a> | |
| to make generation deterministic.`,name:"generator"},{anchor:"diffusers.SanaPAGPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor will ge generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.SanaPAGPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.SanaPAGPipeline.__call__.prompt_attention_mask",description:"<strong>prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — Pre-generated attention mask for text embeddings.",name:"prompt_attention_mask"},{anchor:"diffusers.SanaPAGPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. For PixArt-Sigma this negative prompt should be "". If not | |
| provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.SanaPAGPipeline.__call__.negative_prompt_attention_mask",description:`<strong>negative_prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated attention mask for negative text embeddings.`,name:"negative_prompt_attention_mask"},{anchor:"diffusers.SanaPAGPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) — | |
| The output format of the generate image. Choose between | |
| <a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.SanaPAGPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>~pipelines.stable_diffusion.IFPipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.SanaPAGPipeline.__call__.clean_caption",description:`<strong>clean_caption</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to clean the caption before creating embeddings. Requires <code>beautifulsoup4</code> and <code>ftfy</code> to | |
| be installed. If the dependencies are not installed, the embeddings will be created from the raw | |
| prompt.`,name:"clean_caption"},{anchor:"diffusers.SanaPAGPipeline.__call__.use_resolution_binning",description:`<strong>use_resolution_binning</strong> (<code>bool</code> defaults to <code>True</code>) — | |
| If set to <code>True</code>, the requested height and width are first mapped to the closest resolutions using | |
| <code>ASPECT_RATIO_1024_BIN</code>. After the produced latents are decoded into images, they are resized back to | |
| the requested resolution. Useful for generating non-square images.`,name:"use_resolution_binning"},{anchor:"diffusers.SanaPAGPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls at the end of each denoising steps during the inference. The function is called | |
| with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a list of all tensors as specified by | |
| <code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.SanaPAGPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) — | |
| The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list | |
| will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the | |
| <code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.SanaPAGPipeline.__call__.max_sequence_length",description:"<strong>max_sequence_length</strong> (<code>int</code> defaults to 300) — Maximum sequence length to use with the <code>prompt</code>.",name:"max_sequence_length"},{anchor:"diffusers.SanaPAGPipeline.__call__.complex_human_instruction",description:`<strong>complex_human_instruction</strong> (<code>List[str]</code>, <em>optional</em>) — | |
| Instructions for complex human attention: | |
| <a href="https://github.com/NVlabs/Sana/blob/main/configs/sana_app_config/Sana_1600M_app.yaml#L55" rel="nofollow">https://github.com/NVlabs/Sana/blob/main/configs/sana_app_config/Sana_1600M_app.yaml#L55</a>.`,name:"complex_human_instruction"},{anchor:"diffusers.SanaPAGPipeline.__call__.pag_scale",description:`<strong>pag_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 3.0) — | |
| The scale factor for the perturbed attention guidance. If it is set to 0.0, the perturbed attention | |
| guidance will not be used.`,name:"pag_scale"},{anchor:"diffusers.SanaPAGPipeline.__call__.pag_adaptive_scale",description:`<strong>pag_adaptive_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| The adaptive scale factor for the perturbed attention guidance. If it is set to 0.0, <code>pag_scale</code> is | |
| used.`,name:"pag_adaptive_scale"}],source:"https://github.com/huggingface/diffusers/blob/vr_10312/src/diffusers/pipelines/pag/pipeline_pag_sana.py#L578",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <a | |
| href="/docs/diffusers/pr_10312/en/api/pipelines/latent_diffusion#diffusers.ImagePipelineOutput" | |
| >ImagePipelineOutput</a> is returned, otherwise a <code>tuple</code> is | |
| returned where the first element is a list with the generated images</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/diffusers/pr_10312/en/api/pipelines/latent_diffusion#diffusers.ImagePipelineOutput" | |
| >ImagePipelineOutput</a> or <code>tuple</code></p> | |
| `}}),N=new mt({props:{anchor:"diffusers.SanaPAGPipeline.__call__.example",$$slots:{default:[Pt]},$$scope:{ctx:M}}}),ee=new ie({props:{name:"encode_prompt",anchor:"diffusers.SanaPAGPipeline.encode_prompt",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"negative_prompt",val:": str = ''"},{name:"num_images_per_prompt",val:": int = 1"},{name:"device",val:": typing.Optional[torch.device] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"clean_caption",val:": bool = False"},{name:"max_sequence_length",val:": int = 300"},{name:"complex_human_instruction",val:": typing.Optional[typing.List[str]] = None"}],parametersDescription:[{anchor:"diffusers.SanaPAGPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.SanaPAGPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt not to guide the image generation. If not defined, one has to pass <code>negative_prompt_embeds</code> | |
| instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is less than <code>1</code>). For | |
| PixArt-Alpha, this should be "".`,name:"negative_prompt"},{anchor:"diffusers.SanaPAGPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.SanaPAGPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.SanaPAGPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>, <em>optional</em>): | |
| torch device to place the resulting embeddings on`,name:"device"},{anchor:"diffusers.SanaPAGPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.SanaPAGPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. For Sana, it’s should be the embeddings of the "" string.`,name:"negative_prompt_embeds"},{anchor:"diffusers.SanaPAGPipeline.encode_prompt.clean_caption",description:`<strong>clean_caption</strong> (<code>bool</code>, defaults to <code>False</code>) — | |
| If <code>True</code>, the function will preprocess and clean the provided caption before encoding.`,name:"clean_caption"},{anchor:"diffusers.SanaPAGPipeline.encode_prompt.max_sequence_length",description:"<strong>max_sequence_length</strong> (<code>int</code>, defaults to 300) — Maximum sequence length to use for the prompt.",name:"max_sequence_length"},{anchor:"diffusers.SanaPAGPipeline.encode_prompt.complex_human_instruction",description:`<strong>complex_human_instruction</strong> (<code>list[str]</code>, defaults to <code>complex_human_instruction</code>) — | |
| If <code>complex_human_instruction</code> is not empty, the function will use the complex Human instruction for | |
| the prompt.`,name:"complex_human_instruction"}],source:"https://github.com/huggingface/diffusers/blob/vr_10312/src/diffusers/pipelines/pag/pipeline_pag_sana.py#L173"}}),te=new qe({props:{title:"SanaPipelineOutput",local:"diffusers.pipelines.sana.pipeline_output.SanaPipelineOutput",headingTag:"h2"}}),ne=new ie({props:{name:"class diffusers.pipelines.sana.pipeline_output.SanaPipelineOutput",anchor:"diffusers.pipelines.sana.pipeline_output.SanaPipelineOutput",parameters:[{name:"images",val:": typing.Union[typing.List[PIL.Image.Image], numpy.ndarray]"}],parametersDescription:[{anchor:"diffusers.pipelines.sana.pipeline_output.SanaPipelineOutput.images",description:`<strong>images</strong> (<code>List[PIL.Image.Image]</code> or <code>np.ndarray</code>) — | |
| List of denoised PIL images of length <code>batch_size</code> or numpy array of shape <code>(batch_size, height, width, num_channels)</code>. PIL images or numpy array present the denoised images of the diffusion pipeline.`,name:"images"}],source:"https://github.com/huggingface/diffusers/blob/vr_10312/src/diffusers/pipelines/sana/pipeline_output.py#L10"}}),oe=new vt({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/sana.md"}}),{c(){o=l("meta"),x=i(),d=l("p"),m=i(),u(w.$$.fragment),r=i(),y=l("p"),y.innerHTML=Ye,fe=i(),B=l("p"),B.textContent=Xe,_e=i(),D=l("p"),D.innerHTML=Qe,he=i(),u(J.$$.fragment),be=i(),O=l("p"),O.innerHTML=Ke,ve=i(),Z=l("p"),Z.textContent=et,we=i(),F=l("table"),F.innerHTML=tt,xe=i(),H=l("p"),H.innerHTML=nt,ye=i(),V=l("p"),V.innerHTML=ot,Pe=i(),u(q.$$.fragment),Te=i(),u(z.$$.fragment),Se=i(),P=l("div"),u(R.$$.fragment),Ee=i(),se=l("p"),se.innerHTML=at,Ne=i(),k=l("div"),u(W.$$.fragment),Ue=i(),re=l("p"),re.textContent=it,Be=i(),u(j.$$.fragment),De=i(),E=l("div"),u(Y.$$.fragment),Oe=i(),le=l("p"),le.textContent=st,$e=i(),u(X.$$.fragment),Me=i(),T=l("div"),u(Q.$$.fragment),Ze=i(),pe=l("p"),pe.innerHTML=rt,Fe=i(),I=l("div"),u(K.$$.fragment),He=i(),de=l("p"),de.textContent=lt,Ve=i(),u(N.$$.fragment),ze=i(),U=l("div"),u(ee.$$.fragment),Re=i(),ce=l("p"),ce.textContent=pt,ke=i(),u(te.$$.fragment),Ie=i(),L=l("div"),u(ne.$$.fragment),We=i(),me=l("p"),me.textContent=dt,Le=i(),u(oe.$$.fragment),Ge=i(),ue=l("p"),this.h()},l(e){const 
t=bt("svelte-u9bgzb",document.head);o=p(t,"META",{name:!0,content:!0}),t.forEach(n),x=s(e),d=p(e,"P",{}),A(d).forEach(n),m=s(e),f(w.$$.fragment,e),r=s(e),y=p(e,"P",{"data-svelte-h":!0}),g(y)!=="svelte-h8aibn"&&(y.innerHTML=Ye),fe=s(e),B=p(e,"P",{"data-svelte-h":!0}),g(B)!=="svelte-1cwsb16"&&(B.textContent=Xe),_e=s(e),D=p(e,"P",{"data-svelte-h":!0}),g(D)!=="svelte-1uh3w7a"&&(D.innerHTML=Qe),he=s(e),f(J.$$.fragment,e),be=s(e),O=p(e,"P",{"data-svelte-h":!0}),g(O)!=="svelte-1eg28j8"&&(O.innerHTML=Ke),ve=s(e),Z=p(e,"P",{"data-svelte-h":!0}),g(Z)!=="svelte-1bob28v"&&(Z.textContent=et),we=s(e),F=p(e,"TABLE",{"data-svelte-h":!0}),g(F)!=="svelte-15iaehi"&&(F.innerHTML=tt),xe=s(e),H=p(e,"P",{"data-svelte-h":!0}),g(H)!=="svelte-1swlfqd"&&(H.innerHTML=nt),ye=s(e),V=p(e,"P",{"data-svelte-h":!0}),g(V)!=="svelte-okwwje"&&(V.innerHTML=ot),Pe=s(e),f(q.$$.fragment,e),Te=s(e),f(z.$$.fragment,e),Se=s(e),P=p(e,"DIV",{class:!0});var S=A(P);f(R.$$.fragment,S),Ee=s(S),se=p(S,"P",{"data-svelte-h":!0}),g(se)!=="svelte-1ot17tf"&&(se.innerHTML=at),Ne=s(S),k=p(S,"DIV",{class:!0});var G=A(k);f(W.$$.fragment,G),Ue=s(G),re=p(G,"P",{"data-svelte-h":!0}),g(re)!=="svelte-v78lg8"&&(re.textContent=it),Be=s(G),f(j.$$.fragment,G),G.forEach(n),De=s(S),E=p(S,"DIV",{class:!0});var ae=A(E);f(Y.$$.fragment,ae),Oe=s(ae),le=p(ae,"P",{"data-svelte-h":!0}),g(le)!=="svelte-16q0ax1"&&(le.textContent=st),ae.forEach(n),S.forEach(n),$e=s(e),f(X.$$.fragment,e),Me=s(e),T=p(e,"DIV",{class:!0});var $=A(T);f(Q.$$.fragment,$),Ze=s($),pe=p($,"P",{"data-svelte-h":!0}),g(pe)!=="svelte-vrq74s"&&(pe.innerHTML=rt),Fe=s($),I=p($,"DIV",{class:!0});var ge=A(I);f(K.$$.fragment,ge),He=s(ge),de=p(ge,"P",{"data-svelte-h":!0}),g(de)!=="svelte-v78lg8"&&(de.textContent=lt),Ve=s(ge),f(N.$$.fragment,ge),ge.forEach(n),ze=s($),U=p($,"DIV",{class:!0});var 
Ce=A(U);f(ee.$$.fragment,Ce),Re=s(Ce),ce=p(Ce,"P",{"data-svelte-h":!0}),g(ce)!=="svelte-16q0ax1"&&(ce.textContent=pt),Ce.forEach(n),$.forEach(n),ke=s(e),f(te.$$.fragment,e),Ie=s(e),L=p(e,"DIV",{class:!0});var Je=A(L);f(ne.$$.fragment,Je),We=s(Je),me=p(Je,"P",{"data-svelte-h":!0}),g(me)!=="svelte-1h3n85u"&&(me.textContent=dt),Je.forEach(n),Le=s(e),f(oe.$$.fragment,e),Ge=s(e),ue=p(e,"P",{}),A(ue).forEach(n),this.h()},h(){C(o,"name","hf:doc:metadata"),C(o,"content",St),C(k,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(E,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(I,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(T,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){c(document.head,o),a(e,x,t),a(e,d,t),a(e,m,t),_(w,e,t),a(e,r,t),a(e,y,t),a(e,fe,t),a(e,B,t),a(e,_e,t),a(e,D,t),a(e,he,t),_(J,e,t),a(e,be,t),a(e,O,t),a(e,ve,t),a(e,Z,t),a(e,we,t),a(e,F,t),a(e,xe,t),a(e,H,t),a(e,ye,t),a(e,V,t),a(e,Pe,t),_(q,e,t),a(e,Te,t),_(z,e,t),a(e,Se,t),a(e,P,t),_(R,P,null),c(P,Ee),c(P,se),c(P,Ne),c(P,k),_(W,k,null),c(k,Ue),c(k,re),c(k,Be),_(j,k,null),c(P,De),c(P,E),_(Y,E,null),c(E,Oe),c(E,le),a(e,$e,t),_(X,e,t),a(e,Me,t),a(e,T,t),_(Q,T,null),c(T,Ze),c(T,pe),c(T,Fe),c(T,I),_(K,I,null),c(I,He),c(I,de),c(I,Ve),_(N,I,null),c(T,ze),c(T,U),_(ee,U,null),c(U,Re),c(U,ce),a(e,ke,t),_(te,e,t),a(e,Ie,t),a(e,L,t),_(ne,L,null),c(L,We),c(L,me),a(e,Le,t),_(oe,e,t),a(e,Ge,t),a(e,ue,t),Ae=!0},p(e,[t]){const S={};t&2&&(S.$$scope={dirty:t,ctx:e}),J.$set(S);const G={};t&2&&(G.$$scope={dirty:t,ctx:e}),q.$set(G);const 
ae={};t&2&&(ae.$$scope={dirty:t,ctx:e}),j.$set(ae);const $={};t&2&&($.$$scope={dirty:t,ctx:e}),N.$set($)},i(e){Ae||(h(w.$$.fragment,e),h(J.$$.fragment,e),h(q.$$.fragment,e),h(z.$$.fragment,e),h(R.$$.fragment,e),h(W.$$.fragment,e),h(j.$$.fragment,e),h(Y.$$.fragment,e),h(X.$$.fragment,e),h(Q.$$.fragment,e),h(K.$$.fragment,e),h(N.$$.fragment,e),h(ee.$$.fragment,e),h(te.$$.fragment,e),h(ne.$$.fragment,e),h(oe.$$.fragment,e),Ae=!0)},o(e){b(w.$$.fragment,e),b(J.$$.fragment,e),b(q.$$.fragment,e),b(z.$$.fragment,e),b(R.$$.fragment,e),b(W.$$.fragment,e),b(j.$$.fragment,e),b(Y.$$.fragment,e),b(X.$$.fragment,e),b(Q.$$.fragment,e),b(K.$$.fragment,e),b(N.$$.fragment,e),b(ee.$$.fragment,e),b(te.$$.fragment,e),b(ne.$$.fragment,e),b(oe.$$.fragment,e),Ae=!1},d(e){e&&(n(x),n(d),n(m),n(r),n(y),n(fe),n(B),n(_e),n(D),n(he),n(be),n(O),n(ve),n(Z),n(we),n(F),n(xe),n(H),n(ye),n(V),n(Pe),n(Te),n(Se),n(P),n($e),n(Me),n(T),n(ke),n(Ie),n(L),n(Le),n(Ge),n(ue)),n(o),v(w,e),v(J,e),v(q,e),v(z,e),v(R),v(W),v(j),v(Y),v(X,e),v(Q),v(K),v(N),v(ee),v(te,e),v(ne),v(oe,e)}}}const St='{"title":"SanaPipeline","local":"sanapipeline","sections":[{"title":"SanaPipeline","local":"diffusers.SanaPipeline","sections":[],"depth":2},{"title":"SanaPAGPipeline","local":"diffusers.SanaPAGPipeline","sections":[],"depth":2},{"title":"SanaPipelineOutput","local":"diffusers.pipelines.sana.pipeline_output.SanaPipelineOutput","sections":[],"depth":2}],"depth":1}';function $t(M){return ft(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Jt extends _t{constructor(o){super(),ht(this,o,$t,Tt,ut,{})}}export{Jt as component}; | |
Xet Storage Details
- Size:
- 56.5 kB
- Xet hash:
- f5088e58d3c8d4ae11a5c907f0ca1dd0e02c432c2056412d0626658931d6e8dc
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.