Buckets:
| import{s as st,o as rt,n as ot}from"../chunks/scheduler.8c3d61f6.js";import{S as lt,i as mt,g as l,s as i,r as u,A as pt,h as m,f as n,c as a,j as $,u as g,x as v,k as U,y as o,a as s,v as f,d as h,t as _,w as b}from"../chunks/index.da70eac4.js";import{T as dt}from"../chunks/Tip.1d9b8c37.js";import{D as K}from"../chunks/Docstring.6b390b9a.js";import{C as Xe}from"../chunks/CodeBlock.00a903b3.js";import{E as ct}from"../chunks/ExampleCodeBlock.db12be95.js";import{H as Je,E as ut}from"../chunks/EditOnGithub.1e64e623.js";function gt(ee){let r,w='Make sure to check out the Schedulers <a href="../../using-diffusers/schedulers">guide</a> to learn how to explore the tradeoff between scheduler speed and quality, and see the <a href="../../using-diffusers/loading#reuse-a-pipeline">reuse components across pipelines</a> section to learn how to efficiently load the same components into multiple pipelines.';return{c(){r=l("p"),r.innerHTML=w},l(y){r=m(y,"P",{"data-svelte-h":!0}),v(r)!=="svelte-1qn15hi"&&(r.innerHTML=w)},m(y,T){s(y,r,T)},p:ot,d(y){y&&n(r)}}}function ft(ee){let r,w="Examples:",y,T,M;return T=new Xe({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTHVtaW5hMlRleHQySW1nUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwTHVtaW5hMlRleHQySW1nUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUyMkFscGhhLVZMTE0lMkZMdW1pbmEtSW1hZ2UtMi4wJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMEElMjMlMjBFbmFibGUlMjBtZW1vcnklMjBvcHRpbWl6YXRpb25zLiUwQXBpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJVcHBlciUyMGJvZHklMjBvZiUyMGElMjB5b3VuZyUyMHdvbWFuJTIwaW4lMjBhJTIwVmljdG9yaWFuLWVyYSUyMG91dGZpdCUyMHdpdGglMjBicmFzcyUyMGdvZ2dsZXMlMjBhbmQlMjBsZWF0aGVyJTIwc3RyYXBzLiUyMEJhY2tncm91bmQlMjBzaG93cyUyMGFuJTIwaW5kdXN0cmlhbCUyMHJldm9sdXRpb24lMjBjaXR5c2NhcGUlMjB3aXRoJTIwc21va3klMjBza2llcyUyMGFuZCUyMHRhbGwlMkMlMjBtZXRhbCUyMHN0cnVjdHVyZXMlMjIlMEFpbWFnZSUyMCUzRCUyMHBpcGUocHJvbXB0KS5pbWFnZXMlNUIwJTVE",highlighted:`<span class="hljs-meta">>>> </span><span 
class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> Lumina2Text2ImgPipeline | |
| <span class="hljs-meta">>>> </span>pipe = Lumina2Text2ImgPipeline.from_pretrained(<span class="hljs-string">"Alpha-VLLM/Lumina-Image-2.0"</span>, torch_dtype=torch.bfloat16) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Enable memory optimizations.</span> | |
| <span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload() | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"Upper body of a young woman in a Victorian-era outfit with brass goggles and leather straps. Background shows an industrial revolution cityscape with smoky skies and tall, metal structures"</span> | |
| <span class="hljs-meta">>>> </span>image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),{c(){r=l("p"),r.textContent=w,y=i(),u(T.$$.fragment)},l(c){r=m(c,"P",{"data-svelte-h":!0}),v(r)!=="svelte-kvfsh7"&&(r.textContent=w),y=a(c),g(T.$$.fragment,c)},m(c,x){s(c,r,x),s(c,y,x),f(T,c,x),M=!0},p:ot,i(c){M||(h(T.$$.fragment,c),M=!0)},o(c){_(T.$$.fragment,c),M=!1},d(c){c&&(n(r),n(y)),b(T,c)}}}function ht(ee){let r,w,y,T,M,c,x,Ye='<a href="https://huggingface.co/Alpha-VLLM/Lumina-Image-2.0" rel="nofollow">Lumina Image 2.0: A Unified and Efficient Image Generative Model</a> is a 2 billion parameter flow-based diffusion transformer capable of generating diverse images from text descriptions.',pe,C,De="The abstract from the paper is:",de,E,ze="<em>We introduce Lumina-Image 2.0, an advanced text-to-image model that surpasses previous state-of-the-art methods across multiple benchmarks, while also shedding light on its potential to evolve into a generalist vision intelligence model. Lumina-Image 2.0 exhibits three key properties: (1) Unification – it adopts a unified architecture that treats text and image tokens as a joint sequence, enabling natural cross-modal interactions and facilitating task expansion. Besides, since high-quality captioners can provide semantically better-aligned text-image training pairs, we introduce a unified captioning system, UniCaptioner, which generates comprehensive and precise captions for the model. This not only accelerates model convergence but also enhances prompt adherence, variable-length prompt handling, and task generalization via prompt templates. (2) Efficiency – to improve the efficiency of the unified architecture, we develop a set of optimization techniques that improve semantic learning and fine-grained texture generation during training while incorporating inference-time acceleration strategies without compromising image quality. 
(3) Transparency – we open-source all training details, code, and models to ensure full reproducibility, aiming to bridge the gap between well-resourced closed-source research teams and independent developers.</em>",ce,J,ue,V,ge,B,Qe="Single file loading for Lumina Image 2.0 is available for the <code>Lumina2Transformer2DModel</code>",fe,F,he,H,_e,R,Se="GGUF Quantized checkpoints for the <code>Lumina2Transformer2DModel</code> can be loaded via <code>from_single_file</code> with the <code>GGUFQuantizationConfig</code>",be,q,ye,N,ve,p,X,ke,te,Ae="Pipeline for text-to-image generation using Lumina-T2I.",Ze,ne,Oe=`This model inherits from <a href="/docs/diffusers/pr_10727/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods the | |
| library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)`,je,I,Y,Ge,ie,Ke="Function invoked when calling the pipeline for generation.",Pe,k,We,Z,D,Ce,ae,et=`Disable sliced VAE decoding. If <code>enable_vae_slicing</code> was previously enabled, this method will go back to | |
| computing decoding in one step.`,Ee,j,z,Ve,oe,tt=`Disable tiled VAE decoding. If <code>enable_vae_tiling</code> was previously enabled, this method will go back to | |
| computing decoding in one step.`,Be,G,Q,Fe,se,nt=`Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to | |
| compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.`,He,P,S,Re,re,it=`Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to | |
| compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow | |
| processing larger images.`,qe,W,A,Ne,le,at="Encodes the prompt into text encoder hidden states.",Te,O,Me,me,xe;return M=new Je({props:{title:"Lumina2",local:"lumina2",headingTag:"h1"}}),J=new dt({props:{$$slots:{default:[gt]},$$scope:{ctx:ee}}}),V=new Je({props:{title:"Using Single File loading with Lumina Image 2.0",local:"using-single-file-loading-with-lumina-image-20",headingTag:"h2"}}),F=new Xe({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTHVtaW5hMlRyYW5zZm9ybWVyMkRNb2RlbCUyQyUyMEx1bWluYTJUZXh0MkltZ1BpcGVsaW5lJTBBJTBBY2twdF9wYXRoJTIwJTNEJTIwJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRkFscGhhLVZMTE0lMkZMdW1pbmEtSW1hZ2UtMi4wJTJGYmxvYiUyRm1haW4lMkZjb25zb2xpZGF0ZWQuMDAtb2YtMDEucHRoJTIyJTBBdHJhbnNmb3JtZXIlMjAlM0QlMjBMdW1pbmEyVHJhbnNmb3JtZXIyRE1vZGVsLmZyb21fc2luZ2xlX2ZpbGUoJTBBJTIwJTIwJTIwJTIwY2twdF9wYXRoJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUwQSklMEElMEFwaXBlJTIwJTNEJTIwTHVtaW5hMlRleHQySW1nUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMkFscGhhLVZMTE0lMkZMdW1pbmEtSW1hZ2UtMi4wJTIyJTJDJTIwdHJhbnNmb3JtZXIlM0R0cmFuc2Zvcm1lciUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMEEpJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQWltYWdlJTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjAlMjJhJTIwY2F0JTIwaG9sZGluZyUyMGElMjBzaWduJTIwdGhhdCUyMHNheXMlMjBoZWxsbyUyMiUyQyUwQSUyMCUyMCUyMCUyMGdlbmVyYXRvciUzRHRvcmNoLkdlbmVyYXRvciglMjJjcHUlMjIpLm1hbnVhbF9zZWVkKDApJTJDJTBBKS5pbWFnZXMlNUIwJTVEJTBBaW1hZ2Uuc2F2ZSglMjJsdW1pbmEtc2luZ2xlLWZpbGUucG5nJTIyKSUwQQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> Lumina2Transformer2DModel, Lumina2Text2ImgPipeline | |
| ckpt_path = <span class="hljs-string">"https://huggingface.co/Alpha-VLLM/Lumina-Image-2.0/blob/main/consolidated.00-of-01.pth"</span> | |
| transformer = Lumina2Transformer2DModel.from_single_file( | |
| ckpt_path, torch_dtype=torch.bfloat16 | |
| ) | |
| pipe = Lumina2Text2ImgPipeline.from_pretrained( | |
| <span class="hljs-string">"Alpha-VLLM/Lumina-Image-2.0"</span>, transformer=transformer, torch_dtype=torch.bfloat16 | |
| ) | |
| pipe.enable_model_cpu_offload() | |
| image = pipe( | |
| <span class="hljs-string">"a cat holding a sign that says hello"</span>, | |
| generator=torch.Generator(<span class="hljs-string">"cpu"</span>).manual_seed(<span class="hljs-number">0</span>), | |
| ).images[<span class="hljs-number">0</span>] | |
| image.save(<span class="hljs-string">"lumina-single-file.png"</span>) | |
| `,wrap:!1}}),H=new Je({props:{title:"Using GGUF Quantized Checkpoints with Lumina Image 2.0",local:"using-gguf-quantized-checkpoints-with-lumina-image-20",headingTag:"h2"}}),q=new Xe({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEx1bWluYTJUcmFuc2Zvcm1lcjJETW9kZWwlMkMlMjBMdW1pbmEyVGV4dDJJbWdQaXBlbGluZSUyQyUyMEdHVUZRdWFudGl6YXRpb25Db25maWclMjAlMEElMEFja3B0X3BhdGglMjAlM0QlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGY2FsY3VpcyUyRmx1bWluYS1nZ3VmJTJGYmxvYiUyRm1haW4lMkZsdW1pbmEyLXE0XzAuZ2d1ZiUyMiUwQXRyYW5zZm9ybWVyJTIwJTNEJTIwTHVtaW5hMlRyYW5zZm9ybWVyMkRNb2RlbC5mcm9tX3NpbmdsZV9maWxlKCUwQSUyMCUyMCUyMCUyMGNrcHRfcGF0aCUyQyUwQSUyMCUyMCUyMCUyMHF1YW50aXphdGlvbl9jb25maWclM0RHR1VGUXVhbnRpemF0aW9uQ29uZmlnKGNvbXB1dGVfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTJDJTBBKSUwQSUwQXBpcGUlMjAlM0QlMjBMdW1pbmEyVGV4dDJJbWdQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyQWxwaGEtVkxMTSUyRkx1bWluYS1JbWFnZS0yLjAlMjIlMkMlMjB0cmFuc2Zvcm1lciUzRHRyYW5zZm9ybWVyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUwQSklMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBaW1hZ2UlMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMCUyMmElMjBjYXQlMjBob2xkaW5nJTIwYSUyMHNpZ24lMjB0aGF0JTIwc2F5cyUyMGhlbGxvJTIyJTJDJTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yJTNEdG9yY2guR2VuZXJhdG9yKCUyMmNwdSUyMikubWFudWFsX3NlZWQoMCklMkMlMEEpLmltYWdlcyU1QjAlNUQlMEFpbWFnZS5zYXZlKCUyMmx1bWluYS1nZ3VmLnBuZyUyMik=",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> Lumina2Transformer2DModel, Lumina2Text2ImgPipeline, GGUFQuantizationConfig | |
| ckpt_path = <span class="hljs-string">"https://huggingface.co/calcuis/lumina-gguf/blob/main/lumina2-q4_0.gguf"</span> | |
| transformer = Lumina2Transformer2DModel.from_single_file( | |
| ckpt_path, | |
| quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16), | |
| torch_dtype=torch.bfloat16, | |
| ) | |
| pipe = Lumina2Text2ImgPipeline.from_pretrained( | |
| <span class="hljs-string">"Alpha-VLLM/Lumina-Image-2.0"</span>, transformer=transformer, torch_dtype=torch.bfloat16 | |
| ) | |
| pipe.enable_model_cpu_offload() | |
| image = pipe( | |
| <span class="hljs-string">"a cat holding a sign that says hello"</span>, | |
| generator=torch.Generator(<span class="hljs-string">"cpu"</span>).manual_seed(<span class="hljs-number">0</span>), | |
| ).images[<span class="hljs-number">0</span>] | |
| image.save(<span class="hljs-string">"lumina-gguf.png"</span>)`,wrap:!1}}),N=new Je({props:{title:"Lumina2Text2ImgPipeline",local:"diffusers.Lumina2Text2ImgPipeline",headingTag:"h2"}}),X=new K({props:{name:"class diffusers.Lumina2Text2ImgPipeline",anchor:"diffusers.Lumina2Text2ImgPipeline",parameters:[{name:"transformer",val:": Lumina2Transformer2DModel"},{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": AutoModel"},{name:"tokenizer",val:": AutoTokenizer"}],parametersDescription:[{anchor:"diffusers.Lumina2Text2ImgPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_10727/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.Lumina2Text2ImgPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>AutoModel</code>) — | |
| Frozen text-encoder. Lumina-T2I uses | |
| <a href="https://huggingface.co/docs/transformers/model_doc/t5#transformers.AutoModel" rel="nofollow">T5</a>, specifically the | |
| <a href="https://huggingface.co/Alpha-VLLM/tree/main/t5-v1_1-xxl" rel="nofollow">t5-v1_1-xxl</a> variant.`,name:"text_encoder"},{anchor:"diffusers.Lumina2Text2ImgPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>AutoModel</code>) — | |
| Tokenizer of class | |
| <a href="https://huggingface.co/docs/transformers/model_doc/t5#transformers.AutoModel" rel="nofollow">AutoModel</a>.`,name:"tokenizer"},{anchor:"diffusers.Lumina2Text2ImgPipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/pr_10727/en/api/models/transformer2d#diffusers.Transformer2DModel">Transformer2DModel</a>) — | |
| A text conditioned <code>Transformer2DModel</code> to denoise the encoded image latents.`,name:"transformer"},{anchor:"diffusers.Lumina2Text2ImgPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_10727/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) — | |
| A scheduler to be used in combination with <code>transformer</code> to denoise the encoded image latents.`,name:"scheduler"}],source:"https://github.com/huggingface/diffusers/blob/vr_10727/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L143"}}),Y=new K({props:{name:"__call__",anchor:"diffusers.Lumina2Text2ImgPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"width",val:": typing.Optional[int] = None"},{name:"height",val:": typing.Optional[int] = None"},{name:"num_inference_steps",val:": int = 30"},{name:"guidance_scale",val:": float = 4.0"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"sigmas",val:": typing.List[float] = None"},{name:"num_images_per_prompt",val:": typing.Optional[int] = 1"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"callback_on_step_end",val:": typing.Optional[typing.Callable[[int, int, typing.Dict], NoneType]] = None"},{name:"callback_on_step_end_tensor_inputs",val:": typing.List[str] = ['latents']"},{name:"system_prompt",val:": typing.Optional[str] = None"},{name:"cfg_trunc_ratio",val:": float = 1.0"},{name:"cfg_normalization",val:": bool = True"},{name:"use_mask_in_transformer",val:": bool = True"},{name:"max_sequence_length",val:": int = 256"}],parametersDescription:[{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, 
<em>optional</em>) — | |
| The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>. | |
| instead.`,name:"prompt"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 30) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>List[float]</code>, <em>optional</em>) — | |
| Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in | |
| their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed | |
| will be used.`,name:"sigmas"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 4.0) — | |
| Guidance scale as defined in <a href="https://arxiv.org/abs/2207.12598" rel="nofollow">Classifier-Free Diffusion Guidance</a>. | |
| <code>guidance_scale</code> is defined as <code>w</code> of equation 2. of <a href="https://arxiv.org/pdf/2205.11487.pdf" rel="nofollow">Imagen | |
| Paper</a>. Guidance scale is enabled by setting <code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to the text <code>prompt</code>, | |
| usually at the expense of lower image quality.`,name:"guidance_scale"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of images to generate per prompt.`,name:"num_images_per_prompt"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) in the DDIM paper: <a href="https://arxiv.org/abs/2010.02502" rel="nofollow">https://arxiv.org/abs/2010.02502</a>. Only applies to | |
| <a href="/docs/diffusers/pr_10727/en/api/schedulers/ddim#diffusers.DDIMScheduler">schedulers.DDIMScheduler</a>, will be ignored for others.`,name:"eta"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a> | |
| to make generation deterministic.`,name:"generator"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor will ge generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.prompt_attention_mask",description:"<strong>prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — Pre-generated attention mask for text embeddings.",name:"prompt_attention_mask"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. For Lumina-T2I this negative prompt should be "". If not | |
| provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.negative_prompt_attention_mask",description:`<strong>negative_prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated attention mask for negative text embeddings.`,name:"negative_prompt_attention_mask"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) — | |
| The output format of the generate image. Choose between | |
| <a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>~pipelines.stable_diffusion.IFPipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls at the end of each denoising steps during the inference. The function is called | |
| with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a list of all tensors as specified by | |
| <code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) — | |
| The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list | |
| will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the | |
| <code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.system_prompt",description:`<strong>system_prompt</strong> (<code>str</code>, <em>optional</em>) — | |
| The system prompt to use for the image generation.`,name:"system_prompt"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.cfg_trunc_ratio",description:`<strong>cfg_trunc_ratio</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1.0</code>) — | |
| The ratio of the timestep interval to apply normalization-based guidance scale.`,name:"cfg_trunc_ratio"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.cfg_normalization",description:`<strong>cfg_normalization</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to apply normalization-based guidance scale.`,name:"cfg_normalization"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.use_mask_in_transformer",description:`<strong>use_mask_in_transformer</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to use attention mask in <code>Lumina2Transformer2DModel</code>. Set <code>False</code> for performance gain.`,name:"use_mask_in_transformer"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.max_sequence_length",description:`<strong>max_sequence_length</strong> (<code>int</code>, defaults to <code>256</code>) — | |
| Maximum sequence length to use with the <code>prompt</code>.`,name:"max_sequence_length"}],source:"https://github.com/huggingface/diffusers/blob/vr_10727/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L503",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <a | |
| href="/docs/diffusers/pr_10727/en/api/pipelines/unclip#diffusers.ImagePipelineOutput" | |
| >ImagePipelineOutput</a> is returned, otherwise a <code>tuple</code> is | |
| returned where the first element is a list with the generated images</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/diffusers/pr_10727/en/api/pipelines/unclip#diffusers.ImagePipelineOutput" | |
| >ImagePipelineOutput</a> or <code>tuple</code></p> | |
| `}}),k=new ct({props:{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.example",$$slots:{default:[ft]},$$scope:{ctx:ee}}}),D=new K({props:{name:"disable_vae_slicing",anchor:"diffusers.Lumina2Text2ImgPipeline.disable_vae_slicing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_10727/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L445"}}),z=new K({props:{name:"disable_vae_tiling",anchor:"diffusers.Lumina2Text2ImgPipeline.disable_vae_tiling",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_10727/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L460"}}),Q=new K({props:{name:"enable_vae_slicing",anchor:"diffusers.Lumina2Text2ImgPipeline.enable_vae_slicing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_10727/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L438"}}),S=new K({props:{name:"enable_vae_tiling",anchor:"diffusers.Lumina2Text2ImgPipeline.enable_vae_tiling",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_10727/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L452"}}),A=new K({props:{name:"encode_prompt",anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"num_images_per_prompt",val:": int = 1"},{name:"device",val:": typing.Optional[torch.device] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"system_prompt",val:": typing.Optional[str] = None"},{name:"max_sequence_length",val:": int = 
256"}],parametersDescription:[{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt not to guide the image generation. If not defined, one has to pass <code>negative_prompt_embeds</code> | |
| instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is less than <code>1</code>). For | |
| Lumina-T2I, this should be "".`,name:"negative_prompt"},{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>, <em>optional</em>): | |
| torch device to place the resulting embeddings on`,name:"device"},{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. For Lumina-T2I, it’s should be the embeddings of the "" string.`,name:"negative_prompt_embeds"},{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.max_sequence_length",description:`<strong>max_sequence_length</strong> (<code>int</code>, defaults to <code>256</code>) — | |
| Maximum sequence length to use for the prompt.`,name:"max_sequence_length"}],source:"https://github.com/huggingface/diffusers/blob/vr_10727/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L247"}}),O=new ut({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/lumina2.md"}}),{c(){r=l("meta"),w=i(),y=l("p"),T=i(),u(M.$$.fragment),c=i(),x=l("p"),x.innerHTML=Ye,pe=i(),C=l("p"),C.textContent=De,de=i(),E=l("p"),E.innerHTML=ze,ce=i(),u(J.$$.fragment),ue=i(),u(V.$$.fragment),ge=i(),B=l("p"),B.innerHTML=Qe,fe=i(),u(F.$$.fragment),he=i(),u(H.$$.fragment),_e=i(),R=l("p"),R.innerHTML=Se,be=i(),u(q.$$.fragment),ye=i(),u(N.$$.fragment),ve=i(),p=l("div"),u(X.$$.fragment),ke=i(),te=l("p"),te.textContent=Ae,Ze=i(),ne=l("p"),ne.innerHTML=Oe,je=i(),I=l("div"),u(Y.$$.fragment),Ge=i(),ie=l("p"),ie.textContent=Ke,Pe=i(),u(k.$$.fragment),We=i(),Z=l("div"),u(D.$$.fragment),Ce=i(),ae=l("p"),ae.innerHTML=et,Ee=i(),j=l("div"),u(z.$$.fragment),Ve=i(),oe=l("p"),oe.innerHTML=tt,Be=i(),G=l("div"),u(Q.$$.fragment),Fe=i(),se=l("p"),se.textContent=nt,He=i(),P=l("div"),u(S.$$.fragment),Re=i(),re=l("p"),re.textContent=it,qe=i(),W=l("div"),u(A.$$.fragment),Ne=i(),le=l("p"),le.textContent=at,Te=i(),u(O.$$.fragment),Me=i(),me=l("p"),this.h()},l(e){const 
t=pt("svelte-u9bgzb",document.head);r=m(t,"META",{name:!0,content:!0}),t.forEach(n),w=a(e),y=m(e,"P",{}),$(y).forEach(n),T=a(e),g(M.$$.fragment,e),c=a(e),x=m(e,"P",{"data-svelte-h":!0}),v(x)!=="svelte-1yt6rrf"&&(x.innerHTML=Ye),pe=a(e),C=m(e,"P",{"data-svelte-h":!0}),v(C)!=="svelte-1cwsb16"&&(C.textContent=De),de=a(e),E=m(e,"P",{"data-svelte-h":!0}),v(E)!=="svelte-14hqiub"&&(E.innerHTML=ze),ce=a(e),g(J.$$.fragment,e),ue=a(e),g(V.$$.fragment,e),ge=a(e),B=m(e,"P",{"data-svelte-h":!0}),v(B)!=="svelte-k5cnxs"&&(B.innerHTML=Qe),fe=a(e),g(F.$$.fragment,e),he=a(e),g(H.$$.fragment,e),_e=a(e),R=m(e,"P",{"data-svelte-h":!0}),v(R)!=="svelte-1sse30f"&&(R.innerHTML=Se),be=a(e),g(q.$$.fragment,e),ye=a(e),g(N.$$.fragment,e),ve=a(e),p=m(e,"DIV",{class:!0});var d=$(p);g(X.$$.fragment,d),ke=a(d),te=m(d,"P",{"data-svelte-h":!0}),v(te)!=="svelte-1ennvvi"&&(te.textContent=Ae),Ze=a(d),ne=m(d,"P",{"data-svelte-h":!0}),v(ne)!=="svelte-1ipmcmq"&&(ne.innerHTML=Oe),je=a(d),I=m(d,"DIV",{class:!0});var L=$(I);g(Y.$$.fragment,L),Ge=a(L),ie=m(L,"P",{"data-svelte-h":!0}),v(ie)!=="svelte-v78lg8"&&(ie.textContent=Ke),Pe=a(L),g(k.$$.fragment,L),L.forEach(n),We=a(d),Z=m(d,"DIV",{class:!0});var we=$(Z);g(D.$$.fragment,we),Ce=a(we),ae=m(we,"P",{"data-svelte-h":!0}),v(ae)!=="svelte-1s3c06i"&&(ae.innerHTML=et),we.forEach(n),Ee=a(d),j=m(d,"DIV",{class:!0});var Ie=$(j);g(z.$$.fragment,Ie),Ve=a(Ie),oe=m(Ie,"P",{"data-svelte-h":!0}),v(oe)!=="svelte-pkn4ui"&&(oe.innerHTML=tt),Ie.forEach(n),Be=a(d),G=m(d,"DIV",{class:!0});var Le=$(G);g(Q.$$.fragment,Le),Fe=a(Le),se=m(Le,"P",{"data-svelte-h":!0}),v(se)!=="svelte-14bnrb6"&&(se.textContent=nt),Le.forEach(n),He=a(d),P=m(d,"DIV",{class:!0});var $e=$(P);g(S.$$.fragment,$e),Re=a($e),re=m($e,"P",{"data-svelte-h":!0}),v(re)!=="svelte-1xwrf7t"&&(re.textContent=it),$e.forEach(n),qe=a(d),W=m(d,"DIV",{class:!0});var 
Ue=$(W);g(A.$$.fragment,Ue),Ne=a(Ue),le=m(Ue,"P",{"data-svelte-h":!0}),v(le)!=="svelte-16q0ax1"&&(le.textContent=at),Ue.forEach(n),d.forEach(n),Te=a(e),g(O.$$.fragment,e),Me=a(e),me=m(e,"P",{}),$(me).forEach(n),this.h()},h(){U(r,"name","hf:doc:metadata"),U(r,"content",_t),U(I,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),U(Z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),U(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),U(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),U(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),U(W,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),U(p,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){o(document.head,r),s(e,w,t),s(e,y,t),s(e,T,t),f(M,e,t),s(e,c,t),s(e,x,t),s(e,pe,t),s(e,C,t),s(e,de,t),s(e,E,t),s(e,ce,t),f(J,e,t),s(e,ue,t),f(V,e,t),s(e,ge,t),s(e,B,t),s(e,fe,t),f(F,e,t),s(e,he,t),f(H,e,t),s(e,_e,t),s(e,R,t),s(e,be,t),f(q,e,t),s(e,ye,t),f(N,e,t),s(e,ve,t),s(e,p,t),f(X,p,null),o(p,ke),o(p,te),o(p,Ze),o(p,ne),o(p,je),o(p,I),f(Y,I,null),o(I,Ge),o(I,ie),o(I,Pe),f(k,I,null),o(p,We),o(p,Z),f(D,Z,null),o(Z,Ce),o(Z,ae),o(p,Ee),o(p,j),f(z,j,null),o(j,Ve),o(j,oe),o(p,Be),o(p,G),f(Q,G,null),o(G,Fe),o(G,se),o(p,He),o(p,P),f(S,P,null),o(P,Re),o(P,re),o(p,qe),o(p,W),f(A,W,null),o(W,Ne),o(W,le),s(e,Te,t),f(O,e,t),s(e,Me,t),s(e,me,t),xe=!0},p(e,[t]){const d={};t&2&&(d.$$scope={dirty:t,ctx:e}),J.$set(d);const 
L={};t&2&&(L.$$scope={dirty:t,ctx:e}),k.$set(L)},i(e){xe||(h(M.$$.fragment,e),h(J.$$.fragment,e),h(V.$$.fragment,e),h(F.$$.fragment,e),h(H.$$.fragment,e),h(q.$$.fragment,e),h(N.$$.fragment,e),h(X.$$.fragment,e),h(Y.$$.fragment,e),h(k.$$.fragment,e),h(D.$$.fragment,e),h(z.$$.fragment,e),h(Q.$$.fragment,e),h(S.$$.fragment,e),h(A.$$.fragment,e),h(O.$$.fragment,e),xe=!0)},o(e){_(M.$$.fragment,e),_(J.$$.fragment,e),_(V.$$.fragment,e),_(F.$$.fragment,e),_(H.$$.fragment,e),_(q.$$.fragment,e),_(N.$$.fragment,e),_(X.$$.fragment,e),_(Y.$$.fragment,e),_(k.$$.fragment,e),_(D.$$.fragment,e),_(z.$$.fragment,e),_(Q.$$.fragment,e),_(S.$$.fragment,e),_(A.$$.fragment,e),_(O.$$.fragment,e),xe=!1},d(e){e&&(n(w),n(y),n(T),n(c),n(x),n(pe),n(C),n(de),n(E),n(ce),n(ue),n(ge),n(B),n(fe),n(he),n(_e),n(R),n(be),n(ye),n(ve),n(p),n(Te),n(Me),n(me)),n(r),b(M,e),b(J,e),b(V,e),b(F,e),b(H,e),b(q,e),b(N,e),b(X),b(Y),b(k),b(D),b(z),b(Q),b(S),b(A),b(O,e)}}}/**
 * JSON text describing this page: its title ("Lumina2"), its local anchor, and
 * a list of depth-2 sections, each with its own title and anchor.
 * The fragment's `h()` hook passes it to `U(r,"content",_t)` right after
 * `U(r,"name","hf:doc:metadata")`.
 * NOTE(review): `U` is an imported helper; presumably it sets an attribute on
 * the <meta> element - confirm against the index chunk.
 */const _t='{"title":"Lumina2","local":"lumina2","sections":[{"title":"Using Single File loading with Lumina Image 2.0","local":"using-single-file-loading-with-lumina-image-20","sections":[],"depth":2},{"title":"Using GGUF Quantized Checkpoints with Lumina Image 2.0","local":"using-gguf-quantized-checkpoints-with-lumina-image-20","sections":[],"depth":2},{"title":"Lumina2Text2ImgPipeline","local":"diffusers.Lumina2Text2ImgPipeline","sections":[],"depth":2}],"depth":1}';/**
 * Instance function handed to `mt` by the component constructor below.
 * It gives `rt` a callback that reads the `fw` query parameter from
 * `window.location.search` and discards the value, then returns an empty array.
 * The `ee` parameter is not used.
 * NOTE(review): `rt` is imported as `o` from the scheduler chunk; presumably a
 * mount-time hook - confirm.
 */function bt(ee){return rt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}/**
 * Page component, exported under the name `component`.
 * The constructor calls `super()` and then `mt(this, r, bt, ht, st, {})`,
 * forwarding its single argument `r` together with the instance function `bt`
 * and the fragment factory `ht`.
 * NOTE(review): `lt` and `mt` are imported helpers; presumably the component
 * base class and its initialiser - confirm against the index chunk.
 */class Lt extends lt{constructor(r){super(),mt(this,r,bt,ht,st,{})}}export{Lt as component}; | |
Xet Storage Details
- Size: 35.9 kB
- Xet hash: 260c37a246b10ab568a00108aebfd75a8c911fbe775ce537efc2f40a168c853d
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.