Buckets:
| import{s as pt,o as dt,n as mt}from"../chunks/scheduler.53228c21.js";import{S as ct,i as ut,e as r,s as i,c as u,h as gt,a as l,d as n,b as a,f as J,g,j as f,k as T,l as o,m as s,n as h,t as _,o as b,p as y}from"../chunks/index.100fac89.js";import{D as K}from"../chunks/Docstring.8eea0d47.js";import{C as Se}from"../chunks/CodeBlock.d30a6509.js";import{E as ft}from"../chunks/ExampleCodeBlock.5e9b5749.js";import{H as Ge,E as ht}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.92f39b94.js";function _t(de){let c,P="Examples:",x,v,w;return v=new Se({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTHVtaW5hMlBpcGVsaW5lJTBBJTBBcGlwZSUyMCUzRCUyMEx1bWluYTJQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTIyQWxwaGEtVkxMTSUyRkx1bWluYS1JbWFnZS0yLjAlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQSUyMyUyMEVuYWJsZSUyMG1lbW9yeSUyMG9wdGltaXphdGlvbnMuJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMlVwcGVyJTIwYm9keSUyMG9mJTIwYSUyMHlvdW5nJTIwd29tYW4lMjBpbiUyMGElMjBWaWN0b3JpYW4tZXJhJTIwb3V0Zml0JTIwd2l0aCUyMGJyYXNzJTIwZ29nZ2xlcyUyMGFuZCUyMGxlYXRoZXIlMjBzdHJhcHMuJTIwQmFja2dyb3VuZCUyMHNob3dzJTIwYW4lMjBpbmR1c3RyaWFsJTIwcmV2b2x1dGlvbiUyMGNpdHlzY2FwZSUyMHdpdGglMjBzbW9reSUyMHNraWVzJTIwYW5kJTIwdGFsbCUyQyUyMG1ldGFsJTIwc3RydWN0dXJlcyUyMiUwQWltYWdlJTIwJTNEJTIwcGlwZShwcm9tcHQpLmltYWdlcyU1QjAlNUQ=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> Lumina2Pipeline | |
| <span class="hljs-meta">>>> </span>pipe = Lumina2Pipeline.from_pretrained(<span class="hljs-string">"Alpha-VLLM/Lumina-Image-2.0"</span>, torch_dtype=torch.bfloat16) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Enable memory optimizations.</span> | |
| <span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload() | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"Upper body of a young woman in a Victorian-era outfit with brass goggles and leather straps. Background shows an industrial revolution cityscape with smoky skies and tall, metal structures"</span> | |
| <span class="hljs-meta">>>> </span>image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),{c(){c=r("p"),c.textContent=P,x=i(),u(v.$$.fragment)},l(m){c=l(m,"P",{"data-svelte-h":!0}),f(c)!=="svelte-kvfsh7"&&(c.textContent=P),x=a(m),g(v.$$.fragment,m)},m(m,M){s(m,c,M),s(m,x,M),h(v,m,M),w=!0},p:mt,i(m){w||(_(v.$$.fragment,m),w=!0)},o(m){b(v.$$.fragment,m),w=!1},d(m){m&&(n(c),n(x)),y(v,m)}}}function bt(de){let c,P,x,v,w,m,M,Qe='<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>',me,Z,Ae='<a href="https://huggingface.co/Alpha-VLLM/Lumina-Image-2.0" rel="nofollow">Lumina Image 2.0: A Unified and Efficient Image Generative Model</a> is a 2 billion parameter flow-based diffusion transformer capable of generating diverse images from text descriptions.',ce,j,Xe="The abstract from the paper is:",ue,E,De="<em>We introduce Lumina-Image 2.0, an advanced text-to-image model that surpasses previous state-of-the-art methods across multiple benchmarks, while also shedding light on its potential to evolve into a generalist vision intelligence model. Lumina-Image 2.0 exhibits three key properties: (1) Unification – it adopts a unified architecture that treats text and image tokens as a joint sequence, enabling natural cross-modal interactions and facilitating task expansion. Besides, since high-quality captioners can provide semantically better-aligned text-image training pairs, we introduce a unified captioning system, UniCaptioner, which generates comprehensive and precise captions for the model. This not only accelerates model convergence but also enhances prompt adherence, variable-length prompt handling, and task generalization via prompt templates. (2) Efficiency – to improve the efficiency of the unified architecture, we develop a set of optimization techniques that improve semantic learning and fine-grained texture generation during training while incorporating inference-time acceleration strategies without compromising image quality. (3) Transparency – we open-source all training details, code, and models to ensure full reproducibility, aiming to bridge the gap between well-resourced closed-source research teams and independent developers.</em>",ge,$,Oe='<p>Make sure to check out the Schedulers <a href="../../using-diffusers/schedulers">guide</a> to learn how to explore the tradeoff between scheduler speed and quality, and see the <a href="../../using-diffusers/loading#reuse-a-pipeline">reuse components across pipelines</a> section to learn how to efficiently load the same components into multiple pipelines.</p>',fe,B,he,Y,Ke="Single file loading for Lumina Image 2.0 is available for the <code>Lumina2Transformer2DModel</code>",_e,V,be,z,ye,N,et="GGUF Quantized checkpoints for the <code>Lumina2Transformer2DModel</code> can be loaded via <code>from_single_file</code> with the <code>GGUFQuantizationConfig</code>",ve,H,we,q,Me,p,F,We,ee,tt="Pipeline for text-to-image generation using Lumina-T2I.",Ce,te,nt=`This model inherits from <a href="/docs/diffusers/pr_12762/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods the | |
| library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)`,Pe,L,R,Ze,ne,it="Function invoked when calling the pipeline for generation.",je,k,Ee,U,S,Be,ie,at=`Disable sliced VAE decoding. If <code>enable_vae_slicing</code> was previously enabled, this method will go back to | |
| computing decoding in one step.`,Ye,I,Q,Ve,ae,ot=`Disable tiled VAE decoding. If <code>enable_vae_tiling</code> was previously enabled, this method will go back to | |
| computing decoding in one step.`,ze,G,A,Ne,oe,st=`Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to | |
| compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.`,He,W,X,qe,se,rt=`Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to | |
| compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow | |
| processing larger images.`,Fe,C,D,Re,re,lt="Encodes the prompt into text encoder hidden states.",Te,O,xe,pe,Le;return w=new Ge({props:{title:"Lumina2",local:"lumina2",headingTag:"h1"}}),B=new Ge({props:{title:"Using Single File loading with Lumina Image 2.0",local:"using-single-file-loading-with-lumina-image-20",headingTag:"h2"}}),V=new Se({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTHVtaW5hMlRyYW5zZm9ybWVyMkRNb2RlbCUyQyUyMEx1bWluYTJQaXBlbGluZSUwQSUwQWNrcHRfcGF0aCUyMCUzRCUyMCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZBbHBoYS1WTExNJTJGTHVtaW5hLUltYWdlLTIuMCUyRmJsb2IlMkZtYWluJTJGY29uc29saWRhdGVkLjAwLW9mLTAxLnB0aCUyMiUwQXRyYW5zZm9ybWVyJTIwJTNEJTIwTHVtaW5hMlRyYW5zZm9ybWVyMkRNb2RlbC5mcm9tX3NpbmdsZV9maWxlKCUwQSUyMCUyMCUyMCUyMGNrcHRfcGF0aCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMEEpJTBBJTBBcGlwZSUyMCUzRCUyMEx1bWluYTJQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyQWxwaGEtVkxMTSUyRkx1bWluYS1JbWFnZS0yLjAlMjIlMkMlMjB0cmFuc2Zvcm1lciUzRHRyYW5zZm9ybWVyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUwQSklMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBaW1hZ2UlMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMCUyMmElMjBjYXQlMjBob2xkaW5nJTIwYSUyMHNpZ24lMjB0aGF0JTIwc2F5cyUyMGhlbGxvJTIyJTJDJTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yJTNEdG9yY2guR2VuZXJhdG9yKCUyMmNwdSUyMikubWFudWFsX3NlZWQoMCklMkMlMEEpLmltYWdlcyU1QjAlNUQlMEFpbWFnZS5zYXZlKCUyMmx1bWluYS1zaW5nbGUtZmlsZS5wbmclMjIpJTBB",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> Lumina2Transformer2DModel, Lumina2Pipeline | |
| ckpt_path = <span class="hljs-string">"https://huggingface.co/Alpha-VLLM/Lumina-Image-2.0/blob/main/consolidated.00-of-01.pth"</span> | |
| transformer = Lumina2Transformer2DModel.from_single_file( | |
| ckpt_path, torch_dtype=torch.bfloat16 | |
| ) | |
| pipe = Lumina2Pipeline.from_pretrained( | |
| <span class="hljs-string">"Alpha-VLLM/Lumina-Image-2.0"</span>, transformer=transformer, torch_dtype=torch.bfloat16 | |
| ) | |
| pipe.enable_model_cpu_offload() | |
| image = pipe( | |
| <span class="hljs-string">"a cat holding a sign that says hello"</span>, | |
| generator=torch.Generator(<span class="hljs-string">"cpu"</span>).manual_seed(<span class="hljs-number">0</span>), | |
| ).images[<span class="hljs-number">0</span>] | |
| image.save(<span class="hljs-string">"lumina-single-file.png"</span>) | |
| `,wrap:!1}}),z=new Ge({props:{title:"Using GGUF Quantized Checkpoints with Lumina Image 2.0",local:"using-gguf-quantized-checkpoints-with-lumina-image-20",headingTag:"h2"}}),H=new Se({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEx1bWluYTJUcmFuc2Zvcm1lcjJETW9kZWwlMkMlMjBMdW1pbmEyUGlwZWxpbmUlMkMlMjBHR1VGUXVhbnRpemF0aW9uQ29uZmlnJTIwJTBBJTBBY2twdF9wYXRoJTIwJTNEJTIwJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmNhbGN1aXMlMkZsdW1pbmEtZ2d1ZiUyRmJsb2IlMkZtYWluJTJGbHVtaW5hMi1xNF8wLmdndWYlMjIlMEF0cmFuc2Zvcm1lciUyMCUzRCUyMEx1bWluYTJUcmFuc2Zvcm1lcjJETW9kZWwuZnJvbV9zaW5nbGVfZmlsZSglMEElMjAlMjAlMjAlMjBja3B0X3BhdGglMkMlMEElMjAlMjAlMjAlMjBxdWFudGl6YXRpb25fY29uZmlnJTNER0dVRlF1YW50aXphdGlvbkNvbmZpZyhjb21wdXRlX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYpJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUyQyUwQSklMEElMEFwaXBlJTIwJTNEJTIwTHVtaW5hMlBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJBbHBoYS1WTExNJTJGTHVtaW5hLUltYWdlLTIuMCUyMiUyQyUyMHRyYW5zZm9ybWVyJTNEdHJhbnNmb3JtZXIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQXBpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEFpbWFnZSUyMCUzRCUyMHBpcGUoJTBBJTIwJTIwJTIwJTIwJTIyYSUyMGNhdCUyMGhvbGRpbmclMjBhJTIwc2lnbiUyMHRoYXQlMjBzYXlzJTIwaGVsbG8lMjIlMkMlMEElMjAlMjAlMjAlMjBnZW5lcmF0b3IlM0R0b3JjaC5HZW5lcmF0b3IoJTIyY3B1JTIyKS5tYW51YWxfc2VlZCgwKSUyQyUwQSkuaW1hZ2VzJTVCMCU1RCUwQWltYWdlLnNhdmUoJTIybHVtaW5hLWdndWYucG5nJTIyKQ==",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> Lumina2Transformer2DModel, Lumina2Pipeline, GGUFQuantizationConfig | |
| ckpt_path = <span class="hljs-string">"https://huggingface.co/calcuis/lumina-gguf/blob/main/lumina2-q4_0.gguf"</span> | |
| transformer = Lumina2Transformer2DModel.from_single_file( | |
| ckpt_path, | |
| quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16), | |
| torch_dtype=torch.bfloat16, | |
| ) | |
| pipe = Lumina2Pipeline.from_pretrained( | |
| <span class="hljs-string">"Alpha-VLLM/Lumina-Image-2.0"</span>, transformer=transformer, torch_dtype=torch.bfloat16 | |
| ) | |
| pipe.enable_model_cpu_offload() | |
| image = pipe( | |
| <span class="hljs-string">"a cat holding a sign that says hello"</span>, | |
| generator=torch.Generator(<span class="hljs-string">"cpu"</span>).manual_seed(<span class="hljs-number">0</span>), | |
| ).images[<span class="hljs-number">0</span>] | |
| image.save(<span class="hljs-string">"lumina-gguf.png"</span>)`,wrap:!1}}),q=new Ge({props:{title:"Lumina2Pipeline",local:"diffusers.Lumina2Pipeline",headingTag:"h2"}}),F=new K({props:{name:"class diffusers.Lumina2Pipeline",anchor:"diffusers.Lumina2Pipeline",parameters:[{name:"transformer",val:": Lumina2Transformer2DModel"},{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": Gemma2PreTrainedModel"},{name:"tokenizer",val:": typing.Union[transformers.models.gemma.tokenization_gemma.GemmaTokenizer, transformers.models.gemma.tokenization_gemma_fast.GemmaTokenizerFast]"}],parametersDescription:[{anchor:"diffusers.Lumina2Pipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_12762/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.Lumina2Pipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>Gemma2PreTrainedModel</code>) — | |
| Frozen Gemma2 text-encoder.`,name:"text_encoder"},{anchor:"diffusers.Lumina2Pipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>GemmaTokenizer</code> or <code>GemmaTokenizerFast</code>) — | |
| Gemma tokenizer.`,name:"tokenizer"},{anchor:"diffusers.Lumina2Pipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/pr_12762/en/api/models/transformer2d#diffusers.Transformer2DModel">Transformer2DModel</a>) — | |
| A text conditioned <code>Transformer2DModel</code> to denoise the encoded image latents.`,name:"transformer"},{anchor:"diffusers.Lumina2Pipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_12762/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) — | |
| A scheduler to be used in combination with <code>transformer</code> to denoise the encoded image latents.`,name:"scheduler"}],source:"https://github.com/huggingface/diffusers/blob/vr_12762/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L137"}}),R=new K({props:{name:"__call__",anchor:"diffusers.Lumina2Pipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"width",val:": typing.Optional[int] = None"},{name:"height",val:": typing.Optional[int] = None"},{name:"num_inference_steps",val:": int = 30"},{name:"guidance_scale",val:": float = 4.0"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"sigmas",val:": typing.List[float] = None"},{name:"num_images_per_prompt",val:": typing.Optional[int] = 1"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"attention_kwargs",val:": typing.Optional[typing.Dict[str, typing.Any]] = None"},{name:"callback_on_step_end",val:": typing.Optional[typing.Callable[[int, int, typing.Dict], NoneType]] = None"},{name:"callback_on_step_end_tensor_inputs",val:": typing.List[str] = ['latents']"},{name:"system_prompt",val:": typing.Optional[str] = None"},{name:"cfg_trunc_ratio",val:": float = 1.0"},{name:"cfg_normalization",val:": bool = True"},{name:"max_sequence_length",val:": int = 256"}],parametersDescription:[{anchor:"diffusers.Lumina2Pipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>. | |
| instead.`,name:"prompt"},{anchor:"diffusers.Lumina2Pipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.Lumina2Pipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 30) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.Lumina2Pipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>List[float]</code>, <em>optional</em>) — | |
| Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in | |
| their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed | |
| will be used.`,name:"sigmas"},{anchor:"diffusers.Lumina2Pipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 4.0) — | |
| Guidance scale as defined in <a href="https://huggingface.co/papers/2207.12598" rel="nofollow">Classifier-Free Diffusion | |
| Guidance</a>. <code>guidance_scale</code> is defined as <code>w</code> of equation 2. | |
| of <a href="https://huggingface.co/papers/2205.11487" rel="nofollow">Imagen Paper</a>. Guidance scale is enabled by setting | |
| <code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to | |
| the text <code>prompt</code>, usually at the expense of lower image quality.`,name:"guidance_scale"},{anchor:"diffusers.Lumina2Pipeline.__call__.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of images to generate per prompt.`,name:"num_images_per_prompt"},{anchor:"diffusers.Lumina2Pipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.Lumina2Pipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.Lumina2Pipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) in the DDIM paper: <a href="https://huggingface.co/papers/2010.02502" rel="nofollow">https://huggingface.co/papers/2010.02502</a>. Only | |
| applies to <a href="/docs/diffusers/pr_12762/en/api/schedulers/ddim#diffusers.DDIMScheduler">schedulers.DDIMScheduler</a>, will be ignored for others.`,name:"eta"},{anchor:"diffusers.Lumina2Pipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a> | |
| to make generation deterministic.`,name:"generator"},{anchor:"diffusers.Lumina2Pipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.Lumina2Pipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.Lumina2Pipeline.__call__.prompt_attention_mask",description:"<strong>prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — Pre-generated attention mask for text embeddings.",name:"prompt_attention_mask"},{anchor:"diffusers.Lumina2Pipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. For Lumina-T2I this negative prompt should be "". If not | |
| provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.Lumina2Pipeline.__call__.negative_prompt_attention_mask",description:`<strong>negative_prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated attention mask for negative text embeddings.`,name:"negative_prompt_attention_mask"},{anchor:"diffusers.Lumina2Pipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) — | |
| The output format of the generate image. Choose between | |
| <a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.Lumina2Pipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>~pipelines.stable_diffusion.IFPipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.Lumina2Pipeline.__call__.attention_kwargs",description:`<strong>attention_kwargs</strong> — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under | |
| <code>self.processor</code> in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,name:"attention_kwargs"},{anchor:"diffusers.Lumina2Pipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls at the end of each denoising steps during the inference. The function is called | |
| with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a list of all tensors as specified by | |
| <code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.Lumina2Pipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) — | |
| The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list | |
| will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the | |
| <code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.Lumina2Pipeline.__call__.system_prompt",description:`<strong>system_prompt</strong> (<code>str</code>, <em>optional</em>) — | |
| The system prompt to use for the image generation.`,name:"system_prompt"},{anchor:"diffusers.Lumina2Pipeline.__call__.cfg_trunc_ratio",description:`<strong>cfg_trunc_ratio</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1.0</code>) — | |
| The ratio of the timestep interval to apply normalization-based guidance scale.`,name:"cfg_trunc_ratio"},{anchor:"diffusers.Lumina2Pipeline.__call__.cfg_normalization",description:`<strong>cfg_normalization</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to apply normalization-based guidance scale.`,name:"cfg_normalization"},{anchor:"diffusers.Lumina2Pipeline.__call__.max_sequence_length",description:`<strong>max_sequence_length</strong> (<code>int</code>, defaults to <code>256</code>) — | |
| Maximum sequence length to use with the <code>prompt</code>.`,name:"max_sequence_length"}],source:"https://github.com/huggingface/diffusers/blob/vr_12762/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L524",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <a | |
| href="/docs/diffusers/pr_12762/en/api/pipelines/ddim#diffusers.ImagePipelineOutput" | |
| >ImagePipelineOutput</a> is returned, otherwise a <code>tuple</code> is | |
| returned where the first element is a list with the generated images</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/diffusers/pr_12762/en/api/pipelines/ddim#diffusers.ImagePipelineOutput" | |
| >ImagePipelineOutput</a> or <code>tuple</code></p> | |
| `}}),k=new ft({props:{anchor:"diffusers.Lumina2Pipeline.__call__.example",$$slots:{default:[_t]},$$scope:{ctx:de}}}),S=new K({props:{name:"disable_vae_slicing",anchor:"diffusers.Lumina2Pipeline.disable_vae_slicing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_12762/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L444"}}),Q=new K({props:{name:"disable_vae_tiling",anchor:"diffusers.Lumina2Pipeline.disable_vae_tiling",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_12762/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L471"}}),A=new K({props:{name:"enable_vae_slicing",anchor:"diffusers.Lumina2Pipeline.enable_vae_slicing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_12762/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L431"}}),X=new K({props:{name:"enable_vae_tiling",anchor:"diffusers.Lumina2Pipeline.enable_vae_tiling",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_12762/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L457"}}),D=new K({props:{name:"encode_prompt",anchor:"diffusers.Lumina2Pipeline.encode_prompt",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"num_images_per_prompt",val:": int = 1"},{name:"device",val:": typing.Optional[torch.device] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"system_prompt",val:": typing.Optional[str] = None"},{name:"max_sequence_length",val:": int = 256"}],parametersDescription:[{anchor:"diffusers.Lumina2Pipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.Lumina2Pipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt not to guide the image generation. If not defined, one has to pass <code>negative_prompt_embeds</code> | |
| instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is less than <code>1</code>). For | |
| Lumina-T2I, this should be "".`,name:"negative_prompt"},{anchor:"diffusers.Lumina2Pipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.Lumina2Pipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.Lumina2Pipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>, <em>optional</em>): | |
| torch device to place the resulting embeddings on`,name:"device"},{anchor:"diffusers.Lumina2Pipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.Lumina2Pipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. For Lumina-T2I, it’s should be the embeddings of the "" string.`,name:"negative_prompt_embeds"},{anchor:"diffusers.Lumina2Pipeline.encode_prompt.max_sequence_length",description:`<strong>max_sequence_length</strong> (<code>int</code>, defaults to <code>256</code>) — | |
| Maximum sequence length to use for the prompt.`,name:"max_sequence_length"}],source:"https://github.com/huggingface/diffusers/blob/vr_12762/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L238"}}),O=new ht({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/lumina2.md"}}),{c(){c=r("meta"),P=i(),x=r("p"),v=i(),u(w.$$.fragment),m=i(),M=r("div"),M.innerHTML=Qe,me=i(),Z=r("p"),Z.innerHTML=Ae,ce=i(),j=r("p"),j.textContent=Xe,ue=i(),E=r("p"),E.innerHTML=De,ge=i(),$=r("blockquote"),$.innerHTML=Oe,fe=i(),u(B.$$.fragment),he=i(),Y=r("p"),Y.innerHTML=Ke,_e=i(),u(V.$$.fragment),be=i(),u(z.$$.fragment),ye=i(),N=r("p"),N.innerHTML=et,ve=i(),u(H.$$.fragment),we=i(),u(q.$$.fragment),Me=i(),p=r("div"),u(F.$$.fragment),We=i(),ee=r("p"),ee.textContent=tt,Ce=i(),te=r("p"),te.innerHTML=nt,Pe=i(),L=r("div"),u(R.$$.fragment),Ze=i(),ne=r("p"),ne.textContent=it,je=i(),u(k.$$.fragment),Ee=i(),U=r("div"),u(S.$$.fragment),Be=i(),ie=r("p"),ie.innerHTML=at,Ye=i(),I=r("div"),u(Q.$$.fragment),Ve=i(),ae=r("p"),ae.innerHTML=ot,ze=i(),G=r("div"),u(A.$$.fragment),Ne=i(),oe=r("p"),oe.textContent=st,He=i(),W=r("div"),u(X.$$.fragment),qe=i(),se=r("p"),se.textContent=rt,Fe=i(),C=r("div"),u(D.$$.fragment),Re=i(),re=r("p"),re.textContent=lt,Te=i(),u(O.$$.fragment),xe=i(),pe=r("p"),this.h()},l(e){const t=gt("svelte-u9bgzb",document.head);c=l(t,"META",{name:!0,content:!0}),t.forEach(n),P=a(e),x=l(e,"P",{}),J(x).forEach(n),v=a(e),g(w.$$.fragment,e),m=a(e),M=l(e,"DIV",{class:!0,"data-svelte-h":!0}),f(M)!=="svelte-si9ct8"&&(M.innerHTML=Qe),me=a(e),Z=l(e,"P",{"data-svelte-h":!0}),f(Z)!=="svelte-1yt6rrf"&&(Z.innerHTML=Ae),ce=a(e),j=l(e,"P",{"data-svelte-h":!0}),f(j)!=="svelte-1cwsb16"&&(j.textContent=Xe),ue=a(e),E=l(e,"P",{"data-svelte-h":!0}),f(E)!=="svelte-14hqiub"&&(E.innerHTML=De),ge=a(e),$=l(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),f($)!=="svelte-r1jcqf"&&($.innerHTML=Oe),fe=a(e),g(B.$$.fragment,e),he=a(e),Y=l(e,"P",{"data-svelte-h":!0}),f(Y)!=="svelte-k5cnxs"&&(Y.innerHTML=Ke),_e=a(e),g(V.$$.fragment,e),be=a(e),g(z.$$.fragment,e),ye=a(e),N=l(e,"P",{"data-svelte-h":!0}),f(N)!=="svelte-1sse30f"&&(N.innerHTML=et),ve=a(e),g(H.$$.fragment,e),we=a(e),g(q.$$.fragment,e),Me=a(e),p=l(e,"DIV",{class:!0});var d=J(p);g(F.$$.fragment,d),We=a(d),ee=l(d,"P",{"data-svelte-h":!0}),f(ee)!=="svelte-1ennvvi"&&(ee.textContent=tt),Ce=a(d),te=l(d,"P",{"data-svelte-h":!0}),f(te)!=="svelte-xb8txp"&&(te.innerHTML=nt),Pe=a(d),L=l(d,"DIV",{class:!0});var le=J(L);g(R.$$.fragment,le),Ze=a(le),ne=l(le,"P",{"data-svelte-h":!0}),f(ne)!=="svelte-v78lg8"&&(ne.textContent=it),je=a(le),g(k.$$.fragment,le),le.forEach(n),Ee=a(d),U=l(d,"DIV",{class:!0});var Je=J(U);g(S.$$.fragment,Je),Be=a(Je),ie=l(Je,"P",{"data-svelte-h":!0}),f(ie)!=="svelte-1s3c06i"&&(ie.innerHTML=at),Je.forEach(n),Ye=a(d),I=l(d,"DIV",{class:!0});var $e=J(I);g(Q.$$.fragment,$e),Ve=a($e),ae=l($e,"P",{"data-svelte-h":!0}),f(ae)!=="svelte-pkn4ui"&&(ae.innerHTML=ot),$e.forEach(n),ze=a(d),G=l(d,"DIV",{class:!0});var ke=J(G);g(A.$$.fragment,ke),Ne=a(ke),oe=l(ke,"P",{"data-svelte-h":!0}),f(oe)!=="svelte-14bnrb6"&&(oe.textContent=st),ke.forEach(n),He=a(d),W=l(d,"DIV",{class:!0});var Ue=J(W);g(X.$$.fragment,Ue),qe=a(Ue),se=l(Ue,"P",{"data-svelte-h":!0}),f(se)!=="svelte-1xwrf7t"&&(se.textContent=rt),Ue.forEach(n),Fe=a(d),C=l(d,"DIV",{class:!0});var Ie=J(C);g(D.$$.fragment,Ie),Re=a(Ie),re=l(Ie,"P",{"data-svelte-h":!0}),f(re)!=="svelte-16q0ax1"&&(re.textContent=lt),Ie.forEach(n),d.forEach(n),Te=a(e),g(O.$$.fragment,e),xe=a(e),pe=l(e,"P",{}),J(pe).forEach(n),this.h()},h(){T(c,"name","hf:doc:metadata"),T(c,"content",yt),T(M,"class","flex flex-wrap space-x-1"),T($,"class","tip"),T(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(I,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(W,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(p,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){o(document.head,c),s(e,P,t),s(e,x,t),s(e,v,t),h(w,e,t),s(e,m,t),s(e,M,t),s(e,me,t),s(e,Z,t),s(e,ce,t),s(e,j,t),s(e,ue,t),s(e,E,t),s(e,ge,t),s(e,$,t),s(e,fe,t),h(B,e,t),s(e,he,t),s(e,Y,t),s(e,_e,t),h(V,e,t),s(e,be,t),h(z,e,t),s(e,ye,t),s(e,N,t),s(e,ve,t),h(H,e,t),s(e,we,t),h(q,e,t),s(e,Me,t),s(e,p,t),h(F,p,null),o(p,We),o(p,ee),o(p,Ce),o(p,te),o(p,Pe),o(p,L),h(R,L,null),o(L,Ze),o(L,ne),o(L,je),h(k,L,null),o(p,Ee),o(p,U),h(S,U,null),o(U,Be),o(U,ie),o(p,Ye),o(p,I),h(Q,I,null),o(I,Ve),o(I,ae),o(p,ze),o(p,G),h(A,G,null),o(G,Ne),o(G,oe),o(p,He),o(p,W),h(X,W,null),o(W,qe),o(W,se),o(p,Fe),o(p,C),h(D,C,null),o(C,Re),o(C,re),s(e,Te,t),h(O,e,t),s(e,xe,t),s(e,pe,t),Le=!0},p(e,[t]){const d={};t&2&&(d.$$scope={dirty:t,ctx:e}),k.$set(d)},i(e){Le||(_(w.$$.fragment,e),_(B.$$.fragment,e),_(V.$$.fragment,e),_(z.$$.fragment,e),_(H.$$.fragment,e),_(q.$$.fragment,e),_(F.$$.fragment,e),_(R.$$.fragment,e),_(k.$$.fragment,e),_(S.$$.fragment,e),_(Q.$$.fragment,e),_(A.$$.fragment,e),_(X.$$.fragment,e),_(D.$$.fragment,e),_(O.$$.fragment,e),Le=!0)},o(e){b(w.$$.fragment,e),b(B.$$.fragment,e),b(V.$$.fragment,e),b(z.$$.fragment,e),b(H.$$.fragment,e),b(q.$$.fragment,e),b(F.$$.fragment,e),b(R.$$.fragment,e),b(k.$$.fragment,e),b(S.$$.fragment,e),b(Q.$$.fragment,e),b(A.$$.fragment,e),b(X.$$.fragment,e),b(D.$$.fragment,e),b(O.$$.fragment,e),Le=!1},d(e){e&&(n(P),n(x),n(v),n(m),n(M),n(me),n(Z),n(ce),n(j),n(ue),n(E),n(ge),n($),n(fe),n(he),n(Y),n(_e),n(be),n(ye),n(N),n(ve),n(we),n(Me),n(p),n(Te),n(xe),n(pe)),n(c),y(w,e),y(B,e),y(V,e),y(z,e),y(H,e),y(q,e),y(F),y(R),y(k),y(S),y(Q),y(A),y(X),y(D),y(O,e)}}}const yt='{"title":"Lumina2","local":"lumina2","sections":[{"title":"Using Single File loading with Lumina Image 2.0","local":"using-single-file-loading-with-lumina-image-20","sections":[],"depth":2},{"title":"Using GGUF Quantized Checkpoints with Lumina Image 2.0","local":"using-gguf-quantized-checkpoints-with-lumina-image-20","sections":[],"depth":2},{"title":"Lumina2Pipeline","local":"diffusers.Lumina2Pipeline","sections":[],"depth":2}],"depth":1}';function vt(de){return dt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class $t extends ct{constructor(c){super(),ut(this,c,vt,bt,pt,{})}}export{$t as component}; | |
Xet Storage Details
- Size:
- 35.4 kB
- Xet hash:
- ea205c2037c4b28439a1d9013d801b5225739db49810e997fceef7e1c067ae4d
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.