Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev / diffusers /pr_10727 /en /_app /immutable /nodes /109.eee7b222.js

rtrm's picture

about 1 month ago

35.9 kB

	import{s as st,o as rt,n as ot}from"../chunks/scheduler.8c3d61f6.js";import{S as lt,i as mt,g as l,s as i,r as u,A as pt,h as m,f as n,c as a,j as $,u as g,x as v,k as U,y as o,a as s,v as f,d as h,t as _,w as b}from"../chunks/index.da70eac4.js";import{T as dt}from"../chunks/Tip.1d9b8c37.js";import{D as K}from"../chunks/Docstring.6b390b9a.js";import{C as Xe}from"../chunks/CodeBlock.00a903b3.js";import{E as ct}from"../chunks/ExampleCodeBlock.db12be95.js";import{H as Je,E as ut}from"../chunks/EditOnGithub.1e64e623.js";function gt(ee){let r,w='Make sure to check out the Schedulers <a href="../../using-diffusers/schedulers">guide</a> to learn how to explore the tradeoff between scheduler speed and quality, and see the <a href="../../using-diffusers/loading#reuse-a-pipeline">reuse components across pipelines</a> section to learn how to efficiently load the same components into multiple pipelines.';return{c(){r=l("p"),r.innerHTML=w},l(y){r=m(y,"P",{"data-svelte-h":!0}),v(r)!=="svelte-1qn15hi"&&(r.innerHTML=w)},m(y,T){s(y,r,T)},p:ot,d(y){y&&n(r)}}}function ft(ee){let r,w="Examples:",y,T,M;return T=new Xe({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTHVtaW5hMlRleHQySW1nUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwTHVtaW5hMlRleHQySW1nUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUyMkFscGhhLVZMTE0lMkZMdW1pbmEtSW1hZ2UtMi4wJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMEElMjMlMjBFbmFibGUlMjBtZW1vcnklMjBvcHRpbWl6YXRpb25zLiUwQXBpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJVcHBlciUyMGJvZHklMjBvZiUyMGElMjB5b3VuZyUyMHdvbWFuJTIwaW4lMjBhJTIwVmljdG9yaWFuLWVyYSUyMG91dGZpdCUyMHdpdGglMjBicmFzcyUyMGdvZ2dsZXMlMjBhbmQlMjBsZWF0aGVyJTIwc3RyYXBzLiUyMEJhY2tncm91bmQlMjBzaG93cyUyMGFuJTIwaW5kdXN0cmlhbCUyMHJldm9sdXRpb24lMjBjaXR5c2NhcGUlMjB3aXRoJTIwc21va3klMjBza2llcyUyMGFuZCUyMHRhbGwlMkMlMjBtZXRhbCUyMHN0cnVjdHVyZXMlMjIlMEFpbWFnZSUyMCUzRCUyMHBpcGUocHJvbXB0KS5pbWFnZXMlNUIwJTVE",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> Lumina2Text2ImgPipeline

	<span class="hljs-meta">>>> </span>pipe = Lumina2Text2ImgPipeline.from_pretrained(<span class="hljs-string">"Alpha-VLLM/Lumina-Image-2.0"</span>, torch_dtype=torch.bfloat16)
	<span class="hljs-meta">>>> </span><span class="hljs-comment"># Enable memory optimizations.</span>
	<span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload()

	<span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"Upper body of a young woman in a Victorian-era outfit with brass goggles and leather straps. Background shows an industrial revolution cityscape with smoky skies and tall, metal structures"</span>
	<span class="hljs-meta">>>> </span>image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),{c(){r=l("p"),r.textContent=w,y=i(),u(T.$$.fragment)},l(c){r=m(c,"P",{"data-svelte-h":!0}),v(r)!=="svelte-kvfsh7"&&(r.textContent=w),y=a(c),g(T.$$.fragment,c)},m(c,x){s(c,r,x),s(c,y,x),f(T,c,x),M=!0},p:ot,i(c){M\|\|(h(T.$$.fragment,c),M=!0)},o(c){_(T.$$.fragment,c),M=!1},d(c){c&&(n(r),n(y)),b(T,c)}}}function ht(ee){let r,w,y,T,M,c,x,Ye='<a href="https://huggingface.co/Alpha-VLLM/Lumina-Image-2.0" rel="nofollow">Lumina Image 2.0: A Unified and Efficient Image Generative Model</a> is a 2 billion parameter flow-based diffusion transformer capable of generating diverse images from text descriptions.',pe,C,De="The abstract from the paper is:",de,E,ze="<em>We introduce Lumina-Image 2.0, an advanced text-to-image model that surpasses previous state-of-the-art methods across multiple benchmarks, while also shedding light on its potential to evolve into a generalist vision intelligence model. Lumina-Image 2.0 exhibits three key properties: (1) Unification – it adopts a unified architecture that treats text and image tokens as a joint sequence, enabling natural cross-modal interactions and facilitating task expansion. Besides, since high-quality captioners can provide semantically better-aligned text-image training pairs, we introduce a unified captioning system, UniCaptioner, which generates comprehensive and precise captions for the model. This not only accelerates model convergence but also enhances prompt adherence, variable-length prompt handling, and task generalization via prompt templates. (2) Efficiency – to improve the efficiency of the unified architecture, we develop a set of optimization techniques that improve semantic learning and fine-grained texture generation during training while incorporating inference-time acceleration strategies without compromising image quality. (3) Transparency – we open-source all training details, code, and models to ensure full reproducibility, aiming to bridge the gap between well-resourced closed-source research teams and independent developers.</em>",ce,J,ue,V,ge,B,Qe="Single file loading for Lumina Image 2.0 is available for the <code>Lumina2Transformer2DModel</code>",fe,F,he,H,_e,R,Se="GGUF Quantized checkpoints for the <code>Lumina2Transformer2DModel</code> can be loaded via <code>from_single_file</code> with the <code>GGUFQuantizationConfig</code>",be,q,ye,N,ve,p,X,ke,te,Ae="Pipeline for text-to-image generation using Lumina-T2I.",Ze,ne,Oe=`This model inherits from <a href="/docs/diffusers/pr_10727/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods the
	library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)`,je,I,Y,Ge,ie,Ke="Function invoked when calling the pipeline for generation.",Pe,k,We,Z,D,Ce,ae,et=`Disable sliced VAE decoding. If <code>enable_vae_slicing</code> was previously enabled, this method will go back to
	computing decoding in one step.`,Ee,j,z,Ve,oe,tt=`Disable tiled VAE decoding. If <code>enable_vae_tiling</code> was previously enabled, this method will go back to
	computing decoding in one step.`,Be,G,Q,Fe,se,nt=`Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
	compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.`,He,P,S,Re,re,it=`Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
	compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
	processing larger images.`,qe,W,A,Ne,le,at="Encodes the prompt into text encoder hidden states.",Te,O,Me,me,xe;return M=new Je({props:{title:"Lumina2",local:"lumina2",headingTag:"h1"}}),J=new dt({props:{$$slots:{default:[gt]},$$scope:{ctx:ee}}}),V=new Je({props:{title:"Using Single File loading with Lumina Image 2.0",local:"using-single-file-loading-with-lumina-image-20",headingTag:"h2"}}),F=new Xe({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTHVtaW5hMlRyYW5zZm9ybWVyMkRNb2RlbCUyQyUyMEx1bWluYTJUZXh0MkltZ1BpcGVsaW5lJTBBJTBBY2twdF9wYXRoJTIwJTNEJTIwJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRkFscGhhLVZMTE0lMkZMdW1pbmEtSW1hZ2UtMi4wJTJGYmxvYiUyRm1haW4lMkZjb25zb2xpZGF0ZWQuMDAtb2YtMDEucHRoJTIyJTBBdHJhbnNmb3JtZXIlMjAlM0QlMjBMdW1pbmEyVHJhbnNmb3JtZXIyRE1vZGVsLmZyb21fc2luZ2xlX2ZpbGUoJTBBJTIwJTIwJTIwJTIwY2twdF9wYXRoJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUwQSklMEElMEFwaXBlJTIwJTNEJTIwTHVtaW5hMlRleHQySW1nUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMkFscGhhLVZMTE0lMkZMdW1pbmEtSW1hZ2UtMi4wJTIyJTJDJTIwdHJhbnNmb3JtZXIlM0R0cmFuc2Zvcm1lciUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMEEpJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQWltYWdlJTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjAlMjJhJTIwY2F0JTIwaG9sZGluZyUyMGElMjBzaWduJTIwdGhhdCUyMHNheXMlMjBoZWxsbyUyMiUyQyUwQSUyMCUyMCUyMCUyMGdlbmVyYXRvciUzRHRvcmNoLkdlbmVyYXRvciglMjJjcHUlMjIpLm1hbnVhbF9zZWVkKDApJTJDJTBBKS5pbWFnZXMlNUIwJTVEJTBBaW1hZ2Uuc2F2ZSglMjJsdW1pbmEtc2luZ2xlLWZpbGUucG5nJTIyKSUwQQ==",highlighted:`<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> Lumina2Transformer2DModel, Lumina2Text2ImgPipeline

	ckpt_path = <span class="hljs-string">"https://huggingface.co/Alpha-VLLM/Lumina-Image-2.0/blob/main/consolidated.00-of-01.pth"</span>
	transformer = Lumina2Transformer2DModel.from_single_file(
	ckpt_path, torch_dtype=torch.bfloat16
	)

	pipe = Lumina2Text2ImgPipeline.from_pretrained(
	<span class="hljs-string">"Alpha-VLLM/Lumina-Image-2.0"</span>, transformer=transformer, torch_dtype=torch.bfloat16
	)
	pipe.enable_model_cpu_offload()
	image = pipe(
	<span class="hljs-string">"a cat holding a sign that says hello"</span>,
	generator=torch.Generator(<span class="hljs-string">"cpu"</span>).manual_seed(<span class="hljs-number">0</span>),
	).images[<span class="hljs-number">0</span>]
	image.save(<span class="hljs-string">"lumina-single-file.png"</span>)
	`,wrap:!1}}),H=new Je({props:{title:"Using GGUF Quantized Checkpoints with Lumina Image 2.0",local:"using-gguf-quantized-checkpoints-with-lumina-image-20",headingTag:"h2"}}),q=new Xe({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEx1bWluYTJUcmFuc2Zvcm1lcjJETW9kZWwlMkMlMjBMdW1pbmEyVGV4dDJJbWdQaXBlbGluZSUyQyUyMEdHVUZRdWFudGl6YXRpb25Db25maWclMjAlMEElMEFja3B0X3BhdGglMjAlM0QlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGY2FsY3VpcyUyRmx1bWluYS1nZ3VmJTJGYmxvYiUyRm1haW4lMkZsdW1pbmEyLXE0XzAuZ2d1ZiUyMiUwQXRyYW5zZm9ybWVyJTIwJTNEJTIwTHVtaW5hMlRyYW5zZm9ybWVyMkRNb2RlbC5mcm9tX3NpbmdsZV9maWxlKCUwQSUyMCUyMCUyMCUyMGNrcHRfcGF0aCUyQyUwQSUyMCUyMCUyMCUyMHF1YW50aXphdGlvbl9jb25maWclM0RHR1VGUXVhbnRpemF0aW9uQ29uZmlnKGNvbXB1dGVfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTJDJTBBKSUwQSUwQXBpcGUlMjAlM0QlMjBMdW1pbmEyVGV4dDJJbWdQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyQWxwaGEtVkxMTSUyRkx1bWluYS1JbWFnZS0yLjAlMjIlMkMlMjB0cmFuc2Zvcm1lciUzRHRyYW5zZm9ybWVyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUwQSklMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBaW1hZ2UlMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMCUyMmElMjBjYXQlMjBob2xkaW5nJTIwYSUyMHNpZ24lMjB0aGF0JTIwc2F5cyUyMGhlbGxvJTIyJTJDJTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yJTNEdG9yY2guR2VuZXJhdG9yKCUyMmNwdSUyMikubWFudWFsX3NlZWQoMCklMkMlMEEpLmltYWdlcyU1QjAlNUQlMEFpbWFnZS5zYXZlKCUyMmx1bWluYS1nZ3VmLnBuZyUyMik=",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> Lumina2Transformer2DModel, Lumina2Text2ImgPipeline, GGUFQuantizationConfig

	ckpt_path = <span class="hljs-string">"https://huggingface.co/calcuis/lumina-gguf/blob/main/lumina2-q4_0.gguf"</span>
	transformer = Lumina2Transformer2DModel.from_single_file(
	ckpt_path,
	quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
	torch_dtype=torch.bfloat16,
	)

	pipe = Lumina2Text2ImgPipeline.from_pretrained(
	<span class="hljs-string">"Alpha-VLLM/Lumina-Image-2.0"</span>, transformer=transformer, torch_dtype=torch.bfloat16
	)
	pipe.enable_model_cpu_offload()
	image = pipe(
	<span class="hljs-string">"a cat holding a sign that says hello"</span>,
	generator=torch.Generator(<span class="hljs-string">"cpu"</span>).manual_seed(<span class="hljs-number">0</span>),
	).images[<span class="hljs-number">0</span>]
	image.save(<span class="hljs-string">"lumina-gguf.png"</span>)`,wrap:!1}}),N=new Je({props:{title:"Lumina2Text2ImgPipeline",local:"diffusers.Lumina2Text2ImgPipeline",headingTag:"h2"}}),X=new K({props:{name:"class diffusers.Lumina2Text2ImgPipeline",anchor:"diffusers.Lumina2Text2ImgPipeline",parameters:[{name:"transformer",val:": Lumina2Transformer2DModel"},{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": AutoModel"},{name:"tokenizer",val:": AutoTokenizer"}],parametersDescription:[{anchor:"diffusers.Lumina2Text2ImgPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_10727/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) —
	Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.Lumina2Text2ImgPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>AutoModel</code>) —
	Frozen text-encoder. Lumina-T2I uses
	<a href="https://huggingface.co/docs/transformers/model_doc/t5#transformers.AutoModel" rel="nofollow">T5</a>, specifically the
	<a href="https://huggingface.co/Alpha-VLLM/tree/main/t5-v1_1-xxl" rel="nofollow">t5-v1_1-xxl</a> variant.`,name:"text_encoder"},{anchor:"diffusers.Lumina2Text2ImgPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>AutoModel</code>) —
	Tokenizer of class
	<a href="https://huggingface.co/docs/transformers/model_doc/t5#transformers.AutoModel" rel="nofollow">AutoModel</a>.`,name:"tokenizer"},{anchor:"diffusers.Lumina2Text2ImgPipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/pr_10727/en/api/models/transformer2d#diffusers.Transformer2DModel">Transformer2DModel</a>) —
	A text conditioned <code>Transformer2DModel</code> to denoise the encoded image latents.`,name:"transformer"},{anchor:"diffusers.Lumina2Text2ImgPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_10727/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) —
	A scheduler to be used in combination with <code>transformer</code> to denoise the encoded image latents.`,name:"scheduler"}],source:"https://github.com/huggingface/diffusers/blob/vr_10727/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L143"}}),Y=new K({props:{name:"__call__",anchor:"diffusers.Lumina2Text2ImgPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"width",val:": typing.Optional[int] = None"},{name:"height",val:": typing.Optional[int] = None"},{name:"num_inference_steps",val:": int = 30"},{name:"guidance_scale",val:": float = 4.0"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"sigmas",val:": typing.List[float] = None"},{name:"num_images_per_prompt",val:": typing.Optional[int] = 1"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"callback_on_step_end",val:": typing.Optional[typing.Callable[[int, int, typing.Dict], NoneType]] = None"},{name:"callback_on_step_end_tensor_inputs",val:": typing.List[str] = ['latents']"},{name:"system_prompt",val:": typing.Optional[str] = None"},{name:"cfg_trunc_ratio",val:": float = 1.0"},{name:"cfg_normalization",val:": bool = True"},{name:"use_mask_in_transformer",val:": bool = True"},{name:"max_sequence_length",val:": int = 256"}],parametersDescription:[{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>.
	instead.`,name:"prompt"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt or prompts not to guide the image generation. If not defined, one has to pass
	<code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is
	less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 30) —
	The number of denoising steps. More denoising steps usually lead to a higher quality image at the
	expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>List[float]</code>, <em>optional</em>) —
	Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in
	their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed
	will be used.`,name:"sigmas"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 4.0) —
	Guidance scale as defined in <a href="https://arxiv.org/abs/2207.12598" rel="nofollow">Classifier-Free Diffusion Guidance</a>.
	<code>guidance_scale</code> is defined as <code>w</code> of equation 2. of <a href="https://arxiv.org/pdf/2205.11487.pdf" rel="nofollow">Imagen
	Paper</a>. Guidance scale is enabled by setting <code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to the text <code>prompt</code>,
	usually at the expense of lower image quality.`,name:"guidance_scale"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	The number of images to generate per prompt.`,name:"num_images_per_prompt"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size) —
	The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size) —
	The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) —
	Corresponds to parameter eta (η) in the DDIM paper: <a href="https://arxiv.org/abs/2010.02502" rel="nofollow">https://arxiv.org/abs/2010.02502</a>. Only applies to
	<a href="/docs/diffusers/pr_10727/en/api/schedulers/ddim#diffusers.DDIMScheduler">schedulers.DDIMScheduler</a>, will be ignored for others.`,name:"eta"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) —
	One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a>
	to make generation deterministic.`,name:"generator"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
	generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
	tensor will ge generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
	provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.prompt_attention_mask",description:"<strong>prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — Pre-generated attention mask for text embeddings.",name:"prompt_attention_mask"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated negative text embeddings. For Lumina-T2I this negative prompt should be "". If not
	provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.negative_prompt_attention_mask",description:`<strong>negative_prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated attention mask for negative text embeddings.`,name:"negative_prompt_attention_mask"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) —
	The output format of the generate image. Choose between
	<a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether or not to return a <code>~pipelines.stable_diffusion.IFPipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <em>optional</em>) —
	A function that calls at the end of each denoising steps during the inference. The function is called
	with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a list of all tensors as specified by
	<code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) —
	The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list
	will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the
	<code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.system_prompt",description:`<strong>system_prompt</strong> (<code>str</code>, <em>optional</em>) —
	The system prompt to use for the image generation.`,name:"system_prompt"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.cfg_trunc_ratio",description:`<strong>cfg_trunc_ratio</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1.0</code>) —
	The ratio of the timestep interval to apply normalization-based guidance scale.`,name:"cfg_trunc_ratio"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.cfg_normalization",description:`<strong>cfg_normalization</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether to apply normalization-based guidance scale.`,name:"cfg_normalization"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.use_mask_in_transformer",description:`<strong>use_mask_in_transformer</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether to use attention mask in <code>Lumina2Transformer2DModel</code>. Set <code>False</code> for performance gain.`,name:"use_mask_in_transformer"},{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.max_sequence_length",description:`<strong>max_sequence_length</strong> (<code>int</code>, defaults to <code>256</code>) —
	Maximum sequence length to use with the <code>prompt</code>.`,name:"max_sequence_length"}],source:"https://github.com/huggingface/diffusers/blob/vr_10727/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L503",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>If <code>return_dict</code> is <code>True</code>, <a
	href="/docs/diffusers/pr_10727/en/api/pipelines/unclip#diffusers.ImagePipelineOutput"
	>ImagePipelineOutput</a> is returned, otherwise a <code>tuple</code> is
	returned where the first element is a list with the generated images</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><a
	href="/docs/diffusers/pr_10727/en/api/pipelines/unclip#diffusers.ImagePipelineOutput"
	>ImagePipelineOutput</a> or <code>tuple</code></p>
	`}}),k=new ct({props:{anchor:"diffusers.Lumina2Text2ImgPipeline.__call__.example",$$slots:{default:[ft]},$$scope:{ctx:ee}}}),D=new K({props:{name:"disable_vae_slicing",anchor:"diffusers.Lumina2Text2ImgPipeline.disable_vae_slicing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_10727/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L445"}}),z=new K({props:{name:"disable_vae_tiling",anchor:"diffusers.Lumina2Text2ImgPipeline.disable_vae_tiling",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_10727/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L460"}}),Q=new K({props:{name:"enable_vae_slicing",anchor:"diffusers.Lumina2Text2ImgPipeline.enable_vae_slicing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_10727/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L438"}}),S=new K({props:{name:"enable_vae_tiling",anchor:"diffusers.Lumina2Text2ImgPipeline.enable_vae_tiling",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_10727/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L452"}}),A=new K({props:{name:"encode_prompt",anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"num_images_per_prompt",val:": int = 1"},{name:"device",val:": typing.Optional[torch.device] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"system_prompt",val:": typing.Optional[str] = None"},{name:"max_sequence_length",val:": int = 256"}],parametersDescription:[{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	prompt to be encoded`,name:"prompt"},{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt not to guide the image generation. If not defined, one has to pass <code>negative_prompt_embeds</code>
	instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is less than <code>1</code>). For
	Lumina-T2I, this should be "".`,name:"negative_prompt"},{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>, <em>optional</em>):
	torch device to place the resulting embeddings on`,name:"device"},{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
	provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated negative text embeddings. For Lumina-T2I, it’s should be the embeddings of the "" string.`,name:"negative_prompt_embeds"},{anchor:"diffusers.Lumina2Text2ImgPipeline.encode_prompt.max_sequence_length",description:`<strong>max_sequence_length</strong> (<code>int</code>, defaults to <code>256</code>) —
	Maximum sequence length to use for the prompt.`,name:"max_sequence_length"}],source:"https://github.com/huggingface/diffusers/blob/vr_10727/src/diffusers/pipelines/lumina2/pipeline_lumina2.py#L247"}}),O=new ut({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/lumina2.md"}}),{c(){r=l("meta"),w=i(),y=l("p"),T=i(),u(M.$$.fragment),c=i(),x=l("p"),x.innerHTML=Ye,pe=i(),C=l("p"),C.textContent=De,de=i(),E=l("p"),E.innerHTML=ze,ce=i(),u(J.$$.fragment),ue=i(),u(V.$$.fragment),ge=i(),B=l("p"),B.innerHTML=Qe,fe=i(),u(F.$$.fragment),he=i(),u(H.$$.fragment),_e=i(),R=l("p"),R.innerHTML=Se,be=i(),u(q.$$.fragment),ye=i(),u(N.$$.fragment),ve=i(),p=l("div"),u(X.$$.fragment),ke=i(),te=l("p"),te.textContent=Ae,Ze=i(),ne=l("p"),ne.innerHTML=Oe,je=i(),I=l("div"),u(Y.$$.fragment),Ge=i(),ie=l("p"),ie.textContent=Ke,Pe=i(),u(k.$$.fragment),We=i(),Z=l("div"),u(D.$$.fragment),Ce=i(),ae=l("p"),ae.innerHTML=et,Ee=i(),j=l("div"),u(z.$$.fragment),Ve=i(),oe=l("p"),oe.innerHTML=tt,Be=i(),G=l("div"),u(Q.$$.fragment),Fe=i(),se=l("p"),se.textContent=nt,He=i(),P=l("div"),u(S.$$.fragment),Re=i(),re=l("p"),re.textContent=it,qe=i(),W=l("div"),u(A.$$.fragment),Ne=i(),le=l("p"),le.textContent=at,Te=i(),u(O.$$.fragment),Me=i(),me=l("p"),this.h()},l(e){const t=pt("svelte-u9bgzb",document.head);r=m(t,"META",{name:!0,content:!0}),t.forEach(n),w=a(e),y=m(e,"P",{}),$(y).forEach(n),T=a(e),g(M.$$.fragment,e),c=a(e),x=m(e,"P",{"data-svelte-h":!0}),v(x)!=="svelte-1yt6rrf"&&(x.innerHTML=Ye),pe=a(e),C=m(e,"P",{"data-svelte-h":!0}),v(C)!=="svelte-1cwsb16"&&(C.textContent=De),de=a(e),E=m(e,"P",{"data-svelte-h":!0}),v(E)!=="svelte-14hqiub"&&(E.innerHTML=ze),ce=a(e),g(J.$$.fragment,e),ue=a(e),g(V.$$.fragment,e),ge=a(e),B=m(e,"P",{"data-svelte-h":!0}),v(B)!=="svelte-k5cnxs"&&(B.innerHTML=Qe),fe=a(e),g(F.$$.fragment,e),he=a(e),g(H.$$.fragment,e),_e=a(e),R=m(e,"P",{"data-svelte-h":!0}),v(R)!=="svelte-1sse30f"&&(R.innerHTML=Se),be=a(e),g(q.$$.fragment,e),ye=a(e),g(N.$$.fragment,e),ve=a(e),p=m(e,"DIV",{class:!0});var d=$(p);g(X.$$.fragment,d),ke=a(d),te=m(d,"P",{"data-svelte-h":!0}),v(te)!=="svelte-1ennvvi"&&(te.textContent=Ae),Ze=a(d),ne=m(d,"P",{"data-svelte-h":!0}),v(ne)!=="svelte-1ipmcmq"&&(ne.innerHTML=Oe),je=a(d),I=m(d,"DIV",{class:!0});var L=$(I);g(Y.$$.fragment,L),Ge=a(L),ie=m(L,"P",{"data-svelte-h":!0}),v(ie)!=="svelte-v78lg8"&&(ie.textContent=Ke),Pe=a(L),g(k.$$.fragment,L),L.forEach(n),We=a(d),Z=m(d,"DIV",{class:!0});var we=$(Z);g(D.$$.fragment,we),Ce=a(we),ae=m(we,"P",{"data-svelte-h":!0}),v(ae)!=="svelte-1s3c06i"&&(ae.innerHTML=et),we.forEach(n),Ee=a(d),j=m(d,"DIV",{class:!0});var Ie=$(j);g(z.$$.fragment,Ie),Ve=a(Ie),oe=m(Ie,"P",{"data-svelte-h":!0}),v(oe)!=="svelte-pkn4ui"&&(oe.innerHTML=tt),Ie.forEach(n),Be=a(d),G=m(d,"DIV",{class:!0});var Le=$(G);g(Q.$$.fragment,Le),Fe=a(Le),se=m(Le,"P",{"data-svelte-h":!0}),v(se)!=="svelte-14bnrb6"&&(se.textContent=nt),Le.forEach(n),He=a(d),P=m(d,"DIV",{class:!0});var $e=$(P);g(S.$$.fragment,$e),Re=a($e),re=m($e,"P",{"data-svelte-h":!0}),v(re)!=="svelte-1xwrf7t"&&(re.textContent=it),$e.forEach(n),qe=a(d),W=m(d,"DIV",{class:!0});var Ue=$(W);g(A.$$.fragment,Ue),Ne=a(Ue),le=m(Ue,"P",{"data-svelte-h":!0}),v(le)!=="svelte-16q0ax1"&&(le.textContent=at),Ue.forEach(n),d.forEach(n),Te=a(e),g(O.$$.fragment,e),Me=a(e),me=m(e,"P",{}),$(me).forEach(n),this.h()},h(){U(r,"name","hf:doc:metadata"),U(r,"content",_t),U(I,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),U(Z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),U(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),U(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),U(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),U(W,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),U(p,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){o(document.head,r),s(e,w,t),s(e,y,t),s(e,T,t),f(M,e,t),s(e,c,t),s(e,x,t),s(e,pe,t),s(e,C,t),s(e,de,t),s(e,E,t),s(e,ce,t),f(J,e,t),s(e,ue,t),f(V,e,t),s(e,ge,t),s(e,B,t),s(e,fe,t),f(F,e,t),s(e,he,t),f(H,e,t),s(e,_e,t),s(e,R,t),s(e,be,t),f(q,e,t),s(e,ye,t),f(N,e,t),s(e,ve,t),s(e,p,t),f(X,p,null),o(p,ke),o(p,te),o(p,Ze),o(p,ne),o(p,je),o(p,I),f(Y,I,null),o(I,Ge),o(I,ie),o(I,Pe),f(k,I,null),o(p,We),o(p,Z),f(D,Z,null),o(Z,Ce),o(Z,ae),o(p,Ee),o(p,j),f(z,j,null),o(j,Ve),o(j,oe),o(p,Be),o(p,G),f(Q,G,null),o(G,Fe),o(G,se),o(p,He),o(p,P),f(S,P,null),o(P,Re),o(P,re),o(p,qe),o(p,W),f(A,W,null),o(W,Ne),o(W,le),s(e,Te,t),f(O,e,t),s(e,Me,t),s(e,me,t),xe=!0},p(e,[t]){const d={};t&2&&(d.$$scope={dirty:t,ctx:e}),J.$set(d);const L={};t&2&&(L.$$scope={dirty:t,ctx:e}),k.$set(L)},i(e){xe\|\|(h(M.$$.fragment,e),h(J.$$.fragment,e),h(V.$$.fragment,e),h(F.$$.fragment,e),h(H.$$.fragment,e),h(q.$$.fragment,e),h(N.$$.fragment,e),h(X.$$.fragment,e),h(Y.$$.fragment,e),h(k.$$.fragment,e),h(D.$$.fragment,e),h(z.$$.fragment,e),h(Q.$$.fragment,e),h(S.$$.fragment,e),h(A.$$.fragment,e),h(O.$$.fragment,e),xe=!0)},o(e){_(M.$$.fragment,e),_(J.$$.fragment,e),_(V.$$.fragment,e),_(F.$$.fragment,e),_(H.$$.fragment,e),_(q.$$.fragment,e),_(N.$$.fragment,e),_(X.$$.fragment,e),_(Y.$$.fragment,e),_(k.$$.fragment,e),_(D.$$.fragment,e),_(z.$$.fragment,e),_(Q.$$.fragment,e),_(S.$$.fragment,e),_(A.$$.fragment,e),_(O.$$.fragment,e),xe=!1},d(e){e&&(n(w),n(y),n(T),n(c),n(x),n(pe),n(C),n(de),n(E),n(ce),n(ue),n(ge),n(B),n(fe),n(he),n(_e),n(R),n(be),n(ye),n(ve),n(p),n(Te),n(Me),n(me)),n(r),b(M,e),b(J,e),b(V,e),b(F,e),b(H,e),b(q,e),b(N,e),b(X),b(Y),b(k),b(D),b(z),b(Q),b(S),b(A),b(O,e)}}}const _t='{"title":"Lumina2","local":"lumina2","sections":[{"title":"Using Single File loading with Lumina Image 2.0","local":"using-single-file-loading-with-lumina-image-20","sections":[],"depth":2},{"title":"Using GGUF Quantized Checkpoints with Lumina Image 2.0","local":"using-gguf-quantized-checkpoints-with-lumina-image-20","sections":[],"depth":2},{"title":"Lumina2Text2ImgPipeline","local":"diffusers.Lumina2Text2ImgPipeline","sections":[],"depth":2}],"depth":1}';function bt(ee){return rt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Lt extends lt{constructor(r){super(),mt(this,r,bt,ht,st,{})}}export{Lt as component};

Xet Storage Details

Size:: 35.9 kB
Xet hash:: 260c37a246b10ab568a00108aebfd75a8c911fbe775ce537efc2f40a168c853d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.