Buckets:

hf-doc-build
/

doc

hf-doc-build/doc / diffusers /v0.37.0 /en /_app /immutable /nodes /145.f112b2ce.js

rtrm's picture

about 2 months ago

37.3 kB

	/**
	 * Minified, compiled Svelte component module for the "Flux2" documentation
	 * page (title taken from the `Ye` metadata string near the end). It exports a
	 * single component class, `lt`, under the name `component`.
	 *
	 * Every single-letter helper is imported from `../chunks/*`; the helpers'
	 * exact semantics are not visible in this file, so the notes below describe
	 * only what this module passes to them.
	 *
	 * NOTE(review): the backslash-escaped pipes (`\|\|` in the `i(...)` methods,
	 * `\|` inside several `val` strings) look like escaping residue of `||` / `|`
	 * from wherever this text was copied; as written, `m\|\|(` would not parse.
	 * Confirm against the deployed asset before relying on this copy.
	 */import{s as He,o as Ee,n as We}from"../chunks/scheduler.53228c21.js";import{S as Ke,i as ze,e as c,s as a,c as g,h as De,a as d,d as n,b as l,f as ee,g as h,j as M,k as B,l as p,m as s,n as _,t as x,o as b,p as w}from"../chunks/index.100fac89.js";import{C as Ve}from"../chunks/CopyLLMTxtMenu.1b09a166.js";import{D as ge}from"../chunks/Docstring.d8d965a7.js";import{C as qe}from"../chunks/CodeBlock.d30a6509.js";import{E as Ne}from"../chunks/ExampleCodeBlock.5ce6dff3.js";import{H as he,E as Qe}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.16f783a9.js";/**
	 * Fragment factory used as the default slot of the `Ne` example block that is
	 * anchored at "diffusers.Flux2Pipeline.__call__.example" (see `Xe`).
	 * Builds a <p> whose text is "Examples:" followed by a `qe` instance that
	 * receives the example twice: as `code` (looks like base64 of percent-encoded
	 * source text; TODO confirm) and as pre-highlighted HTML, with `wrap` false.
	 * Returns an object with c/l/m/p/i/o/d methods, presumably the Svelte
	 * create / claim / mount / update / intro / outro / destroy hooks.
	 * The parameter `V` is not read inside this function.
	 */function Se(V){let i,T="Examples:",u,r,m;return r=new qe({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRmx1eDJQaXBlbGluZSUwQSUwQXBpcGUlMjAlM0QlMjBGbHV4MlBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJibGFjay1mb3Jlc3QtbGFicyUyRkZMVVguMi1kZXYlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQXBpcGUudG8oJTIyY3VkYSUyMiklMEFwcm9tcHQlMjAlM0QlMjAlMjJBJTIwY2F0JTIwaG9sZGluZyUyMGElMjBzaWduJTIwdGhhdCUyMHNheXMlMjBoZWxsbyUyMHdvcmxkJTIyJTBBJTIzJTIwRGVwZW5kaW5nJTIwb24lMjB0aGUlMjB2YXJpYW50JTIwYmVpbmclMjB1c2VkJTJDJTIwdGhlJTIwcGlwZWxpbmUlMjBjYWxsJTIwd2lsbCUyMHNsaWdodGx5JTIwdmFyeS4lMEElMjMlMjBSZWZlciUyMHRvJTIwdGhlJTIwcGlwZWxpbmUlMjBkb2N1bWVudGF0aW9uJTIwZm9yJTIwbW9yZSUyMGRldGFpbHMuJTBBaW1hZ2UlMjAlM0QlMjBwaXBlKHByb21wdCUyQyUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0Q1MCUyQyUyMGd1aWRhbmNlX3NjYWxlJTNEMi41KS5pbWFnZXMlNUIwJTVEJTBBaW1hZ2Uuc2F2ZSglMjJmbHV4LnBuZyUyMik=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> Flux2Pipeline

	<span class="hljs-meta">>>> </span>pipe = Flux2Pipeline.from_pretrained(<span class="hljs-string">"black-forest-labs/FLUX.2-dev"</span>, torch_dtype=torch.bfloat16)
	<span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cuda"</span>)
	<span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"A cat holding a sign that says hello world"</span>
	<span class="hljs-meta">>>> </span><span class="hljs-comment"># Depending on the variant being used, the pipeline call will slightly vary.</span>
	<span class="hljs-meta">>>> </span><span class="hljs-comment"># Refer to the pipeline documentation for more details.</span>
	<span class="hljs-meta">>>> </span>image = pipe(prompt, num_inference_steps=<span class="hljs-number">50</span>, guidance_scale=<span class="hljs-number">2.5</span>).images[<span class="hljs-number">0</span>]
	<span class="hljs-meta">>>> </span>image.save(<span class="hljs-string">"flux.png"</span>)`,wrap:!1}}),{c(){i=c("p"),i.textContent=T,u=a(),g(r.$$.fragment)},l(o){i=d(o,"P",{"data-svelte-h":!0}),M(i)!=="svelte-kvfsh7"&&(i.textContent=T),u=l(o),h(r.$$.fragment,o)},m(o,f){s(o,i,f),s(o,u,f),_(r,o,f),m=!0},p:We,i(o){m\|\|(x(r.$$.fragment,o),m=!0)},o(o){b(r.$$.fragment,o),m=!1},d(o){o&&(n(i),n(u)),w(r,o)}}}/**
	 * Fragment factory used as the default slot of the `Ne` example block that is
	 * anchored at "diffusers.Flux2KleinPipeline.__call__.example" (see `Xe`).
	 * Same shape as `Se`: a <p> with the text "Examples:" followed by a `qe`
	 * instance, here carrying the Flux2KleinPipeline example as `code` and as
	 * pre-highlighted HTML, with `wrap` false.
	 * Returns the same c/l/m/p/i/o/d hook object as `Se`.
	 * The parameter `V` is not read inside this function.
	 */function Ae(V){let i,T="Examples:",u,r,m;return r=new qe({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRmx1eDJLbGVpblBpcGVsaW5lJTBBJTBBcGlwZSUyMCUzRCUyMEZsdXgyS2xlaW5QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyYmxhY2stZm9yZXN0LWxhYnMlMkZGTFVYLjIta2xlaW4tYmFzZS05QiUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMEEpJTBBcGlwZS50byglMjJjdWRhJTIyKSUwQXByb21wdCUyMCUzRCUyMCUyMkElMjBjYXQlMjBob2xkaW5nJTIwYSUyMHNpZ24lMjB0aGF0JTIwc2F5cyUyMGhlbGxvJTIwd29ybGQlMjIlMEElMjMlMjBEZXBlbmRpbmclMjBvbiUyMHRoZSUyMHZhcmlhbnQlMjBiZWluZyUyMHVzZWQlMkMlMjB0aGUlMjBwaXBlbGluZSUyMGNhbGwlMjB3aWxsJTIwc2xpZ2h0bHklMjB2YXJ5LiUwQSUyMyUyMFJlZmVyJTIwdG8lMjB0aGUlMjBwaXBlbGluZSUyMGRvY3VtZW50YXRpb24lMjBmb3IlMjBtb3JlJTIwZGV0YWlscy4lMEFpbWFnZSUyMCUzRCUyMHBpcGUocHJvbXB0JTJDJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDUwJTJDJTIwZ3VpZGFuY2Vfc2NhbGUlM0Q0LjApLmltYWdlcyU1QjAlNUQlMEFpbWFnZS5zYXZlKCUyMmZsdXgyX291dHB1dC5wbmclMjIp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> Flux2KleinPipeline

	<span class="hljs-meta">>>> </span>pipe = Flux2KleinPipeline.from_pretrained(
	<span class="hljs-meta">... </span> <span class="hljs-string">"black-forest-labs/FLUX.2-klein-base-9B"</span>, torch_dtype=torch.bfloat16
	<span class="hljs-meta">... </span>)
	<span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cuda"</span>)
	<span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"A cat holding a sign that says hello world"</span>
	<span class="hljs-meta">>>> </span><span class="hljs-comment"># Depending on the variant being used, the pipeline call will slightly vary.</span>
	<span class="hljs-meta">>>> </span><span class="hljs-comment"># Refer to the pipeline documentation for more details.</span>
	<span class="hljs-meta">>>> </span>image = pipe(prompt, num_inference_steps=<span class="hljs-number">50</span>, guidance_scale=<span class="hljs-number">4.0</span>).images[<span class="hljs-number">0</span>]
	<span class="hljs-meta">>>> </span>image.save(<span class="hljs-string">"flux2_output.png"</span>)`,wrap:!1}}),{c(){i=c("p"),i.textContent=T,u=a(),g(r.$$.fragment)},l(o){i=d(o,"P",{"data-svelte-h":!0}),M(i)!=="svelte-kvfsh7"&&(i.textContent=T),u=l(o),h(r.$$.fragment,o)},m(o,f){s(o,i,f),s(o,u,f),_(r,o,f),m=!0},p:We,i(o){m\|\|(x(r.$$.fragment,o),m=!0)},o(o){b(r.$$.fragment,o),m=!1},d(o){o&&(n(i),n(u)),w(r,o)}}}/**
	 * Fragment factory for the whole page body; `lt` hands it to `ze`.
	 * Instantiates the child components and static HTML snippets in page order:
	 * a `Ve` instance (CopyLLMTxtMenu chunk) with a float-right container style,
	 * `he` headings ("Flux2", "Caption upsampling", "Flux2Pipeline",
	 * "Flux2KleinPipeline"), `ge` docstring blocks for both pipeline classes and
	 * their `__call__` methods, `Ne` example blocks slotted with `Se` / `Ae`, and
	 * a `Qe` instance given the URL of the markdown source.
	 * Returns the c/l/m/p/i/o/d hook object plus `h()`:
	 * - l(e): the counterpart of c() that works from existing nodes under `e`
	 *   and compares "svelte-..." hashes before re-assigning innerHTML /
	 *   textContent (presumably hydration; TODO confirm).
	 * - h(): sets name "hf:doc:metadata" and content `Ye` on the meta element,
	 *   and the CSS classes of the wrapper elements.
	 * - m(e, t): passes the meta element to `p(document.head, i)` and everything
	 *   else to target `e` with anchor `t`.
	 * - p(e, [t]): when `t & 2` is non-zero, forwards a `$$scope` object
	 *   ({dirty, ctx}) to the two example blocks `C` and `J` via `$set`.
	 * The parameter `V` is forwarded to the example blocks as `$$scope.ctx`.
	 */function Xe(V){let i,T,u,r,m,o,f,ne,I,Pe='<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/> <img alt="MPS" src="https://img.shields.io/badge/MPS-000000?style=flat&logo=apple&logoColor=white%22"/>',oe,L,ke='Flux.2 is the recent series of image generation models from Black Forest Labs, preceded by the <a href="./flux">Flux.1</a> series. It is an entirely new model with a new architecture and pre-training done from scratch!',se,U,Ie='Original model checkpoints for Flux can be found <a href="https://huggingface.co/black-forest-labs" rel="nofollow">here</a>. Original inference code can be found <a href="https://github.com/black-forest-labs/flux2" rel="nofollow">here</a>.',ae,j,je='<p>Flux2 can be quite expensive to run on consumer hardware devices. However, you can perform a suite of optimizations to run it faster and in a more memory-friendly manner. Check out <a href="https://huggingface.co/blog/sd3#memory-optimizations-for-sd3" rel="nofollow">this section</a> for more details. Additionally, Flux can benefit from quantization for memory efficiency with a trade-off in inference latency. Refer to <a href="https://huggingface.co/blog/quanto-diffusers" rel="nofollow">this blog post</a> to learn more.</p> <p><a href="../../optimization/cache">Caching</a> may also speed up inference by storing and reusing intermediate outputs.</p>',le,Z,ie,N,Ce=`Flux.2 can potentially generate better better outputs with better prompts. We can “upsample”
	an input prompt by setting the <code>caption_upsample_temperature</code> argument in the pipeline call arguments.
	The <a href="https://github.com/black-forest-labs/flux2/blob/5a5d316b1b42f6b59a8c9194b77c8256be848432/src/flux2/text_encoder.py#L140" rel="nofollow">official implementation</a> recommends this value to be 0.15.`,re,W,ce,y,q,_e,Q,Je="The Flux2 pipeline for text-to-image generation.",xe,S,Ge='Reference: <a href="https://bfl.ai/blog/flux-2" rel="nofollow">https://bfl.ai/blog/flux-2</a>',be,$,H,we,A,Be="Function invoked when calling the pipeline for generation.",ye,C,de,E,pe,v,K,ve,X,Le="The Flux2 Klein pipeline for text-to-image generation.",Me,Y,Ue=`Reference:
	<a href="https://bfl.ai/blog/flux2-klein-towards-interactive-visual-intelligence" rel="nofollow">https://bfl.ai/blog/flux2-klein-towards-interactive-visual-intelligence</a>`,Fe,P,z,Te,R,Ze="Function invoked when calling the pipeline for generation.",$e,J,me,D,ue,te,fe;return m=new Ve({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),f=new he({props:{title:"Flux2",local:"flux2",headingTag:"h1"}}),Z=new he({props:{title:"Caption upsampling",local:"caption-upsampling",headingTag:"h2"}}),W=new he({props:{title:"Flux2Pipeline",local:"diffusers.Flux2Pipeline",headingTag:"h2"}}),q=new ge({props:{name:"class diffusers.Flux2Pipeline",anchor:"diffusers.Flux2Pipeline",parameters:[{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"vae",val:": AutoencoderKLFlux2"},{name:"text_encoder",val:": Mistral3ForConditionalGeneration"},{name:"tokenizer",val:": AutoProcessor"},{name:"transformer",val:": Flux2Transformer2DModel"}],parametersDescription:[{anchor:"diffusers.Flux2Pipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/v0.37.0/en/api/models/flux2_transformer#diffusers.Flux2Transformer2DModel">Flux2Transformer2DModel</a>) —
	Conditional Transformer (MMDiT) architecture to denoise the encoded image latents.`,name:"transformer"},{anchor:"diffusers.Flux2Pipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/v0.37.0/en/api/schedulers/flow_match_euler_discrete#diffusers.FlowMatchEulerDiscreteScheduler">FlowMatchEulerDiscreteScheduler</a>) —
	A scheduler to be used in combination with <code>transformer</code> to denoise the encoded image latents.`,name:"scheduler"},{anchor:"diffusers.Flux2Pipeline.vae",description:`<strong>vae</strong> (<code>AutoencoderKLFlux2</code>) —
	Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.Flux2Pipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>Mistral3ForConditionalGeneration</code>) —
	<a href="https://huggingface.co/docs/transformers/en/model_doc/mistral3#transformers.Mistral3ForConditionalGeneration" rel="nofollow">Mistral3ForConditionalGeneration</a>`,name:"text_encoder"},{anchor:"diffusers.Flux2Pipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>AutoProcessor</code>) —
	Tokenizer of class
	<a href="https://huggingface.co/docs/transformers/en/model_doc/pixtral#transformers.PixtralProcessor" rel="nofollow">PixtralProcessor</a>.`,name:"tokenizer"}],source:"https://github.com/huggingface/diffusers/blob/v0.37.0/src/diffusers/pipelines/flux2/pipeline_flux2.py#L251"}}),H=new ge({props:{name:"__call__",anchor:"diffusers.Flux2Pipeline.__call__",parameters:[{name:"image",val:": list[PIL.Image.Image, PIL.Image.Image] \| None = None"},{name:"prompt",val:": str \| list[str] = None"},{name:"height",val:": int \| None = None"},{name:"width",val:": int \| None = None"},{name:"num_inference_steps",val:": int = 50"},{name:"sigmas",val:": list[float] \| None = None"},{name:"guidance_scale",val:": float \| None = 4.0"},{name:"num_images_per_prompt",val:": int = 1"},{name:"generator",val:": torch._C.Generator \| list[torch._C.Generator] \| None = None"},{name:"latents",val:": torch.Tensor \| None = None"},{name:"prompt_embeds",val:": torch.Tensor \| None = None"},{name:"output_type",val:": str \| None = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"attention_kwargs",val:": dict[str, typing.Any] \| None = None"},{name:"callback_on_step_end",val:": typing.Optional[typing.Callable[[int, int], NoneType]] = None"},{name:"callback_on_step_end_tensor_inputs",val:": list = ['latents']"},{name:"max_sequence_length",val:": int = 512"},{name:"text_encoder_out_layers",val:": tuple = (10, 20, 30)"},{name:"caption_upsample_temperature",val:": float = None"}],parametersDescription:[{anchor:"diffusers.Flux2Pipeline.__call__.image",description:`<strong>image</strong> (<code>torch.Tensor</code>, <code>PIL.Image.Image</code>, <code>np.ndarray</code>, <code>list[torch.Tensor]</code>, <code>list[PIL.Image.Image]</code>, or <code>list[np.ndarray]</code>) —
	<code>Image</code>, numpy array or tensor representing an image batch to be used as the starting point. For both
	numpy array and pytorch tensor, the expected value range is between <code>[0, 1]</code> If it’s a tensor or a list
	or tensors, the expected shape should be <code>(B, C, H, W)</code> or <code>(C, H, W)</code>. If it is a numpy array or a
	list of arrays, the expected shape should be <code>(B, H, W, C)</code> or <code>(H, W, C)</code> It can also accept image
	latents as <code>image</code>, but if passing latents directly it is not encoded again.`,name:"image"},{anchor:"diffusers.Flux2Pipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) —
	The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>.
	instead.`,name:"prompt"},{anchor:"diffusers.Flux2Pipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 1.0) —
	Embedded guiddance scale is enabled by setting <code>guidance_scale</code> > 1. Higher <code>guidance_scale</code> encourages
	a model to generate images more aligned with <code>prompt</code> at the expense of lower image quality.</p>
	<p>Guidance-distilled models approximates true classifer-free guidance for <code>guidance_scale</code> > 1. Refer to
	the <a href="https://huggingface.co/papers/2210.03142" rel="nofollow">paper</a> to learn more.`,name:"guidance_scale"},{anchor:"diffusers.Flux2Pipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size * self.vae_scale_factor) —
	The height in pixels of the generated image. This is set to 1024 by default for the best results.`,name:"height"},{anchor:"diffusers.Flux2Pipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size * self.vae_scale_factor) —
	The width in pixels of the generated image. This is set to 1024 by default for the best results.`,name:"width"},{anchor:"diffusers.Flux2Pipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) —
	The number of denoising steps. More denoising steps usually lead to a higher quality image at the
	expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.Flux2Pipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>list[float]</code>, <em>optional</em>) —
	Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in
	their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed
	will be used.`,name:"sigmas"},{anchor:"diffusers.Flux2Pipeline.__call__.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	The number of images to generate per prompt.`,name:"num_images_per_prompt"},{anchor:"diffusers.Flux2Pipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>list[torch.Generator]</code>, <em>optional</em>) —
	One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a>
	to make generation deterministic.`,name:"generator"},{anchor:"diffusers.Flux2Pipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
	generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
	tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.Flux2Pipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
	provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.Flux2Pipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) —
	The output format of the generate image. Choose between
	<a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.Flux2Pipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether or not to return a <code>~pipelines.qwenimage.QwenImagePipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.Flux2Pipeline.__call__.attention_kwargs",description:`<strong>attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) —
	A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under
	<code>self.processor</code> in
	<a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,name:"attention_kwargs"},{anchor:"diffusers.Flux2Pipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <em>optional</em>) —
	A function that calls at the end of each denoising steps during the inference. The function is called
	with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a list of all tensors as specified by
	<code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.Flux2Pipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) —
	The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list
	will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the
	<code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.Flux2Pipeline.__call__.max_sequence_length",description:"<strong>max_sequence_length</strong> (<code>int</code> defaults to 512) — Maximum sequence length to use with the <code>prompt</code>.",name:"max_sequence_length"},{anchor:"diffusers.Flux2Pipeline.__call__.text_encoder_out_layers",description:`<strong>text_encoder_out_layers</strong> (<code>tuple[int]</code>) —
	Layer indices to use in the <code>text_encoder</code> to derive the final prompt embeddings.`,name:"text_encoder_out_layers"},{anchor:"diffusers.Flux2Pipeline.__call__.caption_upsample_temperature",description:`<strong>caption_upsample_temperature</strong> (<code>float</code>) —
	When specified, we will try to perform caption upsampling for potentially improved outputs. We
	recommend setting it to 0.15 if caption upsampling is to be performed.`,name:"caption_upsample_temperature"}],source:"https://github.com/huggingface/diffusers/blob/v0.37.0/src/diffusers/pipelines/flux2/pipeline_flux2.py#L743",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>~pipelines.flux2.Flux2PipelineOutput</code> if
	<code>return_dict</code> is True, otherwise a <code>tuple</code>. When returning a tuple, the first element is a list with the
	generated images.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>~pipelines.flux2.Flux2PipelineOutput</code> or <code>tuple</code></p>
	`}}),C=new Ne({props:{anchor:"diffusers.Flux2Pipeline.__call__.example",$$slots:{default:[Se]},$$scope:{ctx:V}}}),E=new he({props:{title:"Flux2KleinPipeline",local:"diffusers.Flux2KleinPipeline",headingTag:"h2"}}),K=new ge({props:{name:"class diffusers.Flux2KleinPipeline",anchor:"diffusers.Flux2KleinPipeline",parameters:[{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"vae",val:": AutoencoderKLFlux2"},{name:"text_encoder",val:": Qwen3ForCausalLM"},{name:"tokenizer",val:": Qwen2TokenizerFast"},{name:"transformer",val:": Flux2Transformer2DModel"},{name:"is_distilled",val:": bool = False"}],parametersDescription:[{anchor:"diffusers.Flux2KleinPipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/v0.37.0/en/api/models/flux2_transformer#diffusers.Flux2Transformer2DModel">Flux2Transformer2DModel</a>) —
	Conditional Transformer (MMDiT) architecture to denoise the encoded image latents.`,name:"transformer"},{anchor:"diffusers.Flux2KleinPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/v0.37.0/en/api/schedulers/flow_match_euler_discrete#diffusers.FlowMatchEulerDiscreteScheduler">FlowMatchEulerDiscreteScheduler</a>) —
	A scheduler to be used in combination with <code>transformer</code> to denoise the encoded image latents.`,name:"scheduler"},{anchor:"diffusers.Flux2KleinPipeline.vae",description:`<strong>vae</strong> (<code>AutoencoderKLFlux2</code>) —
	Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.Flux2KleinPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>Qwen3ForCausalLM</code>) —
	<a href="https://huggingface.co/docs/transformers/en/model_doc/qwen3#transformers.Qwen3ForCausalLM" rel="nofollow">Qwen3ForCausalLM</a>`,name:"text_encoder"},{anchor:"diffusers.Flux2KleinPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>Qwen2TokenizerFast</code>) —
	Tokenizer of class
	<a href="https://huggingface.co/docs/transformers/en/model_doc/qwen2#transformers.Qwen2TokenizerFast" rel="nofollow">Qwen2TokenizerFast</a>.`,name:"tokenizer"}],source:"https://github.com/huggingface/diffusers/blob/v0.37.0/src/diffusers/pipelines/flux2/pipeline_flux2_klein.py#L155"}}),z=new ge({props:{name:"__call__",anchor:"diffusers.Flux2KleinPipeline.__call__",parameters:[{name:"image",val:": list[PIL.Image.Image] \| PIL.Image.Image \| None = None"},{name:"prompt",val:": str \| list[str] = None"},{name:"height",val:": int \| None = None"},{name:"width",val:": int \| None = None"},{name:"num_inference_steps",val:": int = 50"},{name:"sigmas",val:": list[float] \| None = None"},{name:"guidance_scale",val:": float = 4.0"},{name:"num_images_per_prompt",val:": int = 1"},{name:"generator",val:": torch._C.Generator \| list[torch._C.Generator] \| None = None"},{name:"latents",val:": torch.Tensor \| None = None"},{name:"prompt_embeds",val:": torch.Tensor \| None = None"},{name:"negative_prompt_embeds",val:": str \| list[str] \| None = None"},{name:"output_type",val:": str = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"attention_kwargs",val:": dict[str, typing.Any] \| None = None"},{name:"callback_on_step_end",val:": typing.Optional[typing.Callable[[int, int, dict], NoneType]] = None"},{name:"callback_on_step_end_tensor_inputs",val:": list = ['latents']"},{name:"max_sequence_length",val:": int = 512"},{name:"text_encoder_out_layers",val:": tuple = (9, 18, 27)"}],parametersDescription:[{anchor:"diffusers.Flux2KleinPipeline.__call__.image",description:`<strong>image</strong> (<code>torch.Tensor</code>, <code>PIL.Image.Image</code>, <code>np.ndarray</code>, <code>List[torch.Tensor]</code>, <code>List[PIL.Image.Image]</code>, or <code>List[np.ndarray]</code>) —
	<code>Image</code>, numpy array or tensor representing an image batch to be used as the starting point. For both
	numpy array and pytorch tensor, the expected value range is between <code>[0, 1]</code> If it’s a tensor or a list
	or tensors, the expected shape should be <code>(B, C, H, W)</code> or <code>(C, H, W)</code>. If it is a numpy array or a
	list of arrays, the expected shape should be <code>(B, H, W, C)</code> or <code>(H, W, C)</code> It can also accept image
	latents as <code>image</code>, but if passing latents directly it is not encoded again.`,name:"image"},{anchor:"diffusers.Flux2KleinPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>.
	instead.`,name:"prompt"},{anchor:"diffusers.Flux2KleinPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 4.0) —
	Guidance scale as defined in <a href="https://huggingface.co/papers/2207.12598" rel="nofollow">Classifier-Free Diffusion
	Guidance</a>. <code>guidance_scale</code> is defined as <code>w</code> of equation 2.
	of <a href="https://huggingface.co/papers/2205.11487" rel="nofollow">Imagen Paper</a>. Guidance scale is enabled by setting
	<code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to
	the text <code>prompt</code>, usually at the expense of lower image quality. For step-wise distilled models,
	<code>guidance_scale</code> is ignored.`,name:"guidance_scale"},{anchor:"diffusers.Flux2KleinPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size * self.vae_scale_factor) —
	The height in pixels of the generated image. This is set to 1024 by default for the best results.`,name:"height"},{anchor:"diffusers.Flux2KleinPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size * self.vae_scale_factor) —
	The width in pixels of the generated image. This is set to 1024 by default for the best results.`,name:"width"},{anchor:"diffusers.Flux2KleinPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) —
	The number of denoising steps. More denoising steps usually lead to a higher quality image at the
	expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.Flux2KleinPipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>List[float]</code>, <em>optional</em>) —
	Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in
	their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed
	will be used.`,name:"sigmas"},{anchor:"diffusers.Flux2KleinPipeline.__call__.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	The number of images to generate per prompt.`,name:"num_images_per_prompt"},{anchor:"diffusers.Flux2KleinPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) —
	One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a>
	to make generation deterministic.`,name:"generator"},{anchor:"diffusers.Flux2KleinPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
	generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
	tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.Flux2KleinPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
	provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.Flux2KleinPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated negative text embeddings. Note that "" is used as the negative prompt in this pipeline.
	If not provided, will be generated from "".`,name:"negative_prompt_embeds"},{anchor:"diffusers.Flux2KleinPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) —
	The output format of the generate image. Choose between
	<a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.Flux2KleinPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether or not to return a <code>~pipelines.qwenimage.QwenImagePipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.Flux2KleinPipeline.__call__.attention_kwargs",description:`<strong>attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) —
	A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under
	<code>self.processor</code> in
	<a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,name:"attention_kwargs"},{anchor:"diffusers.Flux2KleinPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <em>optional</em>) —
	A function that calls at the end of each denoising steps during the inference. The function is called
	with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a list of all tensors as specified by
	<code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.Flux2KleinPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) —
	The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list
	will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the
	<code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.Flux2KleinPipeline.__call__.max_sequence_length",description:"<strong>max_sequence_length</strong> (<code>int</code> defaults to 512) — Maximum sequence length to use with the <code>prompt</code>.",name:"max_sequence_length"},{anchor:"diffusers.Flux2KleinPipeline.__call__.text_encoder_out_layers",description:`<strong>text_encoder_out_layers</strong> (<code>tuple[int]</code>) —
	Layer indices to use in the <code>text_encoder</code> to derive the final prompt embeddings.`,name:"text_encoder_out_layers"}],source:"https://github.com/huggingface/diffusers/blob/v0.37.0/src/diffusers/pipelines/flux2/pipeline_flux2_klein.py#L607",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>~pipelines.flux2.Flux2PipelineOutput</code> if
	<code>return_dict</code> is True, otherwise a <code>tuple</code>. When returning a tuple, the first element is a list with the
	generated images.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>~pipelines.flux2.Flux2PipelineOutput</code> or <code>tuple</code></p>
	`}}),J=new Ne({props:{anchor:"diffusers.Flux2KleinPipeline.__call__.example",$$slots:{default:[Ae]},$$scope:{ctx:V}}}),D=new Qe({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/flux2.md"}}),{c(){i=c("meta"),T=a(),u=c("p"),r=a(),g(m.$$.fragment),o=a(),g(f.$$.fragment),ne=a(),I=c("div"),I.innerHTML=Pe,oe=a(),L=c("p"),L.innerHTML=ke,se=a(),U=c("p"),U.innerHTML=Ie,ae=a(),j=c("blockquote"),j.innerHTML=je,le=a(),g(Z.$$.fragment),ie=a(),N=c("p"),N.innerHTML=Ce,re=a(),g(W.$$.fragment),ce=a(),y=c("div"),g(q.$$.fragment),_e=a(),Q=c("p"),Q.textContent=Je,xe=a(),S=c("p"),S.innerHTML=Ge,be=a(),$=c("div"),g(H.$$.fragment),we=a(),A=c("p"),A.textContent=Be,ye=a(),g(C.$$.fragment),de=a(),g(E.$$.fragment),pe=a(),v=c("div"),g(K.$$.fragment),ve=a(),X=c("p"),X.textContent=Le,Me=a(),Y=c("p"),Y.innerHTML=Ue,Fe=a(),P=c("div"),g(z.$$.fragment),Te=a(),R=c("p"),R.textContent=Ze,$e=a(),g(J.$$.fragment),me=a(),g(D.$$.fragment),ue=a(),te=c("p"),this.h()},l(e){const t=De("svelte-u9bgzb",document.head);i=d(t,"META",{name:!0,content:!0}),t.forEach(n),T=l(e),u=d(e,"P",{}),ee(u).forEach(n),r=l(e),h(m.$$.fragment,e),o=l(e),h(f.$$.fragment,e),ne=l(e),I=d(e,"DIV",{class:!0,"data-svelte-h":!0}),M(I)!=="svelte-1elo7hh"&&(I.innerHTML=Pe),oe=l(e),L=d(e,"P",{"data-svelte-h":!0}),M(L)!=="svelte-1togc7r"&&(L.innerHTML=ke),se=l(e),U=d(e,"P",{"data-svelte-h":!0}),M(U)!=="svelte-otikoq"&&(U.innerHTML=Ie),ae=l(e),j=d(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),M(j)!=="svelte-79zmu7"&&(j.innerHTML=je),le=l(e),h(Z.$$.fragment,e),ie=l(e),N=d(e,"P",{"data-svelte-h":!0}),M(N)!=="svelte-1r5tu3c"&&(N.innerHTML=Ce),re=l(e),h(W.$$.fragment,e),ce=l(e),y=d(e,"DIV",{class:!0});var F=ee(y);h(q.$$.fragment,F),_e=l(F),Q=d(F,"P",{"data-svelte-h":!0}),M(Q)!=="svelte-1hjjsby"&&(Q.textContent=Je),xe=l(F),S=d(F,"P",{"data-svelte-h":!0}),M(S)!=="svelte-a2p6bq"&&(S.innerHTML=Ge),be=l(F),$=d(F,"DIV",{class:!0});var 
k=ee($);h(H.$$.fragment,k),we=l(k),A=d(k,"P",{"data-svelte-h":!0}),M(A)!=="svelte-v78lg8"&&(A.textContent=Be),ye=l(k),h(C.$$.fragment,k),k.forEach(n),F.forEach(n),de=l(e),h(E.$$.fragment,e),pe=l(e),v=d(e,"DIV",{class:!0});var G=ee(v);h(K.$$.fragment,G),ve=l(G),X=d(G,"P",{"data-svelte-h":!0}),M(X)!=="svelte-186p0nz"&&(X.textContent=Le),Me=l(G),Y=d(G,"P",{"data-svelte-h":!0}),M(Y)!=="svelte-1m90spa"&&(Y.innerHTML=Ue),Fe=l(G),P=d(G,"DIV",{class:!0});var O=ee(P);h(z.$$.fragment,O),Te=l(O),R=d(O,"P",{"data-svelte-h":!0}),M(R)!=="svelte-v78lg8"&&(R.textContent=Ze),$e=l(O),h(J.$$.fragment,O),O.forEach(n),G.forEach(n),me=l(e),h(D.$$.fragment,e),ue=l(e),te=d(e,"P",{}),ee(te).forEach(n),this.h()},h(){B(i,"name","hf:doc:metadata"),B(i,"content",Ye),B(I,"class","flex flex-wrap space-x-1"),B(j,"class","tip"),B($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),B(y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),B(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),B(v,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){p(document.head,i),s(e,T,t),s(e,u,t),s(e,r,t),_(m,e,t),s(e,o,t),_(f,e,t),s(e,ne,t),s(e,I,t),s(e,oe,t),s(e,L,t),s(e,se,t),s(e,U,t),s(e,ae,t),s(e,j,t),s(e,le,t),_(Z,e,t),s(e,ie,t),s(e,N,t),s(e,re,t),_(W,e,t),s(e,ce,t),s(e,y,t),_(q,y,null),p(y,_e),p(y,Q),p(y,xe),p(y,S),p(y,be),p(y,$),_(H,$,null),p($,we),p($,A),p($,ye),_(C,$,null),s(e,de,t),_(E,e,t),s(e,pe,t),s(e,v,t),_(K,v,null),p(v,ve),p(v,X),p(v,Me),p(v,Y),p(v,Fe),p(v,P),_(z,P,null),p(P,Te),p(P,R),p(P,$e),_(J,P,null),s(e,me,t),_(D,e,t),s(e,ue,t),s(e,te,t),fe=!0},p(e,[t]){const F={};t&2&&(F.$$scope={dirty:t,ctx:e}),C.$set(F);const 
k={};t&2&&(k.$$scope={dirty:t,ctx:e}),J.$set(k)},i(e){fe\|\|(x(m.$$.fragment,e),x(f.$$.fragment,e),x(Z.$$.fragment,e),x(W.$$.fragment,e),x(q.$$.fragment,e),x(H.$$.fragment,e),x(C.$$.fragment,e),x(E.$$.fragment,e),x(K.$$.fragment,e),x(z.$$.fragment,e),x(J.$$.fragment,e),x(D.$$.fragment,e),fe=!0)},o(e){b(m.$$.fragment,e),b(f.$$.fragment,e),b(Z.$$.fragment,e),b(W.$$.fragment,e),b(q.$$.fragment,e),b(H.$$.fragment,e),b(C.$$.fragment,e),b(E.$$.fragment,e),b(K.$$.fragment,e),b(z.$$.fragment,e),b(J.$$.fragment,e),b(D.$$.fragment,e),fe=!1},d(e){e&&(n(T),n(u),n(r),n(o),n(ne),n(I),n(oe),n(L),n(se),n(U),n(ae),n(j),n(le),n(ie),n(N),n(re),n(ce),n(y),n(de),n(pe),n(v),n(me),n(ue),n(te)),n(i),w(m,e),w(f,e),w(Z,e),w(W,e),w(q),w(H),w(C),w(E,e),w(K),w(z),w(J),w(D,e)}}}/**
 * JSON string describing the page outline: the page title "Flux2" with three
 * depth-2 sections and their local anchors. `Xe`'s h() method uses it as the
 * content of the "hf:doc:metadata" meta element.
 */const Ye='{"title":"Flux2","local":"flux2","sections":[{"title":"Caption upsampling","local":"caption-upsampling","sections":[],"depth":2},{"title":"Flux2Pipeline","local":"diffusers.Flux2Pipeline","sections":[],"depth":2},{"title":"Flux2KleinPipeline","local":"diffusers.Flux2KleinPipeline","sections":[],"depth":2}],"depth":1}';/**
 * Instance function handed to `ze` by `lt`. Registers a callback through `Ee`
 * (presumably Svelte's onMount; TODO confirm) that reads the "fw" query
 * parameter from window.location.search and discards the value, then returns
 * an empty array. The parameter `V` is unused.
 */function Re(V){return Ee(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}/**
 * Page component class. Extends `Ke`; the constructor calls super() and then
 * `ze(this, i, Re, Xe, He, {})`, i.e. it passes the constructor argument `i`,
 * the instance function `Re`, the fragment factory `Xe`, the imported `He`
 * and an empty object. Exported from this module as `component`.
 */class lt extends Ke{constructor(i){super(),ze(this,i,Re,Xe,He,{})}}export{lt as component};

Xet Storage Details

Size: 37.3 kB
Xet hash: 2e07214187042831bbc8d9a94743f316958f68fef107d42ac5a48653b54f1145

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.