Buckets:
| import{s as kt,o as Dt,n as Ut}from"../chunks/scheduler.53228c21.js";import{S as Pt,i as Ct,e as s,s as i,c as m,h as Jt,a as r,d as n,b as a,f as fe,g as u,j as l,k as ae,l as y,m as o,n as f,t as g,o as _,p as h}from"../chunks/index.100fac89.js";import{C as jt}from"../chunks/CopyLLMTxtMenu.133e28e0.js";import{D as ot}from"../chunks/Docstring.f8721f67.js";import{C as me}from"../chunks/CodeBlock.d30a6509.js";import{E as It}from"../chunks/ExampleCodeBlock.24511344.js";import{H as ge,E as Lt}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.d8195636.js";function St(_e){let d,U="Examples:",w,T,v;return T=new me({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwSHVueXVhbkRpVFBpcGVsaW5lJTBBJTBBcGlwZSUyMCUzRCUyMEh1bnl1YW5EaVRQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyVGVuY2VudC1IdW55dWFuJTJGSHVueXVhbkRpVC1EaWZmdXNlcnMlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMEEpJTBBcGlwZS50byglMjJjdWRhJTIyKSUwQSUwQSUyMyUyMFlvdSUyMG1heSUyMGFsc28lMjB1c2UlMjBFbmdsaXNoJTIwcHJvbXB0JTIwYXMlMjBIdW55dWFuRGlUJTIwc3VwcG9ydHMlMjBib3RoJTIwRW5nbGlzaCUyMGFuZCUyMENoaW5lc2UlMEElMjMlMjBwcm9tcHQlMjAlM0QlMjAlMjJBbiUyMGFzdHJvbmF1dCUyMHJpZGluZyUyMGElMjBob3JzZSUyMiUwQXByb21wdCUyMCUzRCUyMCUyMiVFNCVCOCU4MCVFNCVCOCVBQSVFNSVBRSU4NyVFOCU4OCVBQSVFNSU5MSU5OCVFNSU5QyVBOCVFOSVBQSU5MSVFOSVBOSVBQyUyMiUwQWltYWdlJTIwJTNEJTIwcGlwZShwcm9tcHQpLmltYWdlcyU1QjAlNUQ=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> HunyuanDiTPipeline | |
| <span class="hljs-meta">>>> </span>pipe = HunyuanDiTPipeline.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"Tencent-Hunyuan/HunyuanDiT-Diffusers"</span>, torch_dtype=torch.float16 | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># You may also use English prompt as HunyuanDiT supports both English and Chinese</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># prompt = "An astronaut riding a horse"</span> | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"一个宇航员在骑马"</span> | |
| <span class="hljs-meta">>>> </span>image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),{c(){d=s("p"),d.textContent=U,w=i(),m(T.$$.fragment)},l(p){d=r(p,"P",{"data-svelte-h":!0}),l(d)!=="svelte-kvfsh7"&&(d.textContent=U),w=a(p),u(T.$$.fragment,p)},m(p,x){o(p,d,x),o(p,w,x),f(T,p,x),v=!0},p:Ut,i(p){v||(g(T.$$.fragment,p),v=!0)},o(p){_(T.$$.fragment,p),v=!1},d(p){p&&(n(d),n(w)),h(T,p)}}}function Bt(_e){let d,U,w,T,v,p,x,he,P,it='<img src="https://github.com/gnobitab/diffusers-hunyuan/assets/1157982/39b99036-c3cb-4f16-bb1a-40ec25eda573" alt="chinese elements understanding"/>',ye,C,at='<a href="https://huggingface.co/papers/2405.08748" rel="nofollow">Hunyuan-DiT : A Powerful Multi-Resolution Diffusion Transformer with Fine-Grained Chinese Understanding</a> from Tencent Hunyuan.',be,J,st="The abstract from the paper is:",Te,j,rt="<em>We present Hunyuan-DiT, a text-to-image diffusion transformer with fine-grained understanding of both English and Chinese. To construct Hunyuan-DiT, we carefully design the transformer structure, text encoder, and positional encoding. We also build from scratch a whole data pipeline to update and evaluate data for iterative model optimization. For fine-grained language understanding, we train a Multimodal Large Language Model to refine the captions of the images. Finally, Hunyuan-DiT can perform multi-turn multimodal dialogue with users, generating and refining images according to the context. 
Through our holistic human evaluation protocol with more than 50 professional human evaluators, Hunyuan-DiT sets a new state-of-the-art in Chinese-to-image generation compared with other open-source models.</em>",ve,I,lt='You can find the original codebase at <a href="https://github.com/Tencent/HunyuanDiT" rel="nofollow">Tencent/HunyuanDiT</a> and all the available checkpoints at <a href="https://huggingface.co/Tencent-Hunyuan/HunyuanDiT" rel="nofollow">Tencent-Hunyuan</a>.',xe,L,pt="<strong>Highlights</strong>: HunyuanDiT supports Chinese/English-to-image, multi-resolution generation.",we,S,dt="HunyuanDiT has the following components:",Me,B,ct="<li>It uses a diffusion transformer as the backbone</li> <li>It combines two text encoders, a bilingual CLIP and a multilingual T5 encoder</li>",He,H,mt='<p>Make sure to check out the Schedulers <a href="../../using-diffusers/schedulers">guide</a> to learn how to explore the tradeoff between scheduler speed and quality, and see the <a href="../../using-diffusers/loading#reuse-a-pipeline">reuse components across pipelines</a> section to learn how to efficiently load the same components into multiple pipelines.</p>',$e,$,ut='<p>You can further improve generation quality by passing the generated image from <code>HungyuanDiTPipeline</code> to the <a href="../../using-diffusers/sdxl#base-to-refiner-model">SDXL refiner</a> model.</p>',ke,z,De,N,ft='You can optimize the pipeline’s runtime and memory consumption with torch.compile and feed-forward chunking. 
To learn about other optimization methods, check out the <a href="../../optimization/fp16">Speed up inference</a> and <a href="../../optimization/memory">Reduce memory usage</a> guides.',Ue,G,Pe,Z,gt='Use <a href="https://huggingface.co/docs/diffusers/main/en/tutorials/fast_diffusion#torchcompile" rel="nofollow"><code>torch.compile</code></a> to reduce the inference latency.',Ce,V,_t="First, load the pipeline:",Je,W,je,R,ht="Then change the memory layout of the pipelines <code>transformer</code> and <code>vae</code> components to <code>torch.channels-last</code>:",Ie,O,Le,E,yt="Finally, compile the components and run inference:",Se,F,Be,q,bt='The <a href="https://gist.github.com/sayakpaul/29d3a14905cfcbf611fe71ebd22e9b23" rel="nofollow">benchmark</a> results on a 80GB A100 machine are:',ze,Q,Ne,X,Ge,A,Tt='By loading the T5 text encoder in 8 bits, you can run the pipeline in just under 6 GBs of GPU VRAM. Refer to <a href="https://gist.github.com/sayakpaul/3154605f6af05b98a41081aaba5ca43e" rel="nofollow">this script</a> for details.',Ze,Y,vt='Furthermore, you can use the <a href="/docs/diffusers/pr_12249/en/api/models/hunyuan_transformer2d#diffusers.HunyuanDiT2DModel.enable_forward_chunking">enable_forward_chunking()</a> method to reduce memory usage. Feed-forward chunking runs the feed-forward layers in a transformer block in a loop instead of all at once. This gives you a trade-off between memory consumption and inference runtime.',Ve,K,We,ee,Re,c,te,Qe,se,xt="Pipeline for English/Chinese-to-image generation using HunyuanDiT.",Xe,re,wt=`This model inherits from <a href="/docs/diffusers/pr_12249/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods the | |
| library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)`,Ae,le,Mt=`HunyuanDiT uses two text encoders: <a href="https://huggingface.co/google/mt5-base" rel="nofollow">mT5</a> and [bilingual CLIP](fine-tuned by | |
| ourselves)`,Ye,M,ne,Ke,pe,Ht="The call function to the pipeline for generation with HunyuanDiT.",et,k,tt,D,oe,nt,de,$t="Encodes the prompt into text encoder hidden states.",Oe,ie,Ee,ue,Fe;return v=new jt({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),x=new ge({props:{title:"Hunyuan-DiT",local:"hunyuan-dit",headingTag:"h1"}}),z=new ge({props:{title:"Optimization",local:"optimization",headingTag:"h2"}}),G=new ge({props:{title:"Inference",local:"inference",headingTag:"h3"}}),W=new me({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEh1bnl1YW5EaVRQaXBlbGluZSUwQWltcG9ydCUyMHRvcmNoJTBBJTBBcGlwZWxpbmUlMjAlM0QlMjBIdW55dWFuRGlUUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUwOSUyMlRlbmNlbnQtSHVueXVhbiUyRkh1bnl1YW5EaVQtRGlmZnVzZXJzJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTBBKS50byglMjJjdWRhJTIyKQ==",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> HunyuanDiTPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| pipeline = HunyuanDiTPipeline.from_pretrained( | |
| <span class="hljs-string">"Tencent-Hunyuan/HunyuanDiT-Diffusers"</span>, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>)`,wrap:!1}}),O=new me({props:{code:"cGlwZWxpbmUudHJhbnNmb3JtZXIudG8obWVtb3J5X2Zvcm1hdCUzRHRvcmNoLmNoYW5uZWxzX2xhc3QpJTBBcGlwZWxpbmUudmFlLnRvKG1lbW9yeV9mb3JtYXQlM0R0b3JjaC5jaGFubmVsc19sYXN0KQ==",highlighted:`pipeline.transformer.to(memory_format=torch.channels_last) | |
| pipeline.vae.to(memory_format=torch.channels_last)`,wrap:!1}}),F=new me({props:{code:"cGlwZWxpbmUudHJhbnNmb3JtZXIlMjAlM0QlMjB0b3JjaC5jb21waWxlKHBpcGVsaW5lLnRyYW5zZm9ybWVyJTJDJTIwbW9kZSUzRCUyMm1heC1hdXRvdHVuZSUyMiUyQyUyMGZ1bGxncmFwaCUzRFRydWUpJTBBcGlwZWxpbmUudmFlLmRlY29kZSUyMCUzRCUyMHRvcmNoLmNvbXBpbGUocGlwZWxpbmUudmFlLmRlY29kZSUyQyUyMG1vZGUlM0QlMjJtYXgtYXV0b3R1bmUlMjIlMkMlMjBmdWxsZ3JhcGglM0RUcnVlKSUwQSUwQWltYWdlJTIwJTNEJTIwcGlwZWxpbmUocHJvbXB0JTNEJTIyJUU0JUI4JTgwJUU0JUI4JUFBJUU1JUFFJTg3JUU4JTg4JUFBJUU1JTkxJTk4JUU1JTlDJUE4JUU5JUFBJTkxJUU5JUE5JUFDJTIyKS5pbWFnZXMlNUIwJTVE",highlighted:`pipeline.transformer = torch.<span class="hljs-built_in">compile</span>(pipeline.transformer, mode=<span class="hljs-string">"max-autotune"</span>, fullgraph=<span class="hljs-literal">True</span>) | |
| pipeline.vae.decode = torch.<span class="hljs-built_in">compile</span>(pipeline.vae.decode, mode=<span class="hljs-string">"max-autotune"</span>, fullgraph=<span class="hljs-literal">True</span>) | |
| image = pipeline(prompt=<span class="hljs-string">"一个宇航员在骑马"</span>).images[<span class="hljs-number">0</span>]`,wrap:!1}}),Q=new me({props:{code:"V2l0aCUyMHRvcmNoLmNvbXBpbGUoKSUzQSUyMEF2ZXJhZ2UlMjBpbmZlcmVuY2UlMjB0aW1lJTNBJTIwMTIuNDcwJTIwc2Vjb25kcy4lMEFXaXRob3V0JTIwdG9yY2guY29tcGlsZSgpJTNBJTIwQXZlcmFnZSUyMGluZmVyZW5jZSUyMHRpbWUlM0ElMjAyMC41NzAlMjBzZWNvbmRzLg==",highlighted:`With torch.compile(): Average inference time: 12.470 seconds. | |
| Without torch.compile(): Average inference time: 20.570 seconds.`,wrap:!1}}),X=new ge({props:{title:"Memory optimization",local:"memory-optimization",headingTag:"h3"}}),K=new me({props:{code:"JTJCJTIwcGlwZWxpbmUudHJhbnNmb3JtZXIuZW5hYmxlX2ZvcndhcmRfY2h1bmtpbmcoY2h1bmtfc2l6ZSUzRDElMkMlMjBkaW0lM0QxKQ==",highlighted:'<span class="hljs-addition">+ pipeline.transformer.enable_forward_chunking(chunk_size=1, dim=1)</span>',wrap:!1}}),ee=new ge({props:{title:"HunyuanDiTPipeline",local:"diffusers.HunyuanDiTPipeline",headingTag:"h2"}}),te=new ot({props:{name:"class diffusers.HunyuanDiTPipeline",anchor:"diffusers.HunyuanDiTPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": BertModel"},{name:"tokenizer",val:": BertTokenizer"},{name:"transformer",val:": HunyuanDiT2DModel"},{name:"scheduler",val:": DDPMScheduler"},{name:"safety_checker",val:": StableDiffusionSafetyChecker"},{name:"feature_extractor",val:": CLIPImageProcessor"},{name:"requires_safety_checker",val:": bool = True"},{name:"text_encoder_2",val:": typing.Optional[transformers.models.t5.modeling_t5.T5EncoderModel] = None"},{name:"tokenizer_2",val:": typing.Optional[transformers.models.t5.tokenization_t5.T5Tokenizer] = None"}],parametersDescription:[{anchor:"diffusers.HunyuanDiTPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_12249/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. We use | |
| <code>sdxl-vae-fp16-fix</code>.`,name:"vae"},{anchor:"diffusers.HunyuanDiTPipeline.text_encoder",description:`<strong>text_encoder</strong> (Optional[<code>~transformers.BertModel</code>, <code>~transformers.CLIPTextModel</code>]) — | |
| Frozen text-encoder (<a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a>). | |
| HunyuanDiT uses a fine-tuned [bilingual CLIP].`,name:"text_encoder"},{anchor:"diffusers.HunyuanDiTPipeline.tokenizer",description:`<strong>tokenizer</strong> (Optional[<code>~transformers.BertTokenizer</code>, <code>~transformers.CLIPTokenizer</code>]) — | |
| A <code>BertTokenizer</code> or <code>CLIPTokenizer</code> to tokenize text.`,name:"tokenizer"},{anchor:"diffusers.HunyuanDiTPipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/pr_12249/en/api/models/hunyuan_transformer2d#diffusers.HunyuanDiT2DModel">HunyuanDiT2DModel</a>) — | |
| The HunyuanDiT model designed by Tencent Hunyuan.`,name:"transformer"},{anchor:"diffusers.HunyuanDiTPipeline.text_encoder_2",description:`<strong>text_encoder_2</strong> (<code>T5EncoderModel</code>) — | |
| The mT5 embedder. Specifically, it is ‘t5-v1_1-xxl’.`,name:"text_encoder_2"},{anchor:"diffusers.HunyuanDiTPipeline.tokenizer_2",description:`<strong>tokenizer_2</strong> (<code>T5Tokenizer</code>) — | |
| The tokenizer for the mT5 embedder.`,name:"tokenizer_2"},{anchor:"diffusers.HunyuanDiTPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_12249/en/api/schedulers/ddpm#diffusers.DDPMScheduler">DDPMScheduler</a>) — | |
| A scheduler to be used in combination with HunyuanDiT to denoise the encoded image latents.`,name:"scheduler"}],source:"https://github.com/huggingface/diffusers/blob/vr_12249/src/diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py#L149"}}),ne=new ot({props:{name:"__call__",anchor:"diffusers.HunyuanDiTPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"height",val:": typing.Optional[int] = None"},{name:"width",val:": typing.Optional[int] = None"},{name:"num_inference_steps",val:": typing.Optional[int] = 50"},{name:"guidance_scale",val:": typing.Optional[float] = 5.0"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"num_images_per_prompt",val:": typing.Optional[int] = 1"},{name:"eta",val:": typing.Optional[float] = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds_2",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds_2",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask_2",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask_2",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"callback_on_step_end",val:": typing.Union[typing.Callable[[int, int, typing.Dict], NoneType], diffusers.callbacks.PipelineCallback, diffusers.callbacks.MultiPipelineCallbacks, NoneType] = None"},{name:"callback_on_step_end_tensor_inputs",val:": typing.List[str] = 
['latents']"},{name:"guidance_rescale",val:": float = 0.0"},{name:"original_size",val:": typing.Optional[typing.Tuple[int, int]] = (1024, 1024)"},{name:"target_size",val:": typing.Optional[typing.Tuple[int, int]] = None"},{name:"crops_coords_top_left",val:": typing.Tuple[int, int] = (0, 0)"},{name:"use_resolution_binning",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.HunyuanDiTPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide image generation. If not defined, you need to pass <code>prompt_embeds</code>.`,name:"prompt"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference. This parameter is modulated by <code>strength</code>.`,name:"num_inference_steps"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide what to not include in image generation. If not defined, you need to | |
| pass <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (<code>guidance_scale < 1</code>).`,name:"negative_prompt"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of images to generate per prompt.`,name:"num_images_per_prompt"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) from the <a href="https://huggingface.co/papers/2010.02502" rel="nofollow">DDIM</a> paper. Only | |
| applies to the <a href="/docs/diffusers/pr_12249/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, and is ignored in other schedulers.`,name:"eta"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not | |
| provided, text embeddings are generated from the <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.prompt_embeds_2",description:`<strong>prompt_embeds_2</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not | |
| provided, text embeddings are generated from the <code>prompt</code> input argument.`,name:"prompt_embeds_2"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If | |
| not provided, <code>negative_prompt_embeds</code> are generated from the <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.negative_prompt_embeds_2",description:`<strong>negative_prompt_embeds_2</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If | |
| not provided, <code>negative_prompt_embeds</code> are generated from the <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds_2"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.prompt_attention_mask",description:`<strong>prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Attention mask for the prompt. Required when <code>prompt_embeds</code> is passed directly.`,name:"prompt_attention_mask"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.prompt_attention_mask_2",description:`<strong>prompt_attention_mask_2</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Attention mask for the prompt. Required when <code>prompt_embeds_2</code> is passed directly.`,name:"prompt_attention_mask_2"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.negative_prompt_attention_mask",description:`<strong>negative_prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Attention mask for the negative prompt. Required when <code>negative_prompt_embeds</code> is passed directly.`,name:"negative_prompt_attention_mask"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.negative_prompt_attention_mask_2",description:`<strong>negative_prompt_attention_mask_2</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Attention mask for the negative prompt. Required when <code>negative_prompt_embeds_2</code> is passed directly.`,name:"negative_prompt_attention_mask_2"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) — | |
| The output format of the generated image. Choose between <code>PIL.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <a href="/docs/diffusers/pr_12249/en/api/pipelines/stable_diffusion/text2img#diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput">StableDiffusionPipelineOutput</a> instead of a | |
| plain tuple.`,name:"return_dict"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable[[int, int, Dict], None]</code>, <code>PipelineCallback</code>, <code>MultiPipelineCallbacks</code>, <em>optional</em>) — | |
| A callback function or a list of callback functions to be called at the end of each denoising step.`,name:"callback_on_step_end"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List[str]</code>, <em>optional</em>) — | |
| A list of tensor inputs that should be passed to the callback function. If not defined, all tensor | |
| inputs will be passed.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.guidance_rescale",description:`<strong>guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Rescale the noise_cfg according to <code>guidance_rescale</code>. Based on findings of <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise | |
| Schedules and Sample Steps are Flawed</a>. See Section 3.4`,name:"guidance_rescale"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.original_size",description:`<strong>original_size</strong> (<code>Tuple[int, int]</code>, <em>optional</em>, defaults to <code>(1024, 1024)</code>) — | |
| The original size of the image. Used to calculate the time ids.`,name:"original_size"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.target_size",description:`<strong>target_size</strong> (<code>Tuple[int, int]</code>, <em>optional</em>) — | |
| The target size of the image. Used to calculate the time ids.`,name:"target_size"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.crops_coords_top_left",description:`<strong>crops_coords_top_left</strong> (<code>Tuple[int, int]</code>, <em>optional</em>, defaults to <code>(0, 0)</code>) — | |
| The top left coordinates of the crop. Used to calculate the time ids.`,name:"crops_coords_top_left"},{anchor:"diffusers.HunyuanDiTPipeline.__call__.use_resolution_binning",description:`<strong>use_resolution_binning</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to use resolution binning or not. If <code>True</code>, the input resolution will be mapped to the closest | |
| standard resolution. Supported resolutions are 1024x1024, 1280x1280, 1024x768, 1152x864, 1280x960, | |
| 768x1024, 864x1152, 960x1280, 1280x768, and 768x1280. It is recommended to set this to <code>True</code>.`,name:"use_resolution_binning"}],source:"https://github.com/huggingface/diffusers/blob/vr_12249/src/diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py#L568",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <a | |
| href="/docs/diffusers/pr_12249/en/api/pipelines/stable_diffusion/text2img#diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput" | |
| >StableDiffusionPipelineOutput</a> is returned, | |
| otherwise a <code>tuple</code> is returned where the first element is a list with the generated images and the | |
| second element is a list of <code>bool</code>s indicating whether the corresponding generated image contains | |
| “not-safe-for-work” (nsfw) content.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/diffusers/pr_12249/en/api/pipelines/stable_diffusion/text2img#diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput" | |
| >StableDiffusionPipelineOutput</a> or <code>tuple</code></p> | |
| `}}),k=new It({props:{anchor:"diffusers.HunyuanDiTPipeline.__call__.example",$$slots:{default:[St]},$$scope:{ctx:_e}}}),oe=new ot({props:{name:"encode_prompt",anchor:"diffusers.HunyuanDiTPipeline.encode_prompt",parameters:[{name:"prompt",val:": str"},{name:"device",val:": device = None"},{name:"dtype",val:": dtype = None"},{name:"num_images_per_prompt",val:": int = 1"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"negative_prompt",val:": typing.Optional[str] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"max_sequence_length",val:": typing.Optional[int] = None"},{name:"text_encoder_index",val:": int = 0"}],parametersDescription:[{anchor:"diffusers.HunyuanDiTPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.HunyuanDiTPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>): | |
| torch device`,name:"device"},{anchor:"diffusers.HunyuanDiTPipeline.encode_prompt.dtype",description:`<strong>dtype</strong> (<code>torch.dtype</code>) — | |
| torch dtype`,name:"dtype"},{anchor:"diffusers.HunyuanDiTPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.HunyuanDiTPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.HunyuanDiTPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.HunyuanDiTPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.HunyuanDiTPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.HunyuanDiTPipeline.encode_prompt.prompt_attention_mask",description:`<strong>prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Attention mask for the prompt. Required when <code>prompt_embeds</code> is passed directly.`,name:"prompt_attention_mask"},{anchor:"diffusers.HunyuanDiTPipeline.encode_prompt.negative_prompt_attention_mask",description:`<strong>negative_prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Attention mask for the negative prompt. Required when <code>negative_prompt_embeds</code> is passed directly.`,name:"negative_prompt_attention_mask"},{anchor:"diffusers.HunyuanDiTPipeline.encode_prompt.max_sequence_length",description:"<strong>max_sequence_length</strong> (<code>int</code>, <em>optional</em>) — maximum sequence length to use for the prompt.",name:"max_sequence_length"},{anchor:"diffusers.HunyuanDiTPipeline.encode_prompt.text_encoder_index",description:`<strong>text_encoder_index</strong> (<code>int</code>, <em>optional</em>) — | |
| Index of the text encoder to use. <code>0</code> for clip and <code>1</code> for T5.`,name:"text_encoder_index"}],source:"https://github.com/huggingface/diffusers/blob/vr_12249/src/diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py#L248"}}),ie=new Lt({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/hunyuandit.md"}}),{c(){d=s("meta"),U=i(),w=s("p"),T=i(),m(v.$$.fragment),p=i(),m(x.$$.fragment),he=i(),P=s("p"),P.innerHTML=it,ye=i(),C=s("p"),C.innerHTML=at,be=i(),J=s("p"),J.textContent=st,Te=i(),j=s("p"),j.innerHTML=rt,ve=i(),I=s("p"),I.innerHTML=lt,xe=i(),L=s("p"),L.innerHTML=pt,we=i(),S=s("p"),S.textContent=dt,Me=i(),B=s("ul"),B.innerHTML=ct,He=i(),H=s("blockquote"),H.innerHTML=mt,$e=i(),$=s("blockquote"),$.innerHTML=ut,ke=i(),m(z.$$.fragment),De=i(),N=s("p"),N.innerHTML=ft,Ue=i(),m(G.$$.fragment),Pe=i(),Z=s("p"),Z.innerHTML=gt,Ce=i(),V=s("p"),V.textContent=_t,Je=i(),m(W.$$.fragment),je=i(),R=s("p"),R.innerHTML=ht,Ie=i(),m(O.$$.fragment),Le=i(),E=s("p"),E.textContent=yt,Se=i(),m(F.$$.fragment),Be=i(),q=s("p"),q.innerHTML=bt,ze=i(),m(Q.$$.fragment),Ne=i(),m(X.$$.fragment),Ge=i(),A=s("p"),A.innerHTML=Tt,Ze=i(),Y=s("p"),Y.innerHTML=vt,Ve=i(),m(K.$$.fragment),We=i(),m(ee.$$.fragment),Re=i(),c=s("div"),m(te.$$.fragment),Qe=i(),se=s("p"),se.textContent=xt,Xe=i(),re=s("p"),re.innerHTML=wt,Ae=i(),le=s("p"),le.innerHTML=Mt,Ye=i(),M=s("div"),m(ne.$$.fragment),Ke=i(),pe=s("p"),pe.textContent=Ht,et=i(),m(k.$$.fragment),tt=i(),D=s("div"),m(oe.$$.fragment),nt=i(),de=s("p"),de.textContent=$t,Oe=i(),m(ie.$$.fragment),Ee=i(),ue=s("p"),this.h()},l(e){const 
t=Jt("svelte-u9bgzb",document.head);d=r(t,"META",{name:!0,content:!0}),t.forEach(n),U=a(e),w=r(e,"P",{}),fe(w).forEach(n),T=a(e),u(v.$$.fragment,e),p=a(e),u(x.$$.fragment,e),he=a(e),P=r(e,"P",{"data-svelte-h":!0}),l(P)!=="svelte-2bc00t"&&(P.innerHTML=it),ye=a(e),C=r(e,"P",{"data-svelte-h":!0}),l(C)!=="svelte-7etk87"&&(C.innerHTML=at),be=a(e),J=r(e,"P",{"data-svelte-h":!0}),l(J)!=="svelte-1cwsb16"&&(J.textContent=st),Te=a(e),j=r(e,"P",{"data-svelte-h":!0}),l(j)!=="svelte-pnixkh"&&(j.innerHTML=rt),ve=a(e),I=r(e,"P",{"data-svelte-h":!0}),l(I)!=="svelte-14jgagb"&&(I.innerHTML=lt),xe=a(e),L=r(e,"P",{"data-svelte-h":!0}),l(L)!=="svelte-4ilgcj"&&(L.innerHTML=pt),we=a(e),S=r(e,"P",{"data-svelte-h":!0}),l(S)!=="svelte-csifv3"&&(S.textContent=dt),Me=a(e),B=r(e,"UL",{"data-svelte-h":!0}),l(B)!=="svelte-1jg2mmd"&&(B.innerHTML=ct),He=a(e),H=r(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),l(H)!=="svelte-r1jcqf"&&(H.innerHTML=mt),$e=a(e),$=r(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),l($)!=="svelte-1f01qtd"&&($.innerHTML=ut),ke=a(e),u(z.$$.fragment,e),De=a(e),N=r(e,"P",{"data-svelte-h":!0}),l(N)!=="svelte-ni7gdi"&&(N.innerHTML=ft),Ue=a(e),u(G.$$.fragment,e),Pe=a(e),Z=r(e,"P",{"data-svelte-h":!0}),l(Z)!=="svelte-iekg51"&&(Z.innerHTML=gt),Ce=a(e),V=r(e,"P",{"data-svelte-h":!0}),l(V)!=="svelte-jub7f1"&&(V.textContent=_t),Je=a(e),u(W.$$.fragment,e),je=a(e),R=r(e,"P",{"data-svelte-h":!0}),l(R)!=="svelte-4294wb"&&(R.innerHTML=ht),Ie=a(e),u(O.$$.fragment,e),Le=a(e),E=r(e,"P",{"data-svelte-h":!0}),l(E)!=="svelte-9i4prs"&&(E.textContent=yt),Se=a(e),u(F.$$.fragment,e),Be=a(e),q=r(e,"P",{"data-svelte-h":!0}),l(q)!=="svelte-1918cyp"&&(q.innerHTML=bt),ze=a(e),u(Q.$$.fragment,e),Ne=a(e),u(X.$$.fragment,e),Ge=a(e),A=r(e,"P",{"data-svelte-h":!0}),l(A)!=="svelte-ieztxx"&&(A.innerHTML=Tt),Ze=a(e),Y=r(e,"P",{"data-svelte-h":!0}),l(Y)!=="svelte-obbekz"&&(Y.innerHTML=vt),Ve=a(e),u(K.$$.fragment,e),We=a(e),u(ee.$$.fragment,e),Re=a(e),c=r(e,"DIV",{class:!0});var 
b=fe(c);u(te.$$.fragment,b),Qe=a(b),se=r(b,"P",{"data-svelte-h":!0}),l(se)!=="svelte-ue2deu"&&(se.textContent=xt),Xe=a(b),re=r(b,"P",{"data-svelte-h":!0}),l(re)!=="svelte-1wdogpd"&&(re.innerHTML=wt),Ae=a(b),le=r(b,"P",{"data-svelte-h":!0}),l(le)!=="svelte-18ud7q1"&&(le.innerHTML=Mt),Ye=a(b),M=r(b,"DIV",{class:!0});var ce=fe(M);u(ne.$$.fragment,ce),Ke=a(ce),pe=r(ce,"P",{"data-svelte-h":!0}),l(pe)!=="svelte-1jqbgbl"&&(pe.textContent=Ht),et=a(ce),u(k.$$.fragment,ce),ce.forEach(n),tt=a(b),D=r(b,"DIV",{class:!0});var qe=fe(D);u(oe.$$.fragment,qe),nt=a(qe),de=r(qe,"P",{"data-svelte-h":!0}),l(de)!=="svelte-16q0ax1"&&(de.textContent=$t),qe.forEach(n),b.forEach(n),Oe=a(e),u(ie.$$.fragment,e),Ee=a(e),ue=r(e,"P",{}),fe(ue).forEach(n),this.h()},h(){ae(d,"name","hf:doc:metadata"),ae(d,"content",zt),ae(H,"class","tip"),ae($,"class","tip"),ae(M,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),ae(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),ae(c,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){y(document.head,d),o(e,U,t),o(e,w,t),o(e,T,t),f(v,e,t),o(e,p,t),f(x,e,t),o(e,he,t),o(e,P,t),o(e,ye,t),o(e,C,t),o(e,be,t),o(e,J,t),o(e,Te,t),o(e,j,t),o(e,ve,t),o(e,I,t),o(e,xe,t),o(e,L,t),o(e,we,t),o(e,S,t),o(e,Me,t),o(e,B,t),o(e,He,t),o(e,H,t),o(e,$e,t),o(e,$,t),o(e,ke,t),f(z,e,t),o(e,De,t),o(e,N,t),o(e,Ue,t),f(G,e,t),o(e,Pe,t),o(e,Z,t),o(e,Ce,t),o(e,V,t),o(e,Je,t),f(W,e,t),o(e,je,t),o(e,R,t),o(e,Ie,t),f(O,e,t),o(e,Le,t),o(e,E,t),o(e,Se,t),f(F,e,t),o(e,Be,t),o(e,q,t),o(e,ze,t),f(Q,e,t),o(e,Ne,t),f(X,e,t),o(e,Ge,t),o(e,A,t),o(e,Ze,t),o(e,Y,t),o(e,Ve,t),f(K,e,t),o(e,We,t),f(ee,e,t),o(e,Re,t),o(e,c,t),f(te,c,null),y(c,Qe),y(c,se),y(c,Xe),y(c,re),y(c,Ae),y(c,le),y(c,Ye),y(c,M),f(ne,M,null),y(M,Ke),y(M,pe),y(M,et),f(k,M,null),y(c,tt),y(c,D),f(oe,D,null),y(D,nt),y(D,de),o(e,Oe,t),f(ie,e,t),o(e,Ee,t),o(e,ue,t),Fe=!0},p(e,[t]){const 
b={};t&2&&(b.$$scope={dirty:t,ctx:e}),k.$set(b)},i(e){Fe||(g(v.$$.fragment,e),g(x.$$.fragment,e),g(z.$$.fragment,e),g(G.$$.fragment,e),g(W.$$.fragment,e),g(O.$$.fragment,e),g(F.$$.fragment,e),g(Q.$$.fragment,e),g(X.$$.fragment,e),g(K.$$.fragment,e),g(ee.$$.fragment,e),g(te.$$.fragment,e),g(ne.$$.fragment,e),g(k.$$.fragment,e),g(oe.$$.fragment,e),g(ie.$$.fragment,e),Fe=!0)},o(e){_(v.$$.fragment,e),_(x.$$.fragment,e),_(z.$$.fragment,e),_(G.$$.fragment,e),_(W.$$.fragment,e),_(O.$$.fragment,e),_(F.$$.fragment,e),_(Q.$$.fragment,e),_(X.$$.fragment,e),_(K.$$.fragment,e),_(ee.$$.fragment,e),_(te.$$.fragment,e),_(ne.$$.fragment,e),_(k.$$.fragment,e),_(oe.$$.fragment,e),_(ie.$$.fragment,e),Fe=!1},d(e){e&&(n(U),n(w),n(T),n(p),n(he),n(P),n(ye),n(C),n(be),n(J),n(Te),n(j),n(ve),n(I),n(xe),n(L),n(we),n(S),n(Me),n(B),n(He),n(H),n($e),n($),n(ke),n(De),n(N),n(Ue),n(Pe),n(Z),n(Ce),n(V),n(Je),n(je),n(R),n(Ie),n(Le),n(E),n(Se),n(Be),n(q),n(ze),n(Ne),n(Ge),n(A),n(Ze),n(Y),n(Ve),n(We),n(Re),n(c),n(Oe),n(Ee),n(ue)),n(d),h(v,e),h(x,e),h(z,e),h(G,e),h(W,e),h(O,e),h(F,e),h(Q,e),h(X,e),h(K,e),h(ee,e),h(te),h(ne),h(k),h(oe),h(ie,e)}}}const zt='{"title":"Hunyuan-DiT","local":"hunyuan-dit","sections":[{"title":"Optimization","local":"optimization","sections":[{"title":"Inference","local":"inference","sections":[],"depth":3},{"title":"Memory optimization","local":"memory-optimization","sections":[],"depth":3}],"depth":2},{"title":"HunyuanDiTPipeline","local":"diffusers.HunyuanDiTPipeline","sections":[],"depth":2}],"depth":1}';function Nt(_e){return Dt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ft extends Pt{constructor(d){super(),Ct(this,d,Nt,Bt,kt,{})}}export{Ft as component}; | |
Xet Storage Details
- Size: 37.8 kB
- Xet hash: 9c77138241dc59183112229104f4268f713db098f541394375881460124d894e

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.