Buckets:
| import{s as Ge,o as Be,n as ve}from"../chunks/scheduler.53228c21.js";import{S as Ie,i as We,e as m,s as l,c as g,h as De,a as u,d as n,b as r,f as z,g as _,j as U,k as F,l as h,m as i,n as y,t as b,o as w,p as T}from"../chunks/index.100fac89.js";import{C as Le}from"../chunks/CopyLLMTxtMenu.ed524afe.js";import{D as Te}from"../chunks/Docstring.7606acc6.js";import{C as ce}from"../chunks/CodeBlock.0adb3827.js";import{E as Me}from"../chunks/ExampleCodeBlock.27b35378.js";import{H as pe,E as Ae}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.7aa763e7.js";function Pe($){let a,v="Examples:",p,s,d;return s=new ce({props:{code:"aW1wb3J0JTIwc291bmRmaWxlJTIwYXMlMjBzZiUwQWltcG9ydCUyMHRvcmNoJTBBZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMExvbmdDYXRBdWRpb0RpVFBpcGVsaW5lJTBBJTBBcGlwZSUyMCUzRCUyMExvbmdDYXRBdWRpb0RpVFBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJydWl4aWFuZ21hJTJGTG9uZ0NhdC1BdWRpb0RpVC0xQi1EaWZmdXNlcnMlMjIpJTBBcGlwZS50byglMjJjdWRhJTIyKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkElMjBjYWxtJTIwb2NlYW4lMjB3YXZlJTIwYW1iaWVuY2UlMjB3aXRoJTIwc29mdCUyMHdpbmQlMjBpbiUyMHRoZSUyMGJhY2tncm91bmQuJTIyJTBBYXVkaW8lMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX2R1cmF0aW9uX3MlM0Q1LjAlMkMlMEElMjAlMjAlMjAlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNEMjAlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDQuMCUyQyUwQSUyMCUyMCUyMCUyMGdlbmVyYXRvciUzRHRvcmNoLkdlbmVyYXRvciglMjJjdWRhJTIyKS5tYW51YWxfc2VlZCg0MiklMkMlMEEpLmF1ZGlvcyU1QjAlMkMlMjAwJTVEJTBBc2Yud3JpdGUoJTIyb3V0cHV0LndhdiUyMiUyQyUyMGF1ZGlvJTJDJTIwcGlwZS5zYW1wbGVfcmF0ZSk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> soundfile <span class="hljs-keyword">as</span> sf | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LongCatAudioDiTPipeline | |
| <span class="hljs-meta">>>> </span>pipe = LongCatAudioDiTPipeline.from_pretrained(<span class="hljs-string">"ruixiangma/LongCat-AudioDiT-1B-Diffusers"</span>) | |
| <span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"A calm ocean wave ambience with soft wind in the background."</span> | |
| <span class="hljs-meta">>>> </span>audio = pipe( | |
| <span class="hljs-meta">... </span> prompt, | |
| <span class="hljs-meta">... </span> audio_duration_s=<span class="hljs-number">5.0</span>, | |
| <span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">20</span>, | |
| <span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">4.0</span>, | |
| <span class="hljs-meta">... </span> generator=torch.Generator(<span class="hljs-string">"cuda"</span>).manual_seed(<span class="hljs-number">42</span>), | |
| <span class="hljs-meta">... </span>).audios[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>sf.write(<span class="hljs-string">"output.wav"</span>, audio, pipe.sample_rate)`,lang:"py",wrap:!1}}),{c(){a=m("p"),a.textContent=v,p=l(),g(s.$$.fragment)},l(t){a=u(t,"P",{"data-svelte-h":!0}),U(a)!=="svelte-kvfsh7"&&(a.textContent=v),p=r(t),_(s.$$.fragment,t)},m(t,c){i(t,a,c),i(t,p,c),y(s,t,c),d=!0},p:ve,i(t){d||(b(s.$$.fragment,t),d=!0)},o(t){w(s.$$.fragment,t),d=!1},d(t){t&&(n(a),n(p)),T(s,t)}}}function Re($){let a,v="If you get the error message below, you need to finetune the weights for your downstream task:",p,s,d;return s=new ce({props:{code:"U29tZSUyMHdlaWdodHMlMjBvZiUyMFVOZXQyRENvbmRpdGlvbk1vZGVsJTIwd2VyZSUyMG5vdCUyMGluaXRpYWxpemVkJTIwZnJvbSUyMHRoZSUyMG1vZGVsJTIwY2hlY2twb2ludCUyMGF0JTIwc3RhYmxlLWRpZmZ1c2lvbi12MS01JTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIwYW5kJTIwYXJlJTIwbmV3bHklMjBpbml0aWFsaXplZCUyMGJlY2F1c2UlMjB0aGUlMjBzaGFwZXMlMjBkaWQlMjBub3QlMjBtYXRjaCUzQSUwQS0lMjBjb252X2luLndlaWdodCUzQSUyMGZvdW5kJTIwc2hhcGUlMjB0b3JjaC5TaXplKCU1QjMyMCUyQyUyMDQlMkMlMjAzJTJDJTIwMyU1RCklMjBpbiUyMHRoZSUyMGNoZWNrcG9pbnQlMjBhbmQlMjB0b3JjaC5TaXplKCU1QjMyMCUyQyUyMDklMkMlMjAzJTJDJTIwMyU1RCklMjBpbiUyMHRoZSUyMG1vZGVsJTIwaW5zdGFudGlhdGVkJTBBWW91JTIwc2hvdWxkJTIwcHJvYmFibHklMjBUUkFJTiUyMHRoaXMlMjBtb2RlbCUyMG9uJTIwYSUyMGRvd24tc3RyZWFtJTIwdGFzayUyMHRvJTIwYmUlMjBhYmxlJTIwdG8lMjB1c2UlMjBpdCUyMGZvciUyMHByZWRpY3Rpb25zJTIwYW5kJTIwaW5mZXJlbmNlLg==",highlighted:`Some weights of UNet2DConditionModel were not initialized from the model checkpoint <span class="hljs-built_in">at</span> stable-<span class="hljs-keyword">diffusion-v1-5/stable-diffusion-v1-5 </span><span class="hljs-keyword">and </span>are newly initialized <span class="hljs-keyword">because </span>the <span class="hljs-keyword">shapes </span><span class="hljs-keyword">did </span>not match: | |
| - conv_in.weight: found <span class="hljs-keyword">shape </span>torch.Size([<span class="hljs-number">320</span>, <span class="hljs-number">4</span>, <span class="hljs-number">3</span>, <span class="hljs-number">3</span>]) in the checkpoint <span class="hljs-keyword">and </span>torch.Size([<span class="hljs-number">320</span>, <span class="hljs-number">9</span>, <span class="hljs-number">3</span>, <span class="hljs-number">3</span>]) in the model <span class="hljs-keyword">instantiated | |
| </span>You <span class="hljs-keyword">should </span>probably TRAIN this model on a down-stream task to <span class="hljs-keyword">be </span>able to use it for predictions <span class="hljs-keyword">and </span>inference.`,lang:"",wrap:!1}}),{c(){a=m("p"),a.textContent=v,p=l(),g(s.$$.fragment)},l(t){a=u(t,"P",{"data-svelte-h":!0}),U(a)!=="svelte-xueb0m"&&(a.textContent=v),p=r(t),_(s.$$.fragment,t)},m(t,c){i(t,a,c),i(t,p,c),y(s,t,c),d=!0},p:ve,i(t){d||(b(s.$$.fragment,t),d=!0)},o(t){w(s.$$.fragment,t),d=!1},d(t){t&&(n(a),n(p)),T(s,t)}}}function Ve($){let a,v="Examples:",p,s,d;return s=new ce({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMERpZmZ1c2lvblBpcGVsaW5lJTBBJTBBJTIzJTIwRG93bmxvYWQlMjBwaXBlbGluZSUyMGZyb20lMjBodWdnaW5nZmFjZS5jbyUyMGFuZCUyMGNhY2hlLiUwQXBpcGVsaW5lJTIwJTNEJTIwRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUyMkNvbXBWaXMlMkZsZG0tdGV4dDJpbS1sYXJnZS0yNTYlMjIpJTBBJTBBJTIzJTIwRG93bmxvYWQlMjBwaXBlbGluZSUyMHRoYXQlMjByZXF1aXJlcyUyMGFuJTIwYXV0aG9yaXphdGlvbiUyMHRva2VuJTBBJTIzJTIwRm9yJTIwbW9yZSUyMGluZm9ybWF0aW9uJTIwb24lMjBhY2Nlc3MlMjB0b2tlbnMlMkMlMjBwbGVhc2UlMjByZWZlciUyMHRvJTIwdGhpcyUyMHNlY3Rpb24lMEElMjMlMjBvZiUyMHRoZSUyMGRvY3VtZW50YXRpb24lNUQoaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRvY3MlMkZodWIlMkZzZWN1cml0eS10b2tlbnMpJTBBcGlwZWxpbmUlMjAlM0QlMjBEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTIyc3RhYmxlLWRpZmZ1c2lvbi12MS01JTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyKSUwQSUwQSUyMyUyMFVzZSUyMGElMjBkaWZmZXJlbnQlMjBzY2hlZHVsZXIlMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTE1TRGlzY3JldGVTY2hlZHVsZXIlMEElMEFzY2hlZHVsZXIlMjAlM0QlMjBMTVNEaXNjcmV0ZVNjaGVkdWxlci5mcm9tX2NvbmZpZyhwaXBlbGluZS5zY2hlZHVsZXIuY29uZmlnKSUwQXBpcGVsaW5lLnNjaGVkdWxlciUyMCUzRCUyMHNjaGVkdWxlcg==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Download pipeline from huggingface.co and cache.</span> | |
| <span class="hljs-meta">>>> </span>pipeline = DiffusionPipeline.from_pretrained(<span class="hljs-string">"CompVis/ldm-text2im-large-256"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Download pipeline that requires an authorization token</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># For more information on access tokens, please refer to this section</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># of the documentation](https://huggingface.co/docs/hub/security-tokens)</span> | |
| <span class="hljs-meta">>>> </span>pipeline = DiffusionPipeline.from_pretrained(<span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Use a different scheduler</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LMSDiscreteScheduler | |
| <span class="hljs-meta">>>> </span>scheduler = LMSDiscreteScheduler.from_config(pipeline.scheduler.config) | |
| <span class="hljs-meta">>>> </span>pipeline.scheduler = scheduler`,lang:"py",wrap:!1}}),{c(){a=m("p"),a.textContent=v,p=l(),g(s.$$.fragment)},l(t){a=u(t,"P",{"data-svelte-h":!0}),U(a)!=="svelte-kvfsh7"&&(a.textContent=v),p=r(t),_(s.$$.fragment,t)},m(t,c){i(t,a,c),i(t,p,c),y(s,t,c),d=!0},p:ve,i(t){d||(b(s.$$.fragment,t),d=!0)},o(t){w(s.$$.fragment,t),d=!1},d(t){t&&(n(a),n(p)),T(s,t)}}}function Xe($){let a,v,p,s,d,t,c,O,B,je='LongCat-AudioDiT is a text-to-audio diffusion model from Meituan LongCat. The diffusers integration exposes a standard <a href="/docs/diffusers/pr_13748/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> interface for text-conditioned audio generation.',K,I,Ue='This pipeline was adapted from the LongCat-AudioDiT reference implementation: <a href="https://github.com/meituan-longcat/LongCat-AudioDiT" rel="nofollow">https://github.com/meituan-longcat/LongCat-AudioDiT</a>',ee,W,Ce="This pipeline supports loading from a local directory or Hugging Face Hub repository in diffusers format (containing <code>text_encoder/</code>, <code>transformer/</code>, <code>vae/</code>, <code>tokenizer/</code>, and <code>scheduler/</code> subfolders).",te,D,oe,L,ne,A,ae,P,Je="<li><code>audio_duration_s</code> is the most direct way to control output duration.</li> <li>Use <code>generator=torch.Generator("cuda").manual_seed(42)</code> to make generation reproducible.</li> <li>Output shape is <code>(batch, channels, samples)</code> - use <code>.audios[0, 0]</code> to get a single audio sample.</li> <li>The pipeline outputs mono audio (1 channel). If you need stereo, you can duplicate the channel: <code>audio.unsqueeze(0).repeat(1, 2, 1)</code>.</li>",se,R,ie,j,V,me,C,X,ue,Q,xe="Function invoked when calling the pipeline for generation.",fe,k,he,f,Y,ge,H,$e="Instantiate a PyTorch diffusion pipeline from pretrained pipeline weights.",_e,E,ke="The pipeline is set in evaluation mode (<code>model.eval()</code>) by default.",ye,Z,be,S,Ze=`<p>> To use private or <a href="https://huggingface.co/docs/hub/models-gated#gated-models" rel="nofollow">gated</a> models, log-in | |
| with <code>hf > auth login</code>.</p>`,we,G,le,N,re,q,de;return d=new Le({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),c=new pe({props:{title:"LongCat-AudioDiT",local:"longcat-audiodit",headingTag:"h1"}}),D=new pe({props:{title:"Usage",local:"usage",headingTag:"h2"}}),L=new ce({props:{code:"aW1wb3J0JTIwc291bmRmaWxlJTIwYXMlMjBzZiUwQWltcG9ydCUyMHRvcmNoJTBBZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMExvbmdDYXRBdWRpb0RpVFBpcGVsaW5lJTBBJTBBcGlwZWxpbmUlMjAlM0QlMjBMb25nQ2F0QXVkaW9EaVRQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIycnVpeGlhbmdtYSUyRkxvbmdDYXQtQXVkaW9EaVQtMUItRGlmZnVzZXJzJTIyJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQXBpcGVsaW5lJTIwJTNEJTIwcGlwZWxpbmUudG8oJTIyY3VkYSUyMiklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJBJTIwY2FsbSUyMG9jZWFuJTIwd2F2ZSUyMGFtYmllbmNlJTIwd2l0aCUyMHNvZnQlMjB3aW5kJTIwaW4lMjB0aGUlMjBiYWNrZ3JvdW5kLiUyMiUwQWF1ZGlvJTIwJTNEJTIwcGlwZWxpbmUoJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fZHVyYXRpb25fcyUzRDUuMCUyQyUwQSUyMCUyMCUyMCUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0QxNiUyQyUwQSUyMCUyMCUyMCUyMGd1aWRhbmNlX3NjYWxlJTNENC4wJTJDJTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yJTNEdG9yY2guR2VuZXJhdG9yKCUyMmN1ZGElMjIpLm1hbnVhbF9zZWVkKDQyKSUyQyUwQSkuYXVkaW9zJTVCMCUyQyUyMDAlNUQlMEElMEFzZi53cml0ZSglMjJsb25nY2F0LndhdiUyMiUyQyUyMGF1ZGlvJTJDJTIwcGlwZWxpbmUuc2FtcGxlX3JhdGUp",highlighted:`<span class="hljs-keyword">import</span> soundfile <span class="hljs-keyword">as</span> sf | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LongCatAudioDiTPipeline | |
| pipeline = LongCatAudioDiTPipeline.from_pretrained( | |
| <span class="hljs-string">"ruixiangma/LongCat-AudioDiT-1B-Diffusers"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| pipeline = pipeline.to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"A calm ocean wave ambience with soft wind in the background."</span> | |
| audio = pipeline( | |
| prompt, | |
| audio_duration_s=<span class="hljs-number">5.0</span>, | |
| num_inference_steps=<span class="hljs-number">16</span>, | |
| guidance_scale=<span class="hljs-number">4.0</span>, | |
| generator=torch.Generator(<span class="hljs-string">"cuda"</span>).manual_seed(<span class="hljs-number">42</span>), | |
| ).audios[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>] | |
| sf.write(<span class="hljs-string">"longcat.wav"</span>, audio, pipeline.sample_rate)`,lang:"py",wrap:!1}}),A=new pe({props:{title:"Tips",local:"tips",headingTag:"h2"}}),R=new pe({props:{title:"LongCatAudioDiTPipeline",local:"diffusers.LongCatAudioDiTPipeline",headingTag:"h2"}}),V=new Te({props:{name:"class diffusers.LongCatAudioDiTPipeline",anchor:"diffusers.LongCatAudioDiTPipeline",parameters:[{name:"vae",val:": LongCatAudioDiTVae"},{name:"text_encoder",val:": UMT5EncoderModel"},{name:"tokenizer",val:": PreTrainedTokenizerBase"},{name:"transformer",val:": LongCatAudioDiTTransformer"},{name:"scheduler",val:": diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler | None = None"}],source:"https://github.com/huggingface/diffusers/blob/vr_13748/src/diffusers/pipelines/longcat_audio_dit/pipeline_longcat_audio_dit.py#L99"}}),X=new Te({props:{name:"__call__",anchor:"diffusers.LongCatAudioDiTPipeline.__call__",parameters:[{name:"prompt",val:": str | list[str]"},{name:"negative_prompt",val:": str | list[str] | None = None"},{name:"audio_duration_s",val:": float | None = None"},{name:"latents",val:": torch.Tensor | None = None"},{name:"num_inference_steps",val:": int = 16"},{name:"guidance_scale",val:": float = 4.0"},{name:"generator",val:": torch._C.Generator | list[torch._C.Generator] | None = None"},{name:"output_type",val:": str = 'np'"},{name:"return_dict",val:": bool = True"},{name:"callback_on_step_end",val:": typing.Optional[typing.Callable[[int, int], NoneType]] = None"},{name:"callback_on_step_end_tensor_inputs",val:": list = ['latents']"}],parametersDescription:[{anchor:"diffusers.LongCatAudioDiTPipeline.__call__.prompt",description:"<strong>prompt</strong> (<code>str</code> or <code>list[str]</code>) — Prompt or prompts that guide audio generation.",name:"prompt"},{anchor:"diffusers.LongCatAudioDiTPipeline.__call__.negative_prompt",description:"<strong>negative_prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) — Negative prompt(s) for classifier-free guidance.",name:"negative_prompt"},{anchor:"diffusers.LongCatAudioDiTPipeline.__call__.audio_duration_s",description:`<strong>audio_duration_s</strong> (<code>float</code>, <em>optional</em>) — | |
| Target audio duration in seconds. Ignored when <code>latents</code> is provided.`,name:"audio_duration_s"},{anchor:"diffusers.LongCatAudioDiTPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents of shape <code>(batch_size, duration, latent_dim)</code>.`,name:"latents"},{anchor:"diffusers.LongCatAudioDiTPipeline.__call__.num_inference_steps",description:"<strong>num_inference_steps</strong> (<code>int</code>, defaults to 16) — Number of denoising steps.",name:"num_inference_steps"},{anchor:"diffusers.LongCatAudioDiTPipeline.__call__.guidance_scale",description:"<strong>guidance_scale</strong> (<code>float</code>, defaults to 4.0) — Guidance scale for classifier-free guidance.",name:"guidance_scale"},{anchor:"diffusers.LongCatAudioDiTPipeline.__call__.generator",description:"<strong>generator</strong> (<code>torch.Generator</code> or <code>list[torch.Generator]</code>, <em>optional</em>) — Random generator(s).",name:"generator"},{anchor:"diffusers.LongCatAudioDiTPipeline.__call__.output_type",description:"<strong>output_type</strong> (<code>str</code>, defaults to <code>"np"</code>) — Output format: <code>"np"</code>, <code>"pt"</code>, or <code>"latent"</code>.",name:"output_type"},{anchor:"diffusers.LongCatAudioDiTPipeline.__call__.return_dict",description:"<strong>return_dict</strong> (<code>bool</code>, defaults to <code>True</code>) — Whether to return <code>AudioPipelineOutput</code>.",name:"return_dict"},{anchor:"diffusers.LongCatAudioDiTPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function called at the end of each denoising step with the pipeline, step index, timestep, and tensor | |
| inputs specified by <code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.LongCatAudioDiTPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>list</code>, defaults to <code>["latents"]</code>) — | |
| Tensor inputs passed to <code>callback_on_step_end</code>.`,name:"callback_on_step_end_tensor_inputs"}],source:"https://github.com/huggingface/diffusers/blob/vr_13748/src/diffusers/pipelines/longcat_audio_dit/pipeline_longcat_audio_dit.py#L219"}}),k=new Me({props:{anchor:"diffusers.LongCatAudioDiTPipeline.__call__.example",$$slots:{default:[Pe]},$$scope:{ctx:$}}}),Y=new Te({props:{name:"from_pretrained",anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained",parameters:[{name:"pretrained_model_name_or_path",val:": str | os.PathLike"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.pretrained_model_name_or_path",description:`<strong>pretrained_model_name_or_path</strong> (<code>str</code> or <code>os.PathLike</code>, <em>optional</em>) — | |
| Can be either:</p> | |
| <ul> | |
| <li>A string, the <em>repo id</em> (for example <code>CompVis/ldm-text2im-large-256</code>) of a pretrained pipeline | |
| hosted on the Hub.</li> | |
| <li>A path to a <em>directory</em> (for example <code>./my_pipeline_directory/</code>) containing pipeline weights | |
| saved using | |
| <a href="/docs/diffusers/pr_13748/en/api/pipelines/overview#diffusers.DiffusionPipeline.save_pretrained">save_pretrained()</a>.</li> | |
| <li>A path to a <em>directory</em> (for example <code>./my_pipeline_directory/</code>) containing a dduf file</li> | |
| </ul>`,name:"pretrained_model_name_or_path"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.torch_dtype",description:`<strong>torch_dtype</strong> (<code>torch.dtype</code> or <code>dict[str, Union[str, torch.dtype]]</code>, <em>optional</em>) — | |
| Override the default <code>torch.dtype</code> and load the model with another dtype. To load submodels with | |
| different dtype pass a <code>dict</code> (for example <code>{'transformer': torch.bfloat16, 'vae': torch.float16}</code>). | |
| Set the default dtype for unspecified components with <code>default</code> (for example <code>{'transformer': torch.bfloat16, 'default': torch.float16}</code>). If a component is not specified and no default is set, | |
| <code>torch.float32</code> is used.`,name:"torch_dtype"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.custom_pipeline",description:`<strong>custom_pipeline</strong> (<code>str</code>, <em>optional</em>) —</p> | |
| <blockquote class="warning"> | |
| <p>> 🧪 This is an experimental feature and may change in the future.</p> | |
| </blockquote> | |
| <p>Can be either:</p> | |
| <ul> | |
| <li>A string, the <em>repo id</em> (for example <code>hf-internal-testing/diffusers-dummy-pipeline</code>) of a custom | |
| pipeline hosted on the Hub. The repository must contain a file called pipeline.py that defines | |
| the custom pipeline.</li> | |
| <li>A string, the <em>file name</em> of a community pipeline hosted on GitHub under | |
| <a href="https://github.com/huggingface/diffusers/tree/main/examples/community" rel="nofollow">Community</a>. Valid file | |
| names must match the file name and not the pipeline script (<code>clip_guided_stable_diffusion</code> | |
| instead of <code>clip_guided_stable_diffusion.py</code>). Community pipelines are always loaded from the | |
| current main branch of GitHub.</li> | |
| <li>A path to a directory (<code>./my_pipeline_directory/</code>) containing a custom pipeline. The directory | |
| must contain a file called <code>pipeline.py</code> that defines the custom pipeline.</li> | |
| </ul> | |
| <p>For more information on how to load and create custom pipelines, please have a look at <a href="https://huggingface.co/docs/diffusers/using-diffusers/custom_pipeline_overview" rel="nofollow">Loading and | |
| Adding Custom | |
| Pipelines</a>`,name:"custom_pipeline"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.force_download",description:`<strong>force_download</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to force the (re-)download of the model weights and configuration files, overriding the | |
| cached versions if they exist.`,name:"force_download"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.cache_dir",description:`<strong>cache_dir</strong> (<code>Union[str, os.PathLike]</code>, <em>optional</em>) — | |
| Path to a directory where a downloaded pretrained model configuration is cached if the standard cache | |
| is not used.`,name:"cache_dir"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.proxies",description:`<strong>proxies</strong> (<code>Dict[str, str]</code>, <em>optional</em>) — | |
| A dictionary of proxy servers to use by protocol or endpoint, for example, <code>{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}</code>. The proxies are used on each request.`,name:"proxies"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.output_loading_info(bool,",description:`<strong>output_loading_info(<code>bool</code>,</strong> <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages.`,name:"output_loading_info(bool,"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.local_files_only",description:`<strong>local_files_only</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to only load local model weights and configuration files or not. If set to <code>True</code>, the model | |
| won’t be downloaded from the Hub.`,name:"local_files_only"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.token",description:`<strong>token</strong> (<code>str</code> or <em>bool</em>, <em>optional</em>) — | |
| The token to use as HTTP bearer authorization for remote files. If <code>True</code>, the token generated from | |
| <code>diffusers-cli login</code> (stored in <code>~/.huggingface</code>) is used.`,name:"token"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.revision",description:`<strong>revision</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"main"</code>) — | |
| The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier | |
| allowed by Git.`,name:"revision"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.custom_revision",description:`<strong>custom_revision</strong> (<code>str</code>, <em>optional</em>) — | |
| The specific model version to use. It can be a branch name, a tag name, or a commit id similar to | |
| <code>revision</code> when loading a custom pipeline from the Hub. Defaults to the latest stable 🤗 Diffusers | |
| version.`,name:"custom_revision"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.mirror",description:`<strong>mirror</strong> (<code>str</code>, <em>optional</em>) — | |
| Mirror source to resolve accessibility issues if you’re downloading a model in China. We do not | |
| guarantee the timeliness or safety of the source, and you should refer to the mirror site for more | |
| information.`,name:"mirror"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.device_map",description:`<strong>device_map</strong> (<code>str</code>, <em>optional</em>) — | |
| Strategy that dictates how the different components of a pipeline should be placed on available | |
| devices. Currently, only “balanced” <code>device_map</code> is supported. Check out | |
| <a href="https://huggingface.co/docs/diffusers/main/en/tutorials/inference_with_big_models#device-placement" rel="nofollow">this</a> | |
| to know more.`,name:"device_map"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.max_memory",description:`<strong>max_memory</strong> (<code>Dict</code>, <em>optional</em>) — | |
| A dictionary device identifier for the maximum memory. Will default to the maximum memory available for | |
| each GPU and the available CPU RAM if unset.`,name:"max_memory"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.offload_folder",description:`<strong>offload_folder</strong> (<code>str</code> or <code>os.PathLike</code>, <em>optional</em>) — | |
| The path to offload weights if device_map contains the value <code>"disk"</code>.`,name:"offload_folder"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.offload_state_dict",description:`<strong>offload_state_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| If <code>True</code>, temporarily offloads the CPU state dict to the hard drive to avoid running out of CPU RAM if | |
| the weight of the CPU state dict + the biggest shard of the checkpoint does not fit. Defaults to <code>True</code> | |
| when there is some disk offload.`,name:"offload_state_dict"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.low_cpu_mem_usage",description:`<strong>low_cpu_mem_usage</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code> if torch version >= 1.9.0 else <code>False</code>) — | |
| Speed up model loading only loading the pretrained weights and not initializing the weights. This also | |
| tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. | |
| Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this | |
| argument to <code>True</code> will raise an error.`,name:"low_cpu_mem_usage"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.use_safetensors",description:`<strong>use_safetensors</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| If set to <code>None</code>, the safetensors weights are downloaded if they’re available <strong>and</strong> if the | |
| safetensors library is installed. If set to <code>True</code>, the model is forcibly loaded from safetensors | |
| weights. If set to <code>False</code>, safetensors weights are not loaded.`,name:"use_safetensors"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.use_onnx",description:`<strong>use_onnx</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| If set to <code>True</code>, ONNX weights will always be downloaded if present. If set to <code>False</code>, ONNX weights | |
| will never be downloaded. By default <code>use_onnx</code> defaults to the <code>_is_onnx</code> class attribute which is | |
| <code>False</code> for non-ONNX pipelines and <code>True</code> for ONNX pipelines. ONNX weights include both files ending | |
| with <code>.onnx</code> and <code>.pb</code>.`,name:"use_onnx"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.kwargs",description:`<strong>kwargs</strong> (remaining dictionary of keyword arguments, <em>optional</em>) — | |
| Can be used to overwrite load and saveable variables (the pipeline components of the specific pipeline | |
| class). The overwritten components are passed directly to the pipelines <code>__init__</code> method. See example | |
| below for more information.`,name:"kwargs"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.variant",description:`<strong>variant</strong> (<code>str</code>, <em>optional</em>) — | |
| Load weights from a specified variant filename such as <code>"fp16"</code> or <code>"ema"</code>. This is ignored when | |
| loading <code>from_flax</code>.`,name:"variant"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.dduf_file(str,",description:`<strong>dduf_file(<code>str</code>,</strong> <em>optional</em>) — | |
| Load weights from the specified dduf file.`,name:"dduf_file(str,"},{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.disable_mmap",description:`<strong>disable_mmap</strong> (‘bool’, <em>optional</em>, defaults to ‘False’) — | |
| Whether to disable mmap when loading a Safetensors model. This option can perform better when the model | |
| is on a network mount or hard drive, which may not handle the seeky-ness of mmap very well.`,name:"disable_mmap"}],source:"https://github.com/huggingface/diffusers/blob/vr_13748/src/diffusers/pipelines/pipeline_utils.py#L616"}}),Z=new Me({props:{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.example",$$slots:{default:[Re]},$$scope:{ctx:$}}}),G=new Me({props:{anchor:"diffusers.LongCatAudioDiTPipeline.from_pretrained.example-2",$$slots:{default:[Ve]},$$scope:{ctx:$}}}),N=new Ae({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/longcat_audio_dit.md"}}),{c(){a=m("meta"),v=l(),p=m("p"),s=l(),g(d.$$.fragment),t=l(),g(c.$$.fragment),O=l(),B=m("p"),B.innerHTML=je,K=l(),I=m("p"),I.innerHTML=Ue,ee=l(),W=m("p"),W.innerHTML=Ce,te=l(),g(D.$$.fragment),oe=l(),g(L.$$.fragment),ne=l(),g(A.$$.fragment),ae=l(),P=m("ul"),P.innerHTML=Je,se=l(),g(R.$$.fragment),ie=l(),j=m("div"),g(V.$$.fragment),me=l(),C=m("div"),g(X.$$.fragment),ue=l(),Q=m("p"),Q.textContent=xe,fe=l(),g(k.$$.fragment),he=l(),f=m("div"),g(Y.$$.fragment),ge=l(),H=m("p"),H.textContent=$e,_e=l(),E=m("p"),E.innerHTML=ke,ye=l(),g(Z.$$.fragment),be=l(),S=m("blockquote"),S.innerHTML=Ze,we=l(),g(G.$$.fragment),le=l(),g(N.$$.fragment),re=l(),q=m("p"),this.h()},l(e){const o=De("svelte-u9bgzb",document.head);a=u(o,"META",{name:!0,content:!0}),o.forEach(n),v=r(e),p=u(e,"P",{}),z(p).forEach(n),s=r(e),_(d.$$.fragment,e),t=r(e),_(c.$$.fragment,e),O=r(e),B=u(e,"P",{"data-svelte-h":!0}),U(B)!=="svelte-oir9lu"&&(B.innerHTML=je),K=r(e),I=u(e,"P",{"data-svelte-h":!0}),U(I)!=="svelte-1th8ail"&&(I.innerHTML=Ue),ee=r(e),W=u(e,"P",{"data-svelte-h":!0}),U(W)!=="svelte-1rdxwps"&&(W.innerHTML=Ce),te=r(e),_(D.$$.fragment,e),oe=r(e),_(L.$$.fragment,e),ne=r(e),_(A.$$.fragment,e),ae=r(e),P=u(e,"UL",{"data-svelte-h":!0}),U(P)!=="svelte-joikqt"&&(P.innerHTML=Je),se=r(e),_(R.$$.fragment,e),ie=r(e),j=u(e,"DIV",{class:!0});var J=z(j);_(V.$$.fragment,J),me=r(J),C=u(J,"DIV",{class:!0});var x=z(C);_(X.$$.fragment,x),ue=r(x),Q=u(x,"P",{"data-svelte-h":!0}),U(Q)!=="svelte-v78lg8"&&(Q.textContent=xe),fe=r(x),_(k.$$.fragment,x),x.forEach(n),he=r(J),f=u(J,"DIV",{class:!0});var M=z(f);_(Y.$$.fragment,M),ge=r(M),H=u(M,"P",{"data-svelte-h":!0}),U(H)!=="svelte-ccbjek"&&(H.textContent=$e),_e=r(M),E=u(M,"P",{"data-svelte-h":!0}),U(E)!=="svelte-1p5vgmd"&&(E.innerHTML=ke),ye=r(M),_(Z.$$.fragment,M),be=r(M),S=u(M,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),U(S)!=="svelte-zg8xkk"&&(S.innerHTML=Ze),we=r(M),_(G.$$.fragment,M),M.forEach(n),J.forEach(n),le=r(e),_(N.$$.fragment,e),re=r(e),q=u(e,"P",{}),z(q).forEach(n),this.h()},h(){F(a,"name","hf:doc:metadata"),F(a,"content",Ye),F(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),F(S,"class","tip"),F(f,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),F(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,o){h(document.head,a),i(e,v,o),i(e,p,o),i(e,s,o),y(d,e,o),i(e,t,o),y(c,e,o),i(e,O,o),i(e,B,o),i(e,K,o),i(e,I,o),i(e,ee,o),i(e,W,o),i(e,te,o),y(D,e,o),i(e,oe,o),y(L,e,o),i(e,ne,o),y(A,e,o),i(e,ae,o),i(e,P,o),i(e,se,o),y(R,e,o),i(e,ie,o),i(e,j,o),y(V,j,null),h(j,me),h(j,C),y(X,C,null),h(C,ue),h(C,Q),h(C,fe),y(k,C,null),h(j,he),h(j,f),y(Y,f,null),h(f,ge),h(f,H),h(f,_e),h(f,E),h(f,ye),y(Z,f,null),h(f,be),h(f,S),h(f,we),y(G,f,null),i(e,le,o),y(N,e,o),i(e,re,o),i(e,q,o),de=!0},p(e,[o]){const J={};o&2&&(J.$$scope={dirty:o,ctx:e}),k.$set(J);const x={};o&2&&(x.$$scope={dirty:o,ctx:e}),Z.$set(x);const M={};o&2&&(M.$$scope={dirty:o,ctx:e}),G.$set(M)},i(e){de||(b(d.$$.fragment,e),b(c.$$.fragment,e),b(D.$$.fragment,e),b(L.$$.fragment,e),b(A.$$.fragment,e),b(R.$$.fragment,e),b(V.$$.fragment,e),b(X.$$.fragment,e),b(k.$$.fragment,e),b(Y.$$.fragment,e),b(Z.$$.fragment,e),b(G.$$.fragment,e),b(N.$$.fragment,e),de=!0)},o(e){w(d.$$.fragment,e),w(c.$$.fragment,e),w(D.$$.fragment,e),w(L.$$.fragment,e),w(A.$$.fragment,e),w(R.$$.fragment,e),w(V.$$.fragment,e),w(X.$$.fragment,e),w(k.$$.fragment,e),w(Y.$$.fragment,e),w(Z.$$.fragment,e),w(G.$$.fragment,e),w(N.$$.fragment,e),de=!1},d(e){e&&(n(v),n(p),n(s),n(t),n(O),n(B),n(K),n(I),n(ee),n(W),n(te),n(oe),n(ne),n(ae),n(P),n(se),n(ie),n(j),n(le),n(re),n(q)),n(a),T(d,e),T(c,e),T(D,e),T(L,e),T(A,e),T(R,e),T(V),T(X),T(k),T(Y),T(Z),T(G),T(N,e)}}}const Ye='{"title":"LongCat-AudioDiT","local":"longcat-audiodit","sections":[{"title":"Usage","local":"usage","sections":[],"depth":2},{"title":"Tips","local":"tips","sections":[],"depth":2},{"title":"LongCatAudioDiTPipeline","local":"diffusers.LongCatAudioDiTPipeline","sections":[],"depth":2}],"depth":1}';function Se($){return Be(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Oe extends Ie{constructor(a){super(),We(this,a,Se,Xe,Ge,{})}}export{Oe as component}; | |
Xet Storage Details
- Size:
- 34.1 kB
- Xet hash:
- e3684e9d502898f12408d933401b7160987459fc3eebd14857c85e2757a6efe3
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.