Buckets:
| import{s as Xe,n as Ye,o as Ze}from"../chunks/scheduler.8c3d61f6.js";import{S as et,i as tt,g as d,s as r,r as u,A as ot,h as i,f as t,c as n,j as b,u as m,x as $,k as _,y as s,a,v as f,d as p,t as g,w as h}from"../chunks/index.da70eac4.js";import{D as x}from"../chunks/Docstring.6b390b9a.js";import{H as re,E as rt}from"../chunks/EditOnGithub.1e64e623.js";/* nt: fragment factory for the AutoencoderOobleck docs page. It declares the node and component locals plus the static HTML/text strings, instantiates child components re (H from the EditOnGithub chunk, given title/local/headingTag props), x (D from the Docstring chunk) and rt (E from the EditOnGithub chunk), and returns an object exposing c/l/h/m/p/i/o/d. The Ve parameter is never read. NOTE(review): this looks like Svelte compiler output; the single-letter helpers come from ../chunks/index.da70eac4.js and their exact semantics are not visible here - confirm before hand-editing. */function nt(Ve){let v,ne,te,se,y,ae,T,Se='The Oobleck variational autoencoder (VAE) model with KL loss was introduced in <a href="https://github.com/Stability-AI/stable-audio-tools" rel="nofollow">Stability-AI/stable-audio-tools</a> and <a href="https://huggingface.co/papers/2407.14358" rel="nofollow">Stable Audio Open</a> by Stability AI. The model is used in 🤗 Diffusers to encode audio waveforms into latents and to decode latent representations into audio waveforms.',de,C,qe="The abstract from the paper is:",ie,E,Ge="<em>Open generative models are vitally important for the community, allowing for fine-tunes and serving as baselines when presenting new models. However, most current text-to-audio models are private and not accessible for artists and researchers to build upon. Here we describe the architecture and training process of a new open-weights text-to-audio model trained with Creative Commons data. Our evaluation shows that the model’s performance is competitive with the state-of-the-art across various metrics. Notably, the reported FDopenl3 results (measuring the realism of the generations) showcase its potential for high-quality stereo sound synthesis at 44.1kHz.</em>",le,L,ce,l,I,Ae,R,Fe=`An autoencoder for encoding waveforms into latents and decoding latent representations into waveforms. First | |
| introduced in Stable Audio.`,De,K,je=`This model inherits from <a href="/docs/diffusers/pr_10101/en/api/models/overview#diffusers.ModelMixin">ModelMixin</a>. Check the superclass documentation for it’s generic methods implemented | |
| for all models (such as downloading or saving).`,ye,U,P,Te,B,M,Ce,A,H,Ee,J,Ne=`Disable sliced VAE decoding. If <code>enable_slicing</code> was previously enabled, this method will go back to computing | |
| decoding in one step.`,Le,D,z,Ie,Q,We=`Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to | |
| compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.`,Pe,X,V,ue,S,me,k,q,Me,Y,Re="Output of decoding method.",fe,G,pe,O,F,He,Z,Ke="Output of decoding method.",ge,j,he,w,N,ze,ee,Ue="Output of AutoencoderOobleck encoding method.",be,W,_e,oe,$e;return y=new re({props:{title:"AutoencoderOobleck",local:"autoencoderoobleck",headingTag:"h1"}}),L=new re({props:{title:"AutoencoderOobleck",local:"diffusers.AutoencoderOobleck",headingTag:"h2"}}),I=new x({props:{name:"class diffusers.AutoencoderOobleck",anchor:"diffusers.AutoencoderOobleck",parameters:[{name:"encoder_hidden_size",val:" = 128"},{name:"downsampling_ratios",val:" = [2, 4, 4, 8, 8]"},{name:"channel_multiples",val:" = [1, 2, 4, 8, 16]"},{name:"decoder_channels",val:" = 128"},{name:"decoder_input_channels",val:" = 64"},{name:"audio_channels",val:" = 2"},{name:"sampling_rate",val:" = 44100"}],parametersDescription:[{anchor:"diffusers.AutoencoderOobleck.encoder_hidden_size",description:`<strong>encoder_hidden_size</strong> (<code>int</code>, <em>optional</em>, defaults to 128) — | |
| Intermediate representation dimension for the encoder.`,name:"encoder_hidden_size"},{anchor:"diffusers.AutoencoderOobleck.downsampling_ratios",description:`<strong>downsampling_ratios</strong> (<code>List[int]</code>, <em>optional</em>, defaults to <code>[2, 4, 4, 8, 8]</code>) — | |
| Ratios for downsampling in the encoder. These are used in reverse order for upsampling in the decoder.`,name:"downsampling_ratios"},{anchor:"diffusers.AutoencoderOobleck.channel_multiples",description:`<strong>channel_multiples</strong> (<code>List[int]</code>, <em>optional</em>, defaults to <code>[1, 2, 4, 8, 16]</code>) — | |
| Multiples used to determine the hidden sizes of the hidden layers.`,name:"channel_multiples"},{anchor:"diffusers.AutoencoderOobleck.decoder_channels",description:`<strong>decoder_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 128) — | |
| Intermediate representation dimension for the decoder.`,name:"decoder_channels"},{anchor:"diffusers.AutoencoderOobleck.decoder_input_channels",description:`<strong>decoder_input_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 64) — | |
| Input dimension for the decoder. Corresponds to the latent dimension.`,name:"decoder_input_channels"},{anchor:"diffusers.AutoencoderOobleck.audio_channels",description:`<strong>audio_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 2) — | |
| Number of channels in the audio data. Either 1 for mono or 2 for stereo.`,name:"audio_channels"},{anchor:"diffusers.AutoencoderOobleck.sampling_rate",description:`<strong>sampling_rate</strong> (<code>int</code>, <em>optional</em>, defaults to 44100) — | |
| The sampling rate at which the audio waveform should be digitalized expressed in hertz (Hz).`,name:"sampling_rate"}],source:"https://github.com/huggingface/diffusers/blob/vr_10101/src/diffusers/models/autoencoders/autoencoder_oobleck.py#L294"}}),P=new x({props:{name:"wrapper",anchor:"diffusers.AutoencoderOobleck.decode",parameters:[{name:"*args",val:""},{name:"**kwargs",val:""}],source:"https://github.com/huggingface/diffusers/blob/vr_10101/src/diffusers/utils/accelerate_utils.py#L43"}}),M=new x({props:{name:"wrapper",anchor:"diffusers.AutoencoderOobleck.encode",parameters:[{name:"*args",val:""},{name:"**kwargs",val:""}],source:"https://github.com/huggingface/diffusers/blob/vr_10101/src/diffusers/utils/accelerate_utils.py#L43"}}),H=new x({props:{name:"disable_slicing",anchor:"diffusers.AutoencoderOobleck.disable_slicing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_10101/src/diffusers/models/autoencoders/autoencoder_oobleck.py#L365"}}),z=new x({props:{name:"enable_slicing",anchor:"diffusers.AutoencoderOobleck.enable_slicing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_10101/src/diffusers/models/autoencoders/autoencoder_oobleck.py#L358"}}),V=new x({props:{name:"forward",anchor:"diffusers.AutoencoderOobleck.forward",parameters:[{name:"sample",val:": Tensor"},{name:"sample_posterior",val:": bool = False"},{name:"return_dict",val:": bool = True"},{name:"generator",val:": typing.Optional[torch._C.Generator] = None"}],parametersDescription:[{anchor:"diffusers.AutoencoderOobleck.forward.sample",description:"<strong>sample</strong> (<code>torch.Tensor</code>) — Input sample.",name:"sample"},{anchor:"diffusers.AutoencoderOobleck.forward.sample_posterior",description:`<strong>sample_posterior</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to sample from the posterior.`,name:"sample_posterior"},{anchor:"diffusers.AutoencoderOobleck.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>OobleckDecoderOutput</code> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/diffusers/blob/vr_10101/src/diffusers/models/autoencoders/autoencoder_oobleck.py#L438"}}),S=new re({props:{title:"OobleckDecoderOutput",local:"diffusers.models.autoencoders.autoencoder_oobleck.OobleckDecoderOutput",headingTag:"h2"}}),q=new x({props:{name:"class diffusers.models.autoencoders.autoencoder_oobleck.OobleckDecoderOutput",anchor:"diffusers.models.autoencoders.autoencoder_oobleck.OobleckDecoderOutput",parameters:[{name:"sample",val:": Tensor"}],parametersDescription:[{anchor:"diffusers.models.autoencoders.autoencoder_oobleck.OobleckDecoderOutput.sample",description:`<strong>sample</strong> (<code>torch.Tensor</code> of shape <code>(batch_size, audio_channels, sequence_length)</code>) — | |
| The decoded output sample from the last layer of the model.`,name:"sample"}],source:"https://github.com/huggingface/diffusers/blob/vr_10101/src/diffusers/models/autoencoders/autoencoder_oobleck.py#L201"}}),G=new re({props:{title:"OobleckDecoderOutput",local:"diffusers.models.autoencoders.autoencoder_oobleck.OobleckDecoderOutput",headingTag:"h2"}}),F=new x({props:{name:"class diffusers.models.autoencoders.autoencoder_oobleck.OobleckDecoderOutput",anchor:"diffusers.models.autoencoders.autoencoder_oobleck.OobleckDecoderOutput",parameters:[{name:"sample",val:": Tensor"}],parametersDescription:[{anchor:"diffusers.models.autoencoders.autoencoder_oobleck.OobleckDecoderOutput.sample",description:`<strong>sample</strong> (<code>torch.Tensor</code> of shape <code>(batch_size, audio_channels, sequence_length)</code>) — | |
| The decoded output sample from the last layer of the model.`,name:"sample"}],source:"https://github.com/huggingface/diffusers/blob/vr_10101/src/diffusers/models/autoencoders/autoencoder_oobleck.py#L201"}}),j=new re({props:{title:"AutoencoderOobleckOutput",local:"diffusers.models.autoencoders.autoencoder_oobleck.AutoencoderOobleckOutput",headingTag:"h2"}}),N=new x({props:{name:"class diffusers.models.autoencoders.autoencoder_oobleck.AutoencoderOobleckOutput",anchor:"diffusers.models.autoencoders.autoencoder_oobleck.AutoencoderOobleckOutput",parameters:[{name:"latent_dist",val:": OobleckDiagonalGaussianDistribution"}],parametersDescription:[{anchor:"diffusers.models.autoencoders.autoencoder_oobleck.AutoencoderOobleckOutput.latent_dist",description:`<strong>latent_dist</strong> (<code>OobleckDiagonalGaussianDistribution</code>) — | |
| Encoded outputs of <code>Encoder</code> represented as the mean and standard deviation of | |
| <code>OobleckDiagonalGaussianDistribution</code>. <code>OobleckDiagonalGaussianDistribution</code> allows for sampling latents | |
| from the distribution.`,name:"latent_dist"}],source:"https://github.com/huggingface/diffusers/blob/vr_10101/src/diffusers/models/autoencoders/autoencoder_oobleck.py#L186"}}),W=new rt({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/models/autoencoder_oobleck.md"}}),{/* c: presumably the create step - calls d(tag) and r() for each node, u() on each child component fragment, assigns the static innerHTML/textContent strings, then calls this.h(). */c(){v=d("meta"),ne=r(),te=d("p"),se=r(),u(y.$$.fragment),ae=r(),T=d("p"),T.innerHTML=Se,de=r(),C=d("p"),C.textContent=qe,ie=r(),E=d("p"),E.innerHTML=Ge,le=r(),u(L.$$.fragment),ce=r(),l=d("div"),u(I.$$.fragment),Ae=r(),R=d("p"),R.textContent=Fe,De=r(),K=d("p"),K.innerHTML=je,ye=r(),U=d("div"),u(P.$$.fragment),Te=r(),B=d("div"),u(M.$$.fragment),Ce=r(),A=d("div"),u(H.$$.fragment),Ee=r(),J=d("p"),J.innerHTML=Ne,Le=r(),D=d("div"),u(z.$$.fragment),Ie=r(),Q=d("p"),Q.textContent=We,Pe=r(),X=d("div"),u(V.$$.fragment),ue=r(),u(S.$$.fragment),me=r(),k=d("div"),u(q.$$.fragment),Me=r(),Y=d("p"),Y.textContent=Re,fe=r(),u(G.$$.fragment),pe=r(),O=d("div"),u(F.$$.fragment),He=r(),Z=d("p"),Z.textContent=Ke,ge=r(),u(j.$$.fragment),he=r(),w=d("div"),u(N.$$.fragment),ze=r(),ee=d("p"),ee.textContent=Ue,be=r(),u(W.$$.fragment),_e=r(),oe=d("p"),this.h()},/* l: presumably the claim step used when markup already exists - looks nodes up under e via i(), n() and b(), rewrites innerHTML/textContent only when the value returned by $() differs from the expected svelte-prefixed hash string, then calls this.h(). */l(e){const o=ot("svelte-u9bgzb",document.head);v=i(o,"META",{name:!0,content:!0}),o.forEach(t),ne=n(e),te=i(e,"P",{}),b(te).forEach(t),se=n(e),m(y.$$.fragment,e),ae=n(e),T=i(e,"P",{"data-svelte-h":!0}),$(T)!=="svelte-1fhkutu"&&(T.innerHTML=Se),de=n(e),C=i(e,"P",{"data-svelte-h":!0}),$(C)!=="svelte-1cwsb16"&&(C.textContent=qe),ie=n(e),E=i(e,"P",{"data-svelte-h":!0}),$(E)!=="svelte-1rfs7ni"&&(E.innerHTML=Ge),le=n(e),m(L.$$.fragment,e),ce=n(e),l=i(e,"DIV",{class:!0});var c=b(l);m(I.$$.fragment,c),Ae=n(c),R=i(c,"P",{"data-svelte-h":!0}),$(R)!=="svelte-qyq2gm"&&(R.textContent=Fe),De=n(c),K=i(c,"P",{"data-svelte-h":!0}),$(K)!=="svelte-5k2yyv"&&(K.innerHTML=je),ye=n(c),U=i(c,"DIV",{class:!0});var Be=b(U);m(P.$$.fragment,Be),Be.forEach(t),Te=n(c),B=i(c,"DIV",{class:!0});var 
Je=b(B);m(M.$$.fragment,Je),Je.forEach(t),Ce=n(c),A=i(c,"DIV",{class:!0});var ve=b(A);m(H.$$.fragment,ve),Ee=n(ve),J=i(ve,"P",{"data-svelte-h":!0}),$(J)!=="svelte-189cc7b"&&(J.innerHTML=Ne),ve.forEach(t),Le=n(c),D=i(c,"DIV",{class:!0});var ke=b(D);m(z.$$.fragment,ke),Ie=n(ke),Q=i(ke,"P",{"data-svelte-h":!0}),$(Q)!=="svelte-14bnrb6"&&(Q.textContent=We),ke.forEach(t),Pe=n(c),X=i(c,"DIV",{class:!0});var Qe=b(X);m(V.$$.fragment,Qe),Qe.forEach(t),c.forEach(t),ue=n(e),m(S.$$.fragment,e),me=n(e),k=i(e,"DIV",{class:!0});var Oe=b(k);m(q.$$.fragment,Oe),Me=n(Oe),Y=i(Oe,"P",{"data-svelte-h":!0}),$(Y)!=="svelte-18u8upa"&&(Y.textContent=Re),Oe.forEach(t),fe=n(e),m(G.$$.fragment,e),pe=n(e),O=i(e,"DIV",{class:!0});var we=b(O);m(F.$$.fragment,we),He=n(we),Z=i(we,"P",{"data-svelte-h":!0}),$(Z)!=="svelte-18u8upa"&&(Z.textContent=Ke),we.forEach(t),ge=n(e),m(j.$$.fragment,e),he=n(e),w=i(e,"DIV",{class:!0});var xe=b(w);m(N.$$.fragment,xe),ze=n(xe),ee=i(xe,"P",{"data-svelte-h":!0}),$(ee)!=="svelte-1deyjjw"&&(ee.textContent=Ue),xe.forEach(t),be=n(e),m(W.$$.fragment,e),_e=n(e),oe=i(e,"P",{}),b(oe).forEach(t),this.h()},/* h: shared by c and l - calls _() to set the meta name to hf:doc:metadata with content st, and to set the same docstring class string on each wrapper div. */h(){_(v,"name","hf:doc:metadata"),_(v,"content",st),_(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),_(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),_(A,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),_(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),_(X,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),_(l,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),_(k,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),_(O,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),_(w,"class","docstring border-l-2 
border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},/* m: presumably the mount step - calls s(document.head,v), a(e,node,o) for each top-level node, s(parent,child) for nested nodes and f() for each child component, then sets the $e flag to true. */m(e,o){s(document.head,v),a(e,ne,o),a(e,te,o),a(e,se,o),f(y,e,o),a(e,ae,o),a(e,T,o),a(e,de,o),a(e,C,o),a(e,ie,o),a(e,E,o),a(e,le,o),f(L,e,o),a(e,ce,o),a(e,l,o),f(I,l,null),s(l,Ae),s(l,R),s(l,De),s(l,K),s(l,ye),s(l,U),f(P,U,null),s(l,Te),s(l,B),f(M,B,null),s(l,Ce),s(l,A),f(H,A,null),s(A,Ee),s(A,J),s(l,Le),s(l,D),f(z,D,null),s(D,Ie),s(D,Q),s(l,Pe),s(l,X),f(V,X,null),a(e,ue,o),f(S,e,o),a(e,me,o),a(e,k,o),f(q,k,null),s(k,Me),s(k,Y),a(e,fe,o),f(G,e,o),a(e,pe,o),a(e,O,o),f(F,O,null),s(O,He),s(O,Z),a(e,ge,o),f(j,e,o),a(e,he,o),a(e,w,o),f(N,w,null),s(w,ze),s(w,ee),a(e,be,o),f(W,e,o),a(e,_e,o),a(e,oe,o),$e=!0},/* p: the update hook is Ye (n from the scheduler chunk) - presumably a no-op because nothing on the page depends on changing props; confirm. */p:Ye,/* i: does nothing when $e is already true; otherwise calls p() on every child component fragment and sets $e to true. */i(e){$e||(p(y.$$.fragment,e),p(L.$$.fragment,e),p(I.$$.fragment,e),p(P.$$.fragment,e),p(M.$$.fragment,e),p(H.$$.fragment,e),p(z.$$.fragment,e),p(V.$$.fragment,e),p(S.$$.fragment,e),p(q.$$.fragment,e),p(G.$$.fragment,e),p(F.$$.fragment,e),p(j.$$.fragment,e),p(N.$$.fragment,e),p(W.$$.fragment,e),$e=!0)},/* o: calls g() on every child component fragment and sets $e to false. */o(e){g(y.$$.fragment,e),g(L.$$.fragment,e),g(I.$$.fragment,e),g(P.$$.fragment,e),g(M.$$.fragment,e),g(H.$$.fragment,e),g(z.$$.fragment,e),g(V.$$.fragment,e),g(S.$$.fragment,e),g(q.$$.fragment,e),g(G.$$.fragment,e),g(F.$$.fragment,e),g(j.$$.fragment,e),g(N.$$.fragment,e),g(W.$$.fragment,e),$e=!1},/* d: when e is truthy calls t() on each top-level node; unconditionally calls t(v) and h() on every child component (presumably detach and destroy - confirm against the index chunk). */d(e){e&&(t(ne),t(te),t(se),t(ae),t(T),t(de),t(C),t(ie),t(E),t(le),t(ce),t(l),t(ue),t(me),t(k),t(fe),t(pe),t(O),t(ge),t(he),t(w),t(be),t(_e),t(oe)),t(v),h(y,e),h(L,e),h(I),h(P),h(M),h(H),h(z),h(V),h(S,e),h(q),h(G),h(F),h(j,e),h(N),h(W,e)}}}const 
st='{"title":"AutoencoderOobleck","local":"autoencoderoobleck","sections":[{"title":"AutoencoderOobleck","local":"diffusers.AutoencoderOobleck","sections":[],"depth":2},{"title":"OobleckDecoderOutput","local":"diffusers.models.autoencoders.autoencoder_oobleck.OobleckDecoderOutput","sections":[],"depth":2},{"title":"OobleckDecoderOutput","local":"diffusers.models.autoencoders.autoencoder_oobleck.OobleckDecoderOutput","sections":[],"depth":2},{"title":"AutoencoderOobleckOutput","local":"diffusers.models.autoencoders.autoencoder_oobleck.AutoencoderOobleckOutput","sections":[],"depth":2}],"depth":1}';function at(Ve){return Ze(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class ut extends et{constructor(v){super(),tt(this,v,at,nt,Xe,{})}}export{ut as component}; | |
Xet Storage Details
- Size: 16.1 kB
- Xet hash: 4947f1e18af4ccba96f9e25a27672c4b86b3a04858c862d026c2b1496c2dc58c
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.