Buckets:
| import{s as ni,o as si,n as j}from"../chunks/scheduler.9bc65507.js";import{S as ai,i as ri,g as l,s,r as m,A as ii,h as c,f as r,c as a,j as x,u,x as y,k as $,y as o,a as h,v as f,d as _,t as g,w as b}from"../chunks/index.707bf1b6.js";import{T as ce}from"../chunks/Tip.c2ecdbf4.js";import{D as W,E as oe}from"../chunks/ExampleCodeBlock.90aeff6e.js";import{C as K}from"../chunks/CodeBlock.54a9f38d.js";import{H as U,E as di}from"../chunks/EditOnGithub.922df6ba.js";function li(T){let t,k="Example:",d,p,w;return p=new K({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFdoaXNwZXJDb25maWclMkMlMjBXaGlzcGVyTW9kZWwlMEElMEElMjMlMjBJbml0aWFsaXppbmclMjBhJTIwV2hpc3BlciUyMHRpbnklMjBzdHlsZSUyMGNvbmZpZ3VyYXRpb24lMEFjb25maWd1cmF0aW9uJTIwJTNEJTIwV2hpc3BlckNvbmZpZygpJTBBJTBBJTIzJTIwSW5pdGlhbGl6aW5nJTIwYSUyMG1vZGVsJTIwKHdpdGglMjByYW5kb20lMjB3ZWlnaHRzKSUyMGZyb20lMjB0aGUlMjB0aW55JTIwc3R5bGUlMjBjb25maWd1cmF0aW9uJTBBbW9kZWwlMjAlM0QlMjBXaGlzcGVyTW9kZWwoY29uZmlndXJhdGlvbiklMEElMEElMjMlMjBBY2Nlc3NpbmclMjB0aGUlMjBtb2RlbCUyMGNvbmZpZ3VyYXRpb24lMEFjb25maWd1cmF0aW9uJTIwJTNEJTIwbW9kZWwuY29uZmln",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> WhisperConfig, WhisperModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Initializing a Whisper tiny style configuration</span> | |
| <span class="hljs-meta">>>> </span>configuration = WhisperConfig() | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Initializing a model (with random weights) from the tiny style configuration</span> | |
| <span class="hljs-meta">>>> </span>model = WhisperModel(configuration) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Accessing the model configuration</span> | |
| <span class="hljs-meta">>>> </span>configuration = model.config`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),m(p.$$.fragment)},l(n){t=c(n,"P",{"data-svelte-h":!0}),y(t)!=="svelte-11lpom8"&&(t.textContent=k),d=a(n),u(p.$$.fragment,n)},m(n,v){h(n,t,v),h(n,d,v),f(p,n,v),w=!0},p:j,i(n){w||(_(p.$$.fragment,n),w=!0)},o(n){g(p.$$.fragment,n),w=!1},d(n){n&&(r(t),r(d)),b(p,n)}}}function ci(T){let t,k="update the prefix tokens as required when fine-tuning. Example:",d,p,w;return p=new K({props:{code:"JTIzJTIwaW5zdGFudGlhdGUlMjB0aGUlMjB0b2tlbml6ZXIlMjBhbmQlMjBzZXQlMjB0aGUlMjBwcmVmaXglMjB0b2tlbiUyMHRvJTIwU3BhbmlzaCUwQXRva2VuaXplciUyMCUzRCUyMFdoaXNwZXJUb2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKCUyMm9wZW5haSUyRndoaXNwZXItdGlueSUyMiUyQyUyMGxhbmd1YWdlJTNEJTIyc3BhbmlzaCUyMiklMEElMjMlMjBub3clMjBzd2l0Y2glMjB0aGUlMjBwcmVmaXglMjB0b2tlbiUyMGZyb20lMjBTcGFuaXNoJTIwdG8lMjBGcmVuY2glMEF0b2tlbml6ZXIuc2V0X3ByZWZpeF90b2tlbnMobGFuZ3VhZ2UlM0QlMjJmcmVuY2glMjIp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-comment"># instantiate the tokenizer and set the prefix token to Spanish</span> | |
| <span class="hljs-meta">>>> </span>tokenizer = WhisperTokenizer.from_pretrained(<span class="hljs-string">"openai/whisper-tiny"</span>, language=<span class="hljs-string">"spanish"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># now switch the prefix token from Spanish to French</span> | |
| <span class="hljs-meta">>>> </span>tokenizer.set_prefix_tokens(language=<span class="hljs-string">"french"</span>)`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),m(p.$$.fragment)},l(n){t=c(n,"P",{"data-svelte-h":!0}),y(t)!=="svelte-14u5irj"&&(t.textContent=k),d=a(n),u(p.$$.fragment,n)},m(n,v){h(n,t,v),h(n,d,v),f(p,n,v),w=!0},p:j,i(n){w||(_(p.$$.fragment,n),w=!0)},o(n){g(p.$$.fragment,n),w=!1},d(n){n&&(r(t),r(d)),b(p,n)}}}function pi(T){let t,k="update the prefix tokens as required when fine-tuning. Example:",d,p,w;return p=new K({props:{code:"JTIzJTIwaW5zdGFudGlhdGUlMjB0aGUlMjB0b2tlbml6ZXIlMjBhbmQlMjBzZXQlMjB0aGUlMjBwcmVmaXglMjB0b2tlbiUyMHRvJTIwU3BhbmlzaCUwQXRva2VuaXplciUyMCUzRCUyMFdoaXNwZXJUb2tlbml6ZXJGYXN0LmZyb21fcHJldHJhaW5lZCglMjJvcGVuYWklMkZ3aGlzcGVyLXRpbnklMjIlMkMlMjBsYW5ndWFnZSUzRCUyMnNwYW5pc2glMjIpJTBBJTIzJTIwbm93JTIwc3dpdGNoJTIwdGhlJTIwcHJlZml4JTIwdG9rZW4lMjBmcm9tJTIwU3BhbmlzaCUyMHRvJTIwRnJlbmNoJTBBdG9rZW5pemVyLnNldF9wcmVmaXhfdG9rZW5zKGxhbmd1YWdlJTNEJTIyZnJlbmNoJTIyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-comment"># instantiate the tokenizer and set the prefix token to Spanish</span> | |
| <span class="hljs-meta">>>> </span>tokenizer = WhisperTokenizerFast.from_pretrained(<span class="hljs-string">"openai/whisper-tiny"</span>, language=<span class="hljs-string">"spanish"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># now switch the prefix token from Spanish to French</span> | |
| <span class="hljs-meta">>>> </span>tokenizer.set_prefix_tokens(language=<span class="hljs-string">"french"</span>)`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),m(p.$$.fragment)},l(n){t=c(n,"P",{"data-svelte-h":!0}),y(t)!=="svelte-14u5irj"&&(t.textContent=k),d=a(n),u(p.$$.fragment,n)},m(n,v){h(n,t,v),h(n,d,v),f(p,n,v),w=!0},p:j,i(n){w||(_(p.$$.fragment,n),w=!0)},o(n){g(p.$$.fragment,n),w=!1},d(n){n&&(r(t),r(d)),b(p,n)}}}function hi(T){let t,k=`This class method is simply calling the feature extractor | |
| <code>from_pretrained()</code>, image processor | |
| <code>ImageProcessingMixin</code> and the tokenizer | |
| <code>~tokenization_utils_base.PreTrainedTokenizer.from_pretrained</code> methods. Please refer to the docstrings of the | |
| methods above for more information.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),y(t)!=="svelte-1m24p1o"&&(t.innerHTML=k)},m(d,p){h(d,t,p)},p:j,d(d){d&&r(t)}}}function mi(T){let t,k=`This class method is simply calling <code>save_pretrained()</code> and | |
| <code>save_pretrained()</code>. Please refer to the docstrings of the | |
| methods above for more information.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),y(t)!=="svelte-1j0r4gf"&&(t.innerHTML=k)},m(d,p){h(d,t,p)},p:j,d(d){d&&r(t)}}}function ui(T){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),y(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,p){h(d,t,p)},p:j,d(d){d&&r(t)}}}function fi(T){let t,k="Example:",d,p,w;return p=new K({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b0ZlYXR1cmVFeHRyYWN0b3IlMkMlMjBXaGlzcGVyTW9kZWwlMEFmcm9tJTIwZGF0YXNldHMlMjBpbXBvcnQlMjBsb2FkX2RhdGFzZXQlMEElMEFtb2RlbCUyMCUzRCUyMFdoaXNwZXJNb2RlbC5mcm9tX3ByZXRyYWluZWQoJTIyb3BlbmFpJTJGd2hpc3Blci1iYXNlJTIyKSUwQWZlYXR1cmVfZXh0cmFjdG9yJTIwJTNEJTIwQXV0b0ZlYXR1cmVFeHRyYWN0b3IuZnJvbV9wcmV0cmFpbmVkKCUyMm9wZW5haSUyRndoaXNwZXItYmFzZSUyMiklMEFkcyUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJoZi1pbnRlcm5hbC10ZXN0aW5nJTJGbGlicmlzcGVlY2hfYXNyX2R1bW15JTIyJTJDJTIwJTIyY2xlYW4lMjIlMkMlMjBzcGxpdCUzRCUyMnZhbGlkYXRpb24lMjIpJTBBaW5wdXRzJTIwJTNEJTIwZmVhdHVyZV9leHRyYWN0b3IoZHMlNUIwJTVEJTVCJTIyYXVkaW8lMjIlNUQlNUIlMjJhcnJheSUyMiU1RCUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBaW5wdXRfZmVhdHVyZXMlMjAlM0QlMjBpbnB1dHMuaW5wdXRfZmVhdHVyZXMlMEFkZWNvZGVyX2lucHV0X2lkcyUyMCUzRCUyMHRvcmNoLnRlbnNvciglNUIlNUIxJTJDJTIwMSU1RCU1RCklMjAqJTIwbW9kZWwuY29uZmlnLmRlY29kZXJfc3RhcnRfdG9rZW5faWQlMEFsYXN0X2hpZGRlbl9zdGF0ZSUyMCUzRCUyMG1vZGVsKGlucHV0X2ZlYXR1cmVzJTJDJTIwZGVjb2Rlcl9pbnB1dF9pZHMlM0RkZWNvZGVyX2lucHV0X2lkcykubGFzdF9oaWRkZW5fc3RhdGUlMEFsaXN0KGxhc3RfaGlkZGVuX3N0YXRlLnNoYXBlKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoFeatureExtractor, WhisperModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>model = WhisperModel.from_pretrained(<span class="hljs-string">"openai/whisper-base"</span>) | |
| <span class="hljs-meta">>>> </span>feature_extractor = AutoFeatureExtractor.from_pretrained(<span class="hljs-string">"openai/whisper-base"</span>) | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">"hf-internal-testing/librispeech_asr_dummy"</span>, <span class="hljs-string">"clean"</span>, split=<span class="hljs-string">"validation"</span>) | |
| <span class="hljs-meta">>>> </span>inputs = feature_extractor(ds[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>], return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-meta">>>> </span>input_features = inputs.input_features | |
| <span class="hljs-meta">>>> </span>decoder_input_ids = torch.tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>]]) * model.config.decoder_start_token_id | |
| <span class="hljs-meta">>>> </span>last_hidden_state = model(input_features, decoder_input_ids=decoder_input_ids).last_hidden_state | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">list</span>(last_hidden_state.shape) | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">2</span>, <span class="hljs-number">512</span>]`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),m(p.$$.fragment)},l(n){t=c(n,"P",{"data-svelte-h":!0}),y(t)!=="svelte-11lpom8"&&(t.textContent=k),d=a(n),u(p.$$.fragment,n)},m(n,v){h(n,t,v),h(n,d,v),f(p,n,v),w=!0},p:j,i(n){w||(_(p.$$.fragment,n),w=!0)},o(n){g(p.$$.fragment,n),w=!1},d(n){n&&(r(t),r(d)),b(p,n)}}}function _i(T){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),y(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,p){h(d,t,p)},p:j,d(d){d&&r(t)}}}function gi(T){let t,k="Example:",d,p,w;return p=new K({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Byb2Nlc3NvciUyQyUyMFdoaXNwZXJGb3JDb25kaXRpb25hbEdlbmVyYXRpb24lMEFmcm9tJTIwZGF0YXNldHMlMjBpbXBvcnQlMjBsb2FkX2RhdGFzZXQlMEElMEFwcm9jZXNzb3IlMjAlM0QlMjBBdXRvUHJvY2Vzc29yLmZyb21fcHJldHJhaW5lZCglMjJvcGVuYWklMkZ3aGlzcGVyLXRpbnkuZW4lMjIpJTBBbW9kZWwlMjAlM0QlMjBXaGlzcGVyRm9yQ29uZGl0aW9uYWxHZW5lcmF0aW9uLmZyb21fcHJldHJhaW5lZCglMjJvcGVuYWklMkZ3aGlzcGVyLXRpbnkuZW4lMjIpJTBBJTBBZHMlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJTIyaGYtaW50ZXJuYWwtdGVzdGluZyUyRmxpYnJpc3BlZWNoX2Fzcl9kdW1teSUyMiUyQyUyMCUyMmNsZWFuJTIyJTJDJTIwc3BsaXQlM0QlMjJ2YWxpZGF0aW9uJTIyKSUwQSUwQWlucHV0cyUyMCUzRCUyMHByb2Nlc3NvcihkcyU1QjAlNUQlNUIlMjJhdWRpbyUyMiU1RCU1QiUyMmFycmF5JTIyJTVEJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEFpbnB1dF9mZWF0dXJlcyUyMCUzRCUyMGlucHV0cy5pbnB1dF9mZWF0dXJlcyUwQSUwQWdlbmVyYXRlZF9pZHMlMjAlM0QlMjBtb2RlbC5nZW5lcmF0ZShpbnB1dHMlM0RpbnB1dF9mZWF0dXJlcyklMEElMEF0cmFuc2NyaXB0aW9uJTIwJTNEJTIwcHJvY2Vzc29yLmJhdGNoX2RlY29kZShnZW5lcmF0ZWRfaWRzJTJDJTIwc2tpcF9zcGVjaWFsX3Rva2VucyUzRFRydWUpJTVCMCU1RCUwQXRyYW5zY3JpcHRpb24=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoProcessor, WhisperForConditionalGeneration | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>processor = AutoProcessor.from_pretrained(<span class="hljs-string">"openai/whisper-tiny.en"</span>) | |
| <span class="hljs-meta">>>> </span>model = WhisperForConditionalGeneration.from_pretrained(<span class="hljs-string">"openai/whisper-tiny.en"</span>) | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">"hf-internal-testing/librispeech_asr_dummy"</span>, <span class="hljs-string">"clean"</span>, split=<span class="hljs-string">"validation"</span>) | |
| <span class="hljs-meta">>>> </span>inputs = processor(ds[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>], return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-meta">>>> </span>input_features = inputs.input_features | |
| <span class="hljs-meta">>>> </span>generated_ids = model.generate(inputs=input_features) | |
| <span class="hljs-meta">>>> </span>transcription = processor.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>transcription | |
| <span class="hljs-string">' Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.'</span>`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),m(p.$$.fragment)},l(n){t=c(n,"P",{"data-svelte-h":!0}),y(t)!=="svelte-11lpom8"&&(t.textContent=k),d=a(n),u(p.$$.fragment,n)},m(n,v){h(n,t,v),h(n,d,v),f(p,n,v),w=!0},p:j,i(n){w||(_(p.$$.fragment,n),w=!0)},o(n){g(p.$$.fragment,n),w=!1},d(n){n&&(r(t),r(d)),b(p,n)}}}function bi(T){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),y(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,p){h(d,t,p)},p:j,d(d){d&&r(t)}}}function yi(T){let t,k="Example:",d,p,w;return p=new K({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b0ZlYXR1cmVFeHRyYWN0b3IlMkMlMjBXaGlzcGVyRm9yQXVkaW9DbGFzc2lmaWNhdGlvbiUwQWZyb20lMjBkYXRhc2V0cyUyMGltcG9ydCUyMGxvYWRfZGF0YXNldCUwQSUwQWZlYXR1cmVfZXh0cmFjdG9yJTIwJTNEJTIwQXV0b0ZlYXR1cmVFeHRyYWN0b3IuZnJvbV9wcmV0cmFpbmVkKCUyMnNhbmNoaXQtZ2FuZGhpJTJGd2hpc3Blci1tZWRpdW0tZmxldXJzLWxhbmctaWQlMjIpJTBBbW9kZWwlMjAlM0QlMjBXaGlzcGVyRm9yQXVkaW9DbGFzc2lmaWNhdGlvbi5mcm9tX3ByZXRyYWluZWQoJTIyc2FuY2hpdC1nYW5kaGklMkZ3aGlzcGVyLW1lZGl1bS1mbGV1cnMtbGFuZy1pZCUyMiklMEElMEFkcyUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJnb29nbGUlMkZmbGV1cnMlMjIlMkMlMjAlMjJhbGwlMjIlMkMlMjBzcGxpdCUzRCUyMnZhbGlkYXRpb24lMjIlMkMlMjBzdHJlYW1pbmclM0RUcnVlKSUwQXNhbXBsZSUyMCUzRCUyMG5leHQoaXRlcihkcykpJTBBJTBBaW5wdXRzJTIwJTNEJTIwZmVhdHVyZV9leHRyYWN0b3IoJTBBJTIwJTIwJTIwJTIwc2FtcGxlJTVCJTIyYXVkaW8lMjIlNUQlNUIlMjJhcnJheSUyMiU1RCUyQyUyMHNhbXBsaW5nX3JhdGUlM0RzYW1wbGUlNUIlMjJhdWRpbyUyMiU1RCU1QiUyMnNhbXBsaW5nX3JhdGUlMjIlNUQlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyJTBBKSUwQWlucHV0X2ZlYXR1cmVzJTIwJTNEJTIwaW5wdXRzLmlucHV0X2ZlYXR1cmVzJTBBJTBBd2l0aCUyMHRvcmNoLm5vX2dyYWQoKSUzQSUwQSUyMCUyMCUyMCUyMGxvZ2l0cyUyMCUzRCUyMG1vZGVsKGlucHV0X2ZlYXR1cmVzKS5sb2dpdHMlMEElMEFwcmVkaWN0ZWRfY2xhc3NfaWRzJTIwJTNEJTIwdG9yY2guYXJnbWF4KGxvZ2l0cykuaXRlbSgpJTBBcHJlZGljdGVkX2xhYmVsJTIwJTNEJTIwbW9kZWwuY29uZmlnLmlkMmxhYmVsJTVCcHJlZGljdGVkX2NsYXNzX2lkcyU1RCUwQXByZWRpY3RlZF9sYWJlbA==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoFeatureExtractor, WhisperForAudioClassification | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>feature_extractor = AutoFeatureExtractor.from_pretrained(<span class="hljs-string">"sanchit-gandhi/whisper-medium-fleurs-lang-id"</span>) | |
| <span class="hljs-meta">>>> </span>model = WhisperForAudioClassification.from_pretrained(<span class="hljs-string">"sanchit-gandhi/whisper-medium-fleurs-lang-id"</span>) | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">"google/fleurs"</span>, <span class="hljs-string">"all"</span>, split=<span class="hljs-string">"validation"</span>, streaming=<span class="hljs-literal">True</span>) | |
| <span class="hljs-meta">>>> </span>sample = <span class="hljs-built_in">next</span>(<span class="hljs-built_in">iter</span>(ds)) | |
| <span class="hljs-meta">>>> </span>inputs = feature_extractor( | |
| <span class="hljs-meta">... </span> sample[<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>], sampling_rate=sample[<span class="hljs-string">"audio"</span>][<span class="hljs-string">"sampling_rate"</span>], return_tensors=<span class="hljs-string">"pt"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>input_features = inputs.input_features | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">with</span> torch.no_grad(): | |
| <span class="hljs-meta">... </span> logits = model(input_features).logits | |
| <span class="hljs-meta">>>> </span>predicted_class_ids = torch.argmax(logits).item() | |
| <span class="hljs-meta">>>> </span>predicted_label = model.config.id2label[predicted_class_ids] | |
| <span class="hljs-meta">>>> </span>predicted_label | |
| <span class="hljs-string">'Afrikaans'</span>`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),m(p.$$.fragment)},l(n){t=c(n,"P",{"data-svelte-h":!0}),y(t)!=="svelte-11lpom8"&&(t.textContent=k),d=a(n),u(p.$$.fragment,n)},m(n,v){h(n,t,v),h(n,d,v),f(p,n,v),w=!0},p:j,i(n){w||(_(p.$$.fragment,n),w=!0)},o(n){g(p.$$.fragment,n),w=!1},d(n){n&&(r(t),r(d)),b(p,n)}}}function ki(T){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),y(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,p){h(d,t,p)},p:j,d(d){d&&r(t)}}}function wi(T){let t,k="Example:",d,p,w;return p=new K({props:{code:"aW1wb3J0JTIwdGVuc29yZmxvdyUyMGFzJTIwdGYlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwVEZXaGlzcGVyTW9kZWwlMkMlMjBBdXRvRmVhdHVyZUV4dHJhY3RvciUwQWZyb20lMjBkYXRhc2V0cyUyMGltcG9ydCUyMGxvYWRfZGF0YXNldCUwQSUwQW1vZGVsJTIwJTNEJTIwVEZXaGlzcGVyTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMm9wZW5haSUyRndoaXNwZXItYmFzZSUyMiklMEFmZWF0dXJlX2V4dHJhY3RvciUyMCUzRCUyMEF1dG9GZWF0dXJlRXh0cmFjdG9yLmZyb21fcHJldHJhaW5lZCglMjJvcGVuYWklMkZ3aGlzcGVyLWJhc2UlMjIpJTBBZHMlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJTIyaGYtaW50ZXJuYWwtdGVzdGluZyUyRmxpYnJpc3BlZWNoX2Fzcl9kdW1teSUyMiUyQyUyMCUyMmNsZWFuJTIyJTJDJTIwc3BsaXQlM0QlMjJ2YWxpZGF0aW9uJTIyKSUwQWlucHV0cyUyMCUzRCUyMGZlYXR1cmVfZXh0cmFjdG9yKGRzJTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVEJTVCJTIyYXJyYXklMjIlNUQlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnRmJTIyKSUwQWlucHV0X2ZlYXR1cmVzJTIwJTNEJTIwaW5wdXRzLmlucHV0X2ZlYXR1cmVzJTBBZGVjb2Rlcl9pbnB1dF9pZHMlMjAlM0QlMjB0Zi5jb252ZXJ0X3RvX3RlbnNvciglNUIlNUIxJTJDJTIwMSU1RCU1RCklMjAqJTIwbW9kZWwuY29uZmlnLmRlY29kZXJfc3RhcnRfdG9rZW5faWQlMEFsYXN0X2hpZGRlbl9zdGF0ZSUyMCUzRCUyMG1vZGVsKGlucHV0X2ZlYXR1cmVzJTJDJTIwZGVjb2Rlcl9pbnB1dF9pZHMlM0RkZWNvZGVyX2lucHV0X2lkcykubGFzdF9oaWRkZW5fc3RhdGUlMEFsaXN0KGxhc3RfaGlkZGVuX3N0YXRlLnNoYXBlKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> TFWhisperModel, AutoFeatureExtractor | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>model = TFWhisperModel.from_pretrained(<span class="hljs-string">"openai/whisper-base"</span>) | |
| <span class="hljs-meta">>>> </span>feature_extractor = AutoFeatureExtractor.from_pretrained(<span class="hljs-string">"openai/whisper-base"</span>) | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">"hf-internal-testing/librispeech_asr_dummy"</span>, <span class="hljs-string">"clean"</span>, split=<span class="hljs-string">"validation"</span>) | |
| <span class="hljs-meta">>>> </span>inputs = feature_extractor(ds[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>], return_tensors=<span class="hljs-string">"tf"</span>) | |
| <span class="hljs-meta">>>> </span>input_features = inputs.input_features | |
| <span class="hljs-meta">>>> </span>decoder_input_ids = tf.convert_to_tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>]]) * model.config.decoder_start_token_id | |
| <span class="hljs-meta">>>> </span>last_hidden_state = model(input_features, decoder_input_ids=decoder_input_ids).last_hidden_state | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">list</span>(last_hidden_state.shape) | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">2</span>, <span class="hljs-number">512</span>]`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),m(p.$$.fragment)},l(n){t=c(n,"P",{"data-svelte-h":!0}),y(t)!=="svelte-11lpom8"&&(t.textContent=k),d=a(n),u(p.$$.fragment,n)},m(n,v){h(n,t,v),h(n,d,v),f(p,n,v),w=!0},p:j,i(n){w||(_(p.$$.fragment,n),w=!0)},o(n){g(p.$$.fragment,n),w=!1},d(n){n&&(r(t),r(d)),b(p,n)}}}function vi(T){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),y(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,p){h(d,t,p)},p:j,d(d){d&&r(t)}}}function Ti(T){let t,k="Example:",d,p,w;return p=new K({props:{code:"aW1wb3J0JTIwdGVuc29yZmxvdyUyMGFzJTIwdGYlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Byb2Nlc3NvciUyQyUyMFRGV2hpc3BlckZvckNvbmRpdGlvbmFsR2VuZXJhdGlvbiUwQWZyb20lMjBkYXRhc2V0cyUyMGltcG9ydCUyMGxvYWRfZGF0YXNldCUwQSUwQXByb2Nlc3NvciUyMCUzRCUyMEF1dG9Qcm9jZXNzb3IuZnJvbV9wcmV0cmFpbmVkKCUyMm9wZW5haSUyRndoaXNwZXItdGlueS5lbiUyMiklMEFtb2RlbCUyMCUzRCUyMFRGV2hpc3BlckZvckNvbmRpdGlvbmFsR2VuZXJhdGlvbi5mcm9tX3ByZXRyYWluZWQoJTIyb3BlbmFpJTJGd2hpc3Blci10aW55LmVuJTIyKSUwQSUwQWRzJTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMmhmLWludGVybmFsLXRlc3RpbmclMkZsaWJyaXNwZWVjaF9hc3JfZHVtbXklMjIlMkMlMjAlMjJjbGVhbiUyMiUyQyUyMHNwbGl0JTNEJTIydmFsaWRhdGlvbiUyMiklMEElMEFpbnB1dHMlMjAlM0QlMjBwcm9jZXNzb3IoZHMlNUIwJTVEJTVCJTIyYXVkaW8lMjIlNUQlNUIlMjJhcnJheSUyMiU1RCUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIydGYlMjIpJTBBaW5wdXRfZmVhdHVyZXMlMjAlM0QlMjBpbnB1dHMuaW5wdXRfZmVhdHVyZXMlMEElMEFnZW5lcmF0ZWRfaWRzJTIwJTNEJTIwbW9kZWwuZ2VuZXJhdGUoaW5wdXRfZmVhdHVyZXMlM0RpbnB1dF9mZWF0dXJlcyklMEElMEF0cmFuc2NyaXB0aW9uJTIwJTNEJTIwcHJvY2Vzc29yLmJhdGNoX2RlY29kZShnZW5lcmF0ZWRfaWRzJTJDJTIwc2tpcF9zcGVjaWFsX3Rva2VucyUzRFRydWUpJTVCMCU1RCUwQXRyYW5zY3JpcHRpb24=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoProcessor, TFWhisperForConditionalGeneration | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>processor = AutoProcessor.from_pretrained(<span class="hljs-string">"openai/whisper-tiny.en"</span>) | |
| <span class="hljs-meta">>>> </span>model = TFWhisperForConditionalGeneration.from_pretrained(<span class="hljs-string">"openai/whisper-tiny.en"</span>) | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">"hf-internal-testing/librispeech_asr_dummy"</span>, <span class="hljs-string">"clean"</span>, split=<span class="hljs-string">"validation"</span>) | |
| <span class="hljs-meta">>>> </span>inputs = processor(ds[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>], return_tensors=<span class="hljs-string">"tf"</span>) | |
| <span class="hljs-meta">>>> </span>input_features = inputs.input_features | |
| <span class="hljs-meta">>>> </span>generated_ids = model.generate(input_features=input_features) | |
| <span class="hljs-meta">>>> </span>transcription = processor.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>transcription | |
| <span class="hljs-string">' Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.'</span>`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),m(p.$$.fragment)},l(n){t=c(n,"P",{"data-svelte-h":!0}),y(t)!=="svelte-11lpom8"&&(t.textContent=k),d=a(n),u(p.$$.fragment,n)},m(n,v){h(n,t,v),h(n,d,v),f(p,n,v),w=!0},p:j,i(n){w||(_(p.$$.fragment,n),w=!0)},o(n){g(p.$$.fragment,n),w=!1},d(n){n&&(r(t),r(d)),b(p,n)}}}function xi(T){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),y(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,p){h(d,t,p)},p:j,d(d){d&&r(t)}}}function $i(T){let t,k="Example:",d,p,w;return p=new K({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBGbGF4V2hpc3Blck1vZGVsJTBBJTBBdG9rZW5pemVyJTIwJTNEJTIwQXV0b1Rva2VuaXplci5mcm9tX3ByZXRyYWluZWQoJTIyb3BlbmFpJTJGd2hpc3Blci10aW55JTIyKSUwQW1vZGVsJTIwJTNEJTIwRmxheFdoaXNwZXJNb2RlbC5mcm9tX3ByZXRyYWluZWQoJTIyb3BlbmFpJTJGd2hpc3Blci10aW55JTIyKSUwQSUwQWlucHV0cyUyMCUzRCUyMHRva2VuaXplciglMjJIZWxsbyUyQyUyMG15JTIwZG9nJTIwaXMlMjBjdXRlJTIyJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJqYXglMjIpJTBBb3V0cHV0cyUyMCUzRCUyMG1vZGVsKCoqaW5wdXRzKSUwQSUwQWxhc3RfaGlkZGVuX3N0YXRlcyUyMCUzRCUyMG91dHB1dHMubGFzdF9oaWRkZW5fc3RhdGU=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, FlaxWhisperModel | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"openai/whisper-tiny"</span>) | |
| <span class="hljs-meta">>>> </span>model = FlaxWhisperModel.from_pretrained(<span class="hljs-string">"openai/whisper-tiny"</span>) | |
| <span class="hljs-meta">>>> </span>inputs = tokenizer(<span class="hljs-string">"Hello, my dog is cute"</span>, return_tensors=<span class="hljs-string">"jax"</span>) | |
| <span class="hljs-meta">>>> </span>outputs = model(**inputs) | |
| <span class="hljs-meta">>>> </span>last_hidden_states = outputs.last_hidden_state`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),m(p.$$.fragment)},l(n){t=c(n,"P",{"data-svelte-h":!0}),y(t)!=="svelte-11lpom8"&&(t.textContent=k),d=a(n),u(p.$$.fragment,n)},m(n,v){h(n,t,v),h(n,d,v),f(p,n,v),w=!0},p:j,i(n){w||(_(p.$$.fragment,n),w=!0)},o(n){g(p.$$.fragment,n),w=!1},d(n){n&&(r(t),r(d)),b(p,n)}}}function Wi(T){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),y(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,p){h(d,t,p)},p:j,d(d){d&&r(t)}}}function Mi(T){let t,k="Transcription example:",d,p,w;return p=new K({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFdoaXNwZXJQcm9jZXNzb3IlMkMlMjBGbGF4V2hpc3BlckZvckNvbmRpdGlvbmFsR2VuZXJhdGlvbiUwQWZyb20lMjBkYXRhc2V0cyUyMGltcG9ydCUyMGxvYWRfZGF0YXNldCUwQSUwQXByb2Nlc3NvciUyMCUzRCUyMFdoaXNwZXJQcm9jZXNzb3IuZnJvbV9wcmV0cmFpbmVkKCUyMm9wZW5haSUyRndoaXNwZXItdGlueS5lbiUyMiklMEFtb2RlbCUyMCUzRCUyMEZsYXhXaGlzcGVyRm9yQ29uZGl0aW9uYWxHZW5lcmF0aW9uLmZyb21fcHJldHJhaW5lZCglMjJvcGVuYWklMkZ3aGlzcGVyLXRpbnkuZW4lMjIlMkMlMjBmcm9tX3B0JTNEVHJ1ZSklMEFkcyUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJoZi1pbnRlcm5hbC10ZXN0aW5nJTJGbGlicmlzcGVlY2hfYXNyX2R1bW15JTIyJTJDJTIwJTIyY2xlYW4lMjIlMkMlMjBzcGxpdCUzRCUyMnZhbGlkYXRpb24lMjIpJTBBaW5wdXRzJTIwJTNEJTIwcHJvY2Vzc29yKGRzJTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVEJTVCJTIyYXJyYXklMjIlNUQlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMm5wJTIyKSUwQWlucHV0X2ZlYXR1cmVzJTIwJTNEJTIwaW5wdXRzLmlucHV0X2ZlYXR1cmVzJTBBZ2VuZXJhdGVkX2lkcyUyMCUzRCUyMG1vZGVsLmdlbmVyYXRlKGlucHV0X2lkcyUzRGlucHV0X2ZlYXR1cmVzKSUwQXRyYW5zY3JpcHRpb24lMjAlM0QlMjBwcm9jZXNzb3IuYmF0Y2hfZGVjb2RlKGdlbmVyYXRlZF9pZHMlMkMlMjBza2lwX3NwZWNpYWxfdG9rZW5zJTNEVHJ1ZSklNUIwJTVEJTBBdHJhbnNjcmlwdGlvbg==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> WhisperProcessor, FlaxWhisperForConditionalGeneration | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>processor = WhisperProcessor.from_pretrained(<span class="hljs-string">"openai/whisper-tiny.en"</span>) | |
| <span class="hljs-meta">>>> </span>model = FlaxWhisperForConditionalGeneration.from_pretrained(<span class="hljs-string">"openai/whisper-tiny.en"</span>, from_pt=<span class="hljs-literal">True</span>) | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">"hf-internal-testing/librispeech_asr_dummy"</span>, <span class="hljs-string">"clean"</span>, split=<span class="hljs-string">"validation"</span>) | |
| <span class="hljs-meta">>>> </span>inputs = processor(ds[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>], return_tensors=<span class="hljs-string">"np"</span>) | |
| <span class="hljs-meta">>>> </span>input_features = inputs.input_features | |
| <span class="hljs-meta">>>> </span>generated_ids = model.generate(input_ids=input_features) | |
| <span class="hljs-meta">>>> </span>transcription = processor.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>transcription | |
| <span class="hljs-string">' Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.'</span>`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),m(p.$$.fragment)},l(n){t=c(n,"P",{"data-svelte-h":!0}),y(t)!=="svelte-yrk4pw"&&(t.textContent=k),d=a(n),u(p.$$.fragment,n)},m(n,v){h(n,t,v),h(n,d,v),f(p,n,v),w=!0},p:j,i(n){w||(_(p.$$.fragment,n),w=!0)},o(n){g(p.$$.fragment,n),w=!1},d(n){n&&(r(t),r(d)),b(p,n)}}}function Fi(T){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),y(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,p){h(d,t,p)},p:j,d(d){d&&r(t)}}}function zi(T){let t,k="Transcription example:",d,p,w;return p=new K({props:{code:"aW1wb3J0JTIwamF4Lm51bXB5JTIwYXMlMjBqbnAlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b0ZlYXR1cmVFeHRyYWN0b3IlMkMlMjBGbGF4V2hpc3BlckZvckF1ZGlvQ2xhc3NpZmljYXRpb24lMEFmcm9tJTIwZGF0YXNldHMlMjBpbXBvcnQlMjBsb2FkX2RhdGFzZXQlMEElMEFmZWF0dXJlX2V4dHJhY3RvciUyMCUzRCUyMEF1dG9GZWF0dXJlRXh0cmFjdG9yLmZyb21fcHJldHJhaW5lZCglMjJzYW5jaGl0LWdhbmRoaSUyRndoaXNwZXItbWVkaXVtLWZsZXVycy1sYW5nLWlkJTIyKSUwQW1vZGVsJTIwJTNEJTIwRmxheFdoaXNwZXJGb3JBdWRpb0NsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJzYW5jaGl0LWdhbmRoaSUyRndoaXNwZXItbWVkaXVtLWZsZXVycy1sYW5nLWlkJTIyJTJDJTIwZnJvbV9wdCUzRFRydWUlMEEpJTBBZHMlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJTIyZ29vZ2xlJTJGZmxldXJzJTIyJTJDJTIwJTIyYWxsJTIyJTJDJTIwc3BsaXQlM0QlMjJ2YWxpZGF0aW9uJTIyJTJDJTIwc3RyZWFtaW5nJTNEVHJ1ZSUyQyUyMHRydXN0X3JlbW90ZV9jb2RlJTNEVHJ1ZSklMEElMEFzYW1wbGUlMjAlM0QlMjBuZXh0KGl0ZXIoZHMpKSUwQSUwQWlucHV0cyUyMCUzRCUyMGZlYXR1cmVfZXh0cmFjdG9yKCUwQSUyMCUyMCUyMCUyMHNhbXBsZSU1QiUyMmF1ZGlvJTIyJTVEJTVCJTIyYXJyYXklMjIlNUQlMkMlMjBzYW1wbGluZ19yYXRlJTNEc2FtcGxlJTVCJTIyYXVkaW8lMjIlNUQlNUIlMjJzYW1wbGluZ19yYXRlJTIyJTVEJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJucCUyMiUwQSklMEFpbnB1dF9mZWF0dXJlcyUyMCUzRCUyMGlucHV0cy5pbnB1dF9mZWF0dXJlcyUwQSUwQWxvZ2l0cyUyMCUzRCUyMG1vZGVsKGlucHV0X2ZlYXR1cmVzKS5sb2dpdHMlMEElMEFwcmVkaWN0ZWRfY2xhc3NfaWRzJTIwJTNEJTIwam5wLmFyZ21heChsb2dpdHMpLml0ZW0oKSUwQXByZWRpY3RlZF9sYWJlbCUyMCUzRCUyMG1vZGVsLmNvbmZpZy5pZDJsYWJlbCU1QnByZWRpY3RlZF9jbGFzc19pZHMlNUQlMEFwcmVkaWN0ZWRfbGFiZWw=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> jax.numpy <span class="hljs-keyword">as</span> jnp | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoFeatureExtractor, FlaxWhisperForAudioClassification | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>feature_extractor = AutoFeatureExtractor.from_pretrained(<span class="hljs-string">"sanchit-gandhi/whisper-medium-fleurs-lang-id"</span>) | |
| <span class="hljs-meta">>>> </span>model = FlaxWhisperForAudioClassification.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"sanchit-gandhi/whisper-medium-fleurs-lang-id"</span>, from_pt=<span class="hljs-literal">True</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">"google/fleurs"</span>, <span class="hljs-string">"all"</span>, split=<span class="hljs-string">"validation"</span>, streaming=<span class="hljs-literal">True</span>, trust_remote_code=<span class="hljs-literal">True</span>) | |
| <span class="hljs-meta">>>> </span>sample = <span class="hljs-built_in">next</span>(<span class="hljs-built_in">iter</span>(ds)) | |
| <span class="hljs-meta">>>> </span>inputs = feature_extractor( | |
| <span class="hljs-meta">... </span> sample[<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>], sampling_rate=sample[<span class="hljs-string">"audio"</span>][<span class="hljs-string">"sampling_rate"</span>], return_tensors=<span class="hljs-string">"np"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>input_features = inputs.input_features | |
| <span class="hljs-meta">>>> </span>logits = model(input_features).logits | |
| <span class="hljs-meta">>>> </span>predicted_class_ids = jnp.argmax(logits).item() | |
| <span class="hljs-meta">>>> </span>predicted_label = model.config.id2label[predicted_class_ids] | |
| <span class="hljs-meta">>>> </span>predicted_label | |
| <span class="hljs-string">'af_za'</span>`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),m(p.$$.fragment)},l(n){t=c(n,"P",{"data-svelte-h":!0}),y(t)!=="svelte-yrk4pw"&&(t.textContent=k),d=a(n),u(p.$$.fragment,n)},m(n,v){h(n,t,v),h(n,d,v),f(p,n,v),w=!0},p:j,i(n){w||(_(p.$$.fragment,n),w=!0)},o(n){g(p.$$.fragment,n),w=!1},d(n){n&&(r(t),r(d)),b(p,n)}}}function ji(T){let t,k,d,p,w,n,v,Wn,it,Ka='Whisper 모델은 Alec Radford, Jong Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, Ilya Sutskever에 의해 <a href="https://cdn.openai.com/papers/whisper.pdf" rel="nofollow">Robust Speech Recognition via Large-Scale Weak Supervision</a>에서 제안되었습니다.',Mn,dt,er="논문의 초록은 다음과 같습니다:",Fn,lt,tr="<em>우리는 인터넷에서 대량의 오디오를 글로 옮긴 것을 예측하도록 간단히 훈련된 음성 처리 시스템의 성능을 연구합니다. 68만 시간의 다국어 및 다중 작업 지도(multitask supervision)에 확장했을 때, 결과 모델은 표준 벤치마크에 잘 일반화되며, 미세 조정이 필요 없는 제로샷 전송 설정에서 이전의 완전히 지도된(fully-supervised) 결과와 경쟁할 수 있는 경우가 많습니다. 사람과 비교하면, 이 모델은 사람의 정확도와 견고성에 근접합니다. 우리는 강력한 음성 처리를 위한 추가 작업의 기반이 될 모델과 추론 코드를 공개합니다.</em>",zn,ct,or="팁:",jn,pt,nr='<li><p>이 모델은 일반적으로 별도의 미세 조정 없이도 잘 작동합니다.</p></li> <li><p>아키텍처는 고전적인 인코더-디코더 아키텍처를 따르기 때문에, 추론을 위해 <code>generate()</code> 함수를 사용합니다.</p></li> <li><p>현재 추론은 짧은 형식에만 구현되어 있으며, 오디오는 30초 미만의 세그먼트로 미리 분할되어야 합니다. 타임스탬프를 포함한 긴 형식에 대한 추론은 향후 릴리스에서 구현될 예정입니다.</p></li> <li><p><a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperProcessor">WhisperProcessor</a>를 사용하여 모델에 사용할 오디오를 준비하고, 예측된 ID를 텍스트로 디코딩할 수 있습니다.</p></li> <li><p>모델과 프로세서를 변환하려면 다음을 사용하는 것이 좋습니다:</p></li>',Cn,ht,Jn,mt,sr=`스크립트는 OpenAI 체크포인트에서 필요한 모든 매개변수를 자동으로 결정합니다. OpenAI 변환을 수행하려면 <code>tiktoken</code> 라이브러리를 설치해야 합니다. | |
| 라이브러리를 설치해야 OpenAI 토큰화기를 <code>tokenizers</code> 버전으로 변환할 수 있습니다.`,qn,ut,ar=`이 모델은 <a href="https://huggingface.co/ArthurZ" rel="nofollow">Arthur Zucker</a>에 의해 제공되었습니다. 이 모델의 Tensorflow 버전은 <a href="https://huggingface.co/amyeroberts" rel="nofollow">amyeroberts</a>에 의해 제공되었습니다. | |
| 원본 코드는 <a href="https://github.com/openai/whisper" rel="nofollow">여기</a>에서 찾을 수 있습니다.`,In,ft,Gn,Z,_t,cs,vo,rr=`This is the configuration class to store the configuration of a <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperModel">WhisperModel</a>. It is used to instantiate a | |
| Whisper model according to the specified arguments, defining the model architecture. Instantiating a configuration | |
| with the defaults will yield a similar configuration to that of the Whisper | |
| <a href="https://huggingface.co/openai/whisper-tiny" rel="nofollow">openai/whisper-tiny</a> architecture.`,ps,To,ir=`Configuration objects inherit from <code>PretrainedConfig</code> and can be used to control the model outputs. Read the | |
| documentation from <code>PretrainedConfig</code> for more information.`,hs,ge,Un,gt,Zn,M,bt,ms,xo,dr="Construct a Whisper tokenizer.",us,$o,lr=`This tokenizer inherits from <code>PreTrainedTokenizer</code> which contains some of the main methods. Users should refer to | |
| the superclass for more information regarding such methods.`,fs,ne,yt,_s,Wo,cr="Override the prefix tokens appended to the start of the label sequence. This method can be used standalone to",gs,be,bs,ye,kt,ys,Mo,pr="Build model inputs from a sequence by appending eos_token_id.",ks,ke,wt,ws,Fo,hr=`Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding | |
| special tokens using the tokenizer <code>prepare_for_model</code> method.`,vs,se,vt,Ts,zo,mr=`Create the token type IDs corresponding to the sequences passed. <a href="../glossary#token-type-ids">What are token type | |
| IDs?</a>`,xs,jo,ur="Should be overridden in a subclass if the model has a special way of building those.",$s,Co,Tt,Nn,xt,Xn,F,$t,Ws,Jo,fr="Construct a “fast” Whisper tokenizer (backed by HuggingFace’s <em>tokenizers</em> library).",Ms,qo,_r=`This tokenizer inherits from <code>PreTrainedTokenizerFast</code> which contains most of the main methods. Users should | |
| refer to this superclass for more information regarding those methods.`,Fs,ae,Wt,zs,Io,gr="Override the prefix tokens appended to the start of the label sequence. This method can be used standalone to",js,we,Cs,ve,Mt,Js,Go,br="Build model inputs from a sequence by appending eos_token_id.",qs,Te,Ft,Is,Uo,yr=`Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding | |
| special tokens using the tokenizer <code>prepare_for_model</code> method.`,Gs,re,zt,Us,Zo,kr=`Create the token type IDs corresponding to the sequences passed. <a href="../glossary#token-type-ids">What are token type | |
| IDs?</a>`,Zs,No,wr="Should be overridden in a subclass if the model has a special way of building those.",Ns,Xo,jt,Rn,Ct,Vn,I,Jt,Xs,Ro,vr="Constructs a Whisper feature extractor.",Rs,Vo,Tr=`This feature extractor inherits from <code>SequenceFeatureExtractor</code> which contains | |
| most of the main methods. Users should refer to this superclass for more information regarding those methods.`,Vs,Lo,xr="This class extracts mel-filter bank features from raw speech using a custom numpy implementation of the <code>Short Time Fourier Transform</code> which should match pytorch’s <code>torch.stft</code> equivalent.",Ls,xe,qt,Hs,Ho,$r=`Main method to featurize and prepare for the model one or several sequence(s). Implementation uses PyTorch for | |
| the STFT computation if available, otherwise a slower NumPy based one.`,Ln,It,Hn,z,Gt,Es,Eo,Wr=`Constructs a Whisper processor which wraps a Whisper feature extractor and a Whisper tokenizer into a single | |
| processor.`,Bs,Bo,Mr=`<a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperProcessor">WhisperProcessor</a> offers all the functionalities of <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor">WhisperFeatureExtractor</a> and <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperTokenizer">WhisperTokenizer</a>. See | |
| the <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperProcessor.__call__"><strong>call</strong>()</a> and <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperProcessor.decode">decode()</a> for more information.`,Ss,$e,Ut,Ps,So,Fr=`Forwards the <code>audio</code> argument to WhisperFeatureExtractor’s <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor.__call__"><strong>call</strong>()</a> and the <code>text</code> | |
| argument to <code>__call__()</code>. Please refer to the docstring of the above two methods for more | |
| information.`,Ys,ie,Zt,As,Po,zr="Instantiate a processor associated with a pretrained model.",Ds,We,Qs,de,Nt,Os,Yo,jr=`Saves the attributes of this processor (feature extractor, tokenizer…) in the specified directory so that it | |
| can be reloaded using the <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperProcessor.from_pretrained">from_pretrained()</a> method.`,Ks,Me,ea,Fe,Xt,ta,Ao,Cr=`This method forwards all its arguments to WhisperTokenizer’s <code>batch_decode()</code>. Please | |
| refer to the docstring of this method for more information.`,oa,ze,Rt,na,Do,Jr=`This method forwards all its arguments to WhisperTokenizer’s <code>decode()</code>. Please refer to | |
| the docstring of this method for more information.`,En,Vt,Bn,G,Lt,sa,Qo,qr=`The bare Whisper Model outputting raw hidden-states without any specific head on top. | |
| This model inherits from <code>PreTrainedModel</code>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.)`,aa,Oo,Ir=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass. | |
| Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage | |
| and behavior.`,ra,B,Ht,ia,Ko,Gr='The <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperModel">WhisperModel</a> forward method, overrides the <code>__call__</code> special method.',da,je,la,Ce,ca,Je,Et,pa,en,Ur=`Masks extracted features along time axis and/or along feature axis according to | |
| <a href="https://arxiv.org/abs/1904.08779" rel="nofollow">SpecAugment</a>.`,Sn,Bt,Pn,N,St,ha,tn,Zr=`The Whisper Model with a language modeling head. Can be used for automatic speech recognition. | |
| This model inherits from <code>PreTrainedModel</code>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.)`,ma,on,Nr=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass. | |
| Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage | |
| and behavior.`,ua,S,Pt,fa,nn,Xr='The <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperForConditionalGeneration">WhisperForConditionalGeneration</a> forward method, overrides the <code>__call__</code> special method.',_a,qe,ga,Ie,Yn,Yt,An,ee,At,ba,sn,Rr=`Whisper Encoder Model with a sequence classification head on top (a linear layer over the pooled output) for tasks | |
| like SUPERB Keyword Spotting.`,ya,P,Dt,ka,an,Vr='The <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperForAudioClassification">WhisperForAudioClassification</a> forward method, overrides the <code>__call__</code> special method.',wa,Ge,va,Ue,Dn,Qt,Qn,X,Ot,Ta,rn,Lr=`The bare Whisper Model outputting raw hidden-states without any specific head on top. | |
| This model inherits from <code>TFPreTrainedModel</code>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.)`,xa,dn,Hr=`This model is also a <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model" rel="nofollow">keras.Model</a> subclass. Use it | |
| as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and | |
| behavior.`,$a,Y,Kt,Wa,ln,Er='The <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.TFWhisperModel">TFWhisperModel</a> forward method, overrides the <code>__call__</code> special method.',Ma,Ze,Fa,Ne,On,eo,Kn,R,to,za,cn,Br=`The Whisper Model with a language modeling head. Can be used for automatic speech recognition. | |
| This model inherits from <code>TFPreTrainedModel</code>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.)`,ja,pn,Sr=`This model is also a <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model" rel="nofollow">keras.Model</a> subclass. Use it | |
| as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and | |
| behavior.`,Ca,A,oo,Ja,hn,Pr='The <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.TFWhisperForConditionalGeneration">TFWhisperForConditionalGeneration</a> forward method, overrides the <code>__call__</code> special method.',qa,Xe,Ia,Re,es,no,ts,V,so,Ga,mn,Yr=`The bare Whisper Model transformer outputting raw hidden-states without any specific head on top. | |
| This model inherits from <code>FlaxPreTrainedModel</code>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.) This model is also a Flax Linen | |
| <a href="https://flax.readthedocs.io/en/latest/_autosummary/flax.nn.module.html" rel="nofollow">flax.nn.Module</a> subclass. Use it as a | |
| regular Flax Module and refer to the Flax documentation for all matter related to general usage and behavior. | |
| Finally, this model supports inherent JAX features such as:`,Ua,un,Ar='<li><a href="https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit" rel="nofollow">Just-In-Time (JIT) compilation</a></li> <li><a href="https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation" rel="nofollow">Automatic Differentiation</a></li> <li><a href="https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap" rel="nofollow">Vectorization</a></li> <li><a href="https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap" rel="nofollow">Parallelization</a></li>',Za,D,ao,Na,fn,Dr="The <code>FlaxWhisperPreTrainedModel</code> forward method, overrides the <code>__call__</code> special method.",Xa,Ve,Ra,Le,os,ro,ns,L,io,Va,_n,Qr=`The Whisper Model with a language modeling head. | |
| This model inherits from <code>FlaxPreTrainedModel</code>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.) This model is also a Flax Linen | |
| <a href="https://flax.readthedocs.io/en/latest/_autosummary/flax.nn.module.html" rel="nofollow">flax.nn.Module</a> subclass. Use it as a | |
| regular Flax Module and refer to the Flax documentation for all matter related to general usage and behavior. | |
| Finally, this model supports inherent JAX features such as:`,La,gn,Or='<li><a href="https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit" rel="nofollow">Just-In-Time (JIT) compilation</a></li> <li><a href="https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation" rel="nofollow">Automatic Differentiation</a></li> <li><a href="https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap" rel="nofollow">Vectorization</a></li> <li><a href="https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap" rel="nofollow">Parallelization</a></li>',Ha,Q,lo,Ea,bn,Kr="The <code>FlaxWhisperPreTrainedModel</code> forward method, overrides the <code>__call__</code> special method.",Ba,He,Sa,Ee,ss,co,as,H,po,Pa,yn,ei=`The Whisper Model with an audio classification head on top. | |
| This model inherits from <code>FlaxPreTrainedModel</code>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.) This model is also a Flax Linen | |
| <a href="https://flax.readthedocs.io/en/latest/_autosummary/flax.nn.module.html" rel="nofollow">flax.nn.Module</a> subclass. Use it as a | |
| regular Flax Module and refer to the Flax documentation for all matter related to general usage and behavior. | |
| Finally, this model supports inherent JAX features such as:`,Ya,kn,ti='<li><a href="https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit" rel="nofollow">Just-In-Time (JIT) compilation</a></li> <li><a href="https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation" rel="nofollow">Automatic Differentiation</a></li> <li><a href="https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap" rel="nofollow">Vectorization</a></li> <li><a href="https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap" rel="nofollow">Parallelization</a></li>',Aa,O,ho,Da,wn,oi='The <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.FlaxWhisperForAudioClassification">FlaxWhisperForAudioClassification</a> forward method, overrides the <code>__call__</code> special method.',Qa,Be,Oa,Se,rs,mo,is,Tn,ds;return w=new U({props:{title:"Whisper",local:"whisper",headingTag:"h1"}}),v=new U({props:{title:"개요",local:"overview",headingTag:"h2"}}),ht=new K({props:{code:"cHl0aG9uJTIwc3JjJTJGdHJhbnNmb3JtZXJzJTJGbW9kZWxzJTJGd2hpc3BlciUyRmNvbnZlcnRfb3BlbmFpX3RvX2hmLnB5JTIwLS1jaGVja3BvaW50X3BhdGglMjAlMjIlMjIlMjAtLXB5dG9yY2hfZHVtcF9mb2xkZXJfcGF0aCUyMCUyMkFydGh1ciUyRndoaXNwZXItMyUyMiUyMC0tY29udmVydF9wcmVwcm9jZXNzb3IlMjBUcnVl",highlighted:'python src/transformers/models/whisper/convert_openai_to_hf.py --checkpoint_path <span class="hljs-string">""</span> --pytorch_dump_folder_path <span class="hljs-string">"Arthur/whisper-3"</span> --convert_preprocessor True',wrap:!1}}),ft=new U({props:{title:"WhisperConfig",local:"whisperconfig ][ transformers.WhisperConfig",headingTag:"h2"}}),_t=new W({props:{name:"class transformers.WhisperConfig",anchor:"transformers.WhisperConfig",parameters:[{name:"vocab_size",val:" = 51865"},{name:"num_mel_bins",val:" = 80"},{name:"encoder_layers",val:" = 4"},{name:"encoder_attention_heads",val:" = 6"},{name:"decoder_layers",val:" = 4"},{name:"decoder_attention_heads",val:" = 6"},{name:"decoder_ffn_dim",val:" = 
1536"},{name:"encoder_ffn_dim",val:" = 1536"},{name:"encoder_layerdrop",val:" = 0.0"},{name:"decoder_layerdrop",val:" = 0.0"},{name:"decoder_start_token_id",val:" = 50257"},{name:"use_cache",val:" = True"},{name:"is_encoder_decoder",val:" = True"},{name:"activation_function",val:" = 'gelu'"},{name:"d_model",val:" = 384"},{name:"dropout",val:" = 0.0"},{name:"attention_dropout",val:" = 0.0"},{name:"activation_dropout",val:" = 0.0"},{name:"init_std",val:" = 0.02"},{name:"scale_embedding",val:" = False"},{name:"max_source_positions",val:" = 1500"},{name:"max_target_positions",val:" = 448"},{name:"pad_token_id",val:" = 50256"},{name:"bos_token_id",val:" = 50256"},{name:"eos_token_id",val:" = 50256"},{name:"suppress_tokens",val:" = None"},{name:"begin_suppress_tokens",val:" = [220, 50256]"},{name:"use_weighted_layer_sum",val:" = False"},{name:"classifier_proj_size",val:" = 256"},{name:"apply_spec_augment",val:" = False"},{name:"mask_time_prob",val:" = 0.05"},{name:"mask_time_length",val:" = 10"},{name:"mask_time_min_masks",val:" = 2"},{name:"mask_feature_prob",val:" = 0.0"},{name:"mask_feature_length",val:" = 10"},{name:"mask_feature_min_masks",val:" = 0"},{name:"median_filter_width",val:" = 7"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.WhisperConfig.vocab_size",description:`<strong>vocab_size</strong> (<code>int</code>, <em>optional</em>, defaults to 51865) — | |
| Vocabulary size of the Whisper model. Defines the number of different tokens that can be represented by the | |
| <code>decoder_input_ids</code> passed when calling <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperModel">WhisperModel</a>`,name:"vocab_size"},{anchor:"transformers.WhisperConfig.num_mel_bins",description:`<strong>num_mel_bins</strong> (<code>int</code>, <em>optional</em>, defaults to 80) — | |
| Number of mel features used per input features. Should correspond to the value used in the | |
| <code>WhisperProcessor</code> class.`,name:"num_mel_bins"},{anchor:"transformers.WhisperConfig.encoder_layers",description:`<strong>encoder_layers</strong> (<code>int</code>, <em>optional</em>, defaults to 4) — | |
| Number of encoder layers.`,name:"encoder_layers"},{anchor:"transformers.WhisperConfig.decoder_layers",description:`<strong>decoder_layers</strong> (<code>int</code>, <em>optional</em>, defaults to 4) — | |
| Number of decoder layers.`,name:"decoder_layers"},{anchor:"transformers.WhisperConfig.encoder_attention_heads",description:`<strong>encoder_attention_heads</strong> (<code>int</code>, <em>optional</em>, defaults to 6) — | |
| Number of attention heads for each attention layer in the Transformer encoder.`,name:"encoder_attention_heads"},{anchor:"transformers.WhisperConfig.decoder_attention_heads",description:`<strong>decoder_attention_heads</strong> (<code>int</code>, <em>optional</em>, defaults to 6) — | |
| Number of attention heads for each attention layer in the Transformer decoder.`,name:"decoder_attention_heads"},{anchor:"transformers.WhisperConfig.encoder_ffn_dim",description:`<strong>encoder_ffn_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 1536) — | |
| Dimensionality of the “intermediate” (often named feed-forward) layer in encoder.`,name:"encoder_ffn_dim"},{anchor:"transformers.WhisperConfig.decoder_ffn_dim",description:`<strong>decoder_ffn_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 1536) — | |
| Dimensionality of the “intermediate” (often named feed-forward) layer in decoder.`,name:"decoder_ffn_dim"},{anchor:"transformers.WhisperConfig.encoder_layerdrop",description:`<strong>encoder_layerdrop</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| The LayerDrop probability for the encoder. See the <a href="https://arxiv.org/abs/1909.11556" rel="nofollow">LayerDrop paper</a> | |
| for more details.`,name:"encoder_layerdrop"},{anchor:"transformers.WhisperConfig.decoder_layerdrop",description:`<strong>decoder_layerdrop</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| The LayerDrop probability for the decoder. See the <a href="https://arxiv.org/abs/1909.11556" rel="nofollow">LayerDrop paper</a> | |
| for more details.`,name:"decoder_layerdrop"},{anchor:"transformers.WhisperConfig.decoder_start_token_id",description:`<strong>decoder_start_token_id</strong> (<code>int</code>, <em>optional</em>, defaults to 50257) — | |
| Corresponds to the ”<|startoftranscript|>” token, which is automatically used when no <code>decoder_input_ids</code> | |
| are provided to the <code>generate</code> function. It is used to guide the model's generation process depending on | |
| the task.`,name:"decoder_start_token_id"},{anchor:"transformers.WhisperConfig.use_cache",description:`<strong>use_cache</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not the model should return the last key/values attentions (not used by all models).`,name:"use_cache"},{anchor:"transformers.WhisperConfig.is_encoder_decoder",description:`<strong>is_encoder_decoder</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether the model is used as an encoder/decoder or not.`,name:"is_encoder_decoder"},{anchor:"transformers.WhisperConfig.activation_function",description:`<strong>activation_function</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"gelu"</code>) — | |
| The non-linear activation function (function or string) in the encoder and pooler. If string, <code>"gelu"</code>, | |
| <code>"relu"</code>, <code>"silu"</code> and <code>"gelu_new"</code> are supported.`,name:"activation_function"},{anchor:"transformers.WhisperConfig.d_model",description:`<strong>d_model</strong> (<code>int</code>, <em>optional</em>, defaults to 384) — | |
| Dimensionality of the layers.`,name:"d_model"},{anchor:"transformers.WhisperConfig.dropout",description:`<strong>dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.`,name:"dropout"},{anchor:"transformers.WhisperConfig.attention_dropout",description:`<strong>attention_dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| The dropout ratio for the attention probabilities.`,name:"attention_dropout"},{anchor:"transformers.WhisperConfig.activation_dropout",description:`<strong>activation_dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| The dropout ratio for activations inside the fully connected layer.`,name:"activation_dropout"},{anchor:"transformers.WhisperConfig.init_std",description:`<strong>init_std</strong> (<code>float</code>, <em>optional</em>, defaults to 0.02) — | |
| The standard deviation of the truncated_normal_initializer for initializing all weight matrices.`,name:"init_std"},{anchor:"transformers.WhisperConfig.scale_embedding",description:`<strong>scale_embedding</strong> (<code>bool</code>, <em>optional</em>, defaults to False) — | |
| Scale embeddings by dividing by sqrt(d_model).`,name:"scale_embedding"},{anchor:"transformers.WhisperConfig.max_source_positions",description:`<strong>max_source_positions</strong> (<code>int</code>, <em>optional</em>, defaults to 1500) — | |
| The maximum sequence length of log-mel filter-bank features that this model might ever be used with.`,name:"max_source_positions"},{anchor:"transformers.WhisperConfig.max_target_positions",description:`<strong>max_target_positions</strong> (<code>int</code>, <em>optional</em>, defaults to 448) — | |
| The maximum sequence length that this model might ever be used with. Typically set this to something large | |
| just in case (e.g., 512 or 1024 or 2048).`,name:"max_target_positions"},{anchor:"transformers.WhisperConfig.pad_token_id",description:`<strong>pad_token_id</strong> (<code>int</code>, <em>optional</em>, defaults to 50256) — | |
| Padding token id.`,name:"pad_token_id"},{anchor:"transformers.WhisperConfig.bos_token_id",description:`<strong>bos_token_id</strong> (<code>int</code>, <em>optional</em>, defaults to 50256) — | |
| Begin of stream token id.`,name:"bos_token_id"},{anchor:"transformers.WhisperConfig.eos_token_id",description:`<strong>eos_token_id</strong> (<code>int</code>, <em>optional</em>, defaults to 50256) — | |
| End of stream token id.`,name:"eos_token_id"},{anchor:"transformers.WhisperConfig.suppress_tokens",description:`<strong>suppress_tokens</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| A list containing the non-speech tokens that will be used by the logit processor in the <code>generate</code> | |
| function. NON_SPEECH_TOKENS and NON_SPEECH_TOKENS_MULTI each correspond to the <code>english-only</code> and the | |
| <code>multilingual</code> model.`,name:"suppress_tokens"},{anchor:"transformers.WhisperConfig.begin_suppress_tokens",description:`<strong>begin_suppress_tokens</strong> (<code>List[int]</code>, <em>optional</em>, defaults to <code>[220,50256]</code>) — | |
| A list containing tokens that will be suppressed at the beginning of the sampling process. Initialized as | |
| the token for <code>" "</code> (<code>blank_token_id</code>) and the <code>eos_token_id</code>`,name:"begin_suppress_tokens"},{anchor:"transformers.WhisperConfig.use_weighted_layer_sum",description:`<strong>use_weighted_layer_sum</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to use a weighted average of layer outputs with learned weights. Only relevant when using an | |
| instance of <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperForAudioClassification">WhisperForAudioClassification</a>.`,name:"use_weighted_layer_sum"},{anchor:"transformers.WhisperConfig.classifier_proj_size",description:`<strong>classifier_proj_size</strong> (<code>int</code>, <em>optional</em>, defaults to 256) — | |
| Dimensionality of the projection before token mean-pooling for classification. Only relevant when using an | |
| instance of <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperForAudioClassification">WhisperForAudioClassification</a>.`,name:"classifier_proj_size"},{anchor:"transformers.WhisperConfig.apply_spec_augment",description:`<strong>apply_spec_augment</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to apply <em>SpecAugment</em> data augmentation to the outputs of the feature encoder. For reference see | |
| <a href="https://arxiv.org/abs/1904.08779" rel="nofollow">SpecAugment: A Simple Data Augmentation Method for Automatic Speech | |
| Recognition</a>.`,name:"apply_spec_augment"},{anchor:"transformers.WhisperConfig.mask_time_prob",description:`<strong>mask_time_prob</strong> (<code>float</code>, <em>optional</em>, defaults to 0.05) — | |
| Percentage (between 0 and 1) of all feature vectors along the time axis which will be masked. The masking | |
| procedure generates <code>mask_time_prob*len(time_axis)/mask_time_length</code> independent masks over the axis. If | |
| reasoning from the probability of each feature vector to be chosen as the start of the vector span to be | |
| masked, <em>mask_time_prob</em> should be <code>prob_vector_start*mask_time_length</code>. Note that overlap may decrease the | |
| actual percentage of masked vectors. This is only relevant if <code>apply_spec_augment == True</code>.`,name:"mask_time_prob"},{anchor:"transformers.WhisperConfig.mask_time_length",description:`<strong>mask_time_length</strong> (<code>int</code>, <em>optional</em>, defaults to 10) — | |
| Length of vector span along the time axis.`,name:"mask_time_length"},{anchor:"transformers.WhisperConfig.mask_time_min_masks",description:`<strong>mask_time_min_masks</strong> (<code>int</code>, <em>optional</em>, defaults to 2), — | |
| The minimum number of masks of length <code>mask_time_length</code> generated along the time axis, each time step, | |
| irrespectively of <code>mask_time_prob</code>. Only relevant if ”mask_time_prob*len(time_axis)/mask_time_length < | |
| mask_time_min_masks”`,name:"mask_time_min_masks"},{anchor:"transformers.WhisperConfig.mask_feature_prob",description:`<strong>mask_feature_prob</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Percentage (between 0 and 1) of all feature vectors along the feature axis which will be masked. The | |
| masking procedure generates <code>mask_feature_prob*len(feature_axis)/mask_feature_length</code> independent masks over | |
| the axis. If reasoning from the probability of each feature vector to be chosen as the start of the vector | |
| span to be masked, <em>mask_feature_prob</em> should be <code>prob_vector_start*mask_feature_length</code>. Note that overlap | |
| may decrease the actual percentage of masked vectors. This is only relevant if <code>apply_spec_augment is True</code>.`,name:"mask_feature_prob"},{anchor:"transformers.WhisperConfig.mask_feature_length",description:`<strong>mask_feature_length</strong> (<code>int</code>, <em>optional</em>, defaults to 10) — | |
| Length of vector span along the feature axis.`,name:"mask_feature_length"},{anchor:"transformers.WhisperConfig.mask_feature_min_masks",description:`<strong>mask_feature_min_masks</strong> (<code>int</code>, <em>optional</em>, defaults to 0), — | |
| The minimum number of masks of length <code>mask_feature_length</code> generated along the feature axis, each time | |
| step, irrespectively of <code>mask_feature_prob</code>. Only relevant if | |
| <code>mask_feature_prob*len(feature_axis)/mask_feature_length < mask_feature_min_masks</code>.`,name:"mask_feature_min_masks"},{anchor:"transformers.WhisperConfig.median_filter_width",description:`<strong>median_filter_width</strong> (<code>int</code>, <em>optional</em>, defaults to 7) — | |
| Width of the median filter used to smooth the cross-attention outputs when computing token timestamps. | |
| Should be an odd number.`,name:"median_filter_width"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/configuration_whisper.py#L59"}}),ge=new oe({props:{anchor:"transformers.WhisperConfig.example",$$slots:{default:[li]},$$scope:{ctx:T}}}),gt=new U({props:{title:"WhisperTokenizer",local:"whispertokenizer ][ transformers.WhisperTokenizer",headingTag:"h2"}}),bt=new W({props:{name:"class transformers.WhisperTokenizer",anchor:"transformers.WhisperTokenizer",parameters:[{name:"vocab_file",val:""},{name:"merges_file",val:""},{name:"normalizer_file",val:" = None"},{name:"errors",val:" = 'replace'"},{name:"unk_token",val:" = '<|endoftext|>'"},{name:"bos_token",val:" = '<|endoftext|>'"},{name:"eos_token",val:" = '<|endoftext|>'"},{name:"pad_token",val:" = None"},{name:"add_prefix_space",val:" = False"},{name:"language",val:" = None"},{name:"task",val:" = None"},{name:"predict_timestamps",val:" = False"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.WhisperTokenizer.vocab_file",description:`<strong>vocab_file</strong> (<code>str</code>) — | |
| Path to the vocabulary file.`,name:"vocab_file"},{anchor:"transformers.WhisperTokenizer.merges_file",description:`<strong>merges_file</strong> (<code>str</code>) — | |
| Path to the merges file.`,name:"merges_file"},{anchor:"transformers.WhisperTokenizer.normalizer_file",description:`<strong>normalizer_file</strong> (<code>str</code>, <em>optional</em>) — | |
| Path to the normalizer_file file.`,name:"normalizer_file"},{anchor:"transformers.WhisperTokenizer.errors",description:`<strong>errors</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"replace"</code>) — | |
| Paradigm to follow when decoding bytes to UTF-8. See | |
| <a href="https://docs.python.org/3/library/stdtypes.html#bytes.decode" rel="nofollow">bytes.decode</a> for more information.`,name:"errors"},{anchor:"transformers.WhisperTokenizer.unk_token",description:`<strong>unk_token</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"<|endoftext|>"</code>) — | |
| The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this | |
| token instead.`,name:"unk_token"},{anchor:"transformers.WhisperTokenizer.bos_token",description:`<strong>bos_token</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"<|endoftext|>"</code>) — | |
| The beginning of sequence token. The <code>decoder_start_token_id</code> is used to set the first token as | |
| <code>"<|startoftranscript|>"</code> when generating.`,name:"bos_token"},{anchor:"transformers.WhisperTokenizer.eos_token",description:`<strong>eos_token</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"<|endoftext|>"</code>) — | |
| The end of sequence token.`,name:"eos_token"},{anchor:"transformers.WhisperTokenizer.pad_token",description:`<strong>pad_token</strong> (<code>str</code>, <em>optional</em>) — | |
| The token used for padding, for example when batching sequences of different lengths.`,name:"pad_token"},{anchor:"transformers.WhisperTokenizer.add_prefix_space",description:`<strong>add_prefix_space</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to add an initial space to the input. This allows to treat the leading word just as any | |
| other word.`,name:"add_prefix_space"},{anchor:"transformers.WhisperTokenizer.language",description:`<strong>language</strong> (<code>str</code>, <em>optional</em>) — | |
| The language of the transcription text. The corresponding language id token is appended to the start of the | |
| sequence for multilingual speech recognition and speech translation tasks, e.g. for Spanish the token | |
| <code>"<|es|>"</code> is appended to the start of sequence. This should be used for multilingual fine-tuning only.`,name:"language"},{anchor:"transformers.WhisperTokenizer.task",description:`<strong>task</strong> (<code>str</code>, <em>optional</em>) — | |
| Task identifier to append at the start of sequence (if any). This should be used for multilingual | |
| fine-tuning, with <code>"transcribe"</code> for speech recognition and <code>"translate"</code> for speech translation.`,name:"task"},{anchor:"transformers.WhisperTokenizer.predict_timestamps",description:`<strong>predict_timestamps</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to omit the <code><|notimestamps|></code> token at the start of the sequence.`,name:"predict_timestamps"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/tokenization_whisper.py#L210"}}),yt=new W({props:{name:"set_prefix_tokens",anchor:"transformers.WhisperTokenizer.set_prefix_tokens",parameters:[{name:"language",val:": str = None"},{name:"task",val:": str = None"},{name:"predict_timestamps",val:": bool = None"}],parametersDescription:[{anchor:"transformers.WhisperTokenizer.set_prefix_tokens.language",description:`<strong>language</strong> (<code>str</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| The language of the transcription text.`,name:"language"},{anchor:"transformers.WhisperTokenizer.set_prefix_tokens.task",description:`<strong>task</strong> (<code>str</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Task identifier to append at the start of sequence (if any).`,name:"task"},{anchor:"transformers.WhisperTokenizer.set_prefix_tokens.predict_timestamps",description:`<strong>predict_timestamps</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Whether to omit the <code><|notimestamps|></code> token at the start of the sequence.`,name:"predict_timestamps"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/tokenization_whisper.py#L380"}}),be=new oe({props:{anchor:"transformers.WhisperTokenizer.set_prefix_tokens.example",$$slots:{default:[ci]},$$scope:{ctx:T}}}),kt=new W({props:{name:"build_inputs_with_special_tokens",anchor:"transformers.WhisperTokenizer.build_inputs_with_special_tokens",parameters:[{name:"token_ids_0",val:""},{name:"token_ids_1",val:" = None"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/tokenization_whisper.py#L439"}}),wt=new W({props:{name:"get_special_tokens_mask",anchor:"transformers.WhisperTokenizer.get_special_tokens_mask",parameters:[{name:"token_ids_0",val:": List"},{name:"token_ids_1",val:": Optional = None"},{name:"already_has_special_tokens",val:": bool = False"}],parametersDescription:[{anchor:"transformers.WhisperTokenizer.get_special_tokens_mask.token_ids_0",description:`<strong>token_ids_0</strong> (<code>List[int]</code>) — | |
| List of IDs.`,name:"token_ids_0"},{anchor:"transformers.WhisperTokenizer.get_special_tokens_mask.token_ids_1",description:`<strong>token_ids_1</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Optional second list of IDs for sequence pairs.`,name:"token_ids_1"},{anchor:"transformers.WhisperTokenizer.get_special_tokens_mask.already_has_special_tokens",description:`<strong>already_has_special_tokens</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not the token list is already formatted with special tokens for the model.`,name:"already_has_special_tokens"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/tokenization_whisper.py#L447",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[int]</code></p> | |
| `}}),vt=new W({props:{name:"create_token_type_ids_from_sequences",anchor:"transformers.WhisperTokenizer.create_token_type_ids_from_sequences",parameters:[{name:"token_ids_0",val:": List"},{name:"token_ids_1",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.WhisperTokenizer.create_token_type_ids_from_sequences.token_ids_0",description:"<strong>token_ids_0</strong> (<code>List[int]</code>) — The first tokenized sequence.",name:"token_ids_0"},{anchor:"transformers.WhisperTokenizer.create_token_type_ids_from_sequences.token_ids_1",description:"<strong>token_ids_1</strong> (<code>List[int]</code>, <em>optional</em>) — The second tokenized sequence.",name:"token_ids_1"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/tokenization_utils_base.py#L3525",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The token type ids.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[int]</code></p> | |
| `}}),Tt=new W({props:{name:"save_vocabulary",anchor:"transformers.WhisperTokenizer.save_vocabulary",parameters:[{name:"save_directory",val:": str"},{name:"filename_prefix",val:": Optional = None"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/tokenization_whisper.py#L779"}}),xt=new U({props:{title:"WhisperTokenizerFast",local:"whispertokenizerfast ][ transformers.WhisperTokenizerFast",headingTag:"h2"}}),$t=new W({props:{name:"class transformers.WhisperTokenizerFast",anchor:"transformers.WhisperTokenizerFast",parameters:[{name:"vocab_file",val:" = None"},{name:"merges_file",val:" = None"},{name:"normalizer_file",val:" = None"},{name:"tokenizer_file",val:" = None"},{name:"unk_token",val:" = '<|endoftext|>'"},{name:"bos_token",val:" = '<|endoftext|>'"},{name:"eos_token",val:" = '<|endoftext|>'"},{name:"add_prefix_space",val:" = False"},{name:"language",val:" = None"},{name:"task",val:" = None"},{name:"predict_timestamps",val:" = False"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.WhisperTokenizerFast.vocab_file",description:`<strong>vocab_file</strong> (<code>str</code>, <em>optional</em>) — | |
| Path to the vocabulary file.`,name:"vocab_file"},{anchor:"transformers.WhisperTokenizerFast.merges_file",description:`<strong>merges_file</strong> (<code>str</code>, <em>optional</em>) — | |
| Path to the merges file.`,name:"merges_file"},{anchor:"transformers.WhisperTokenizerFast.normalizer_file",description:`<strong>normalizer_file</strong> (<code>str</code>, <em>optional</em>) — | |
| Path to the normalizer_file file.`,name:"normalizer_file"},{anchor:"transformers.WhisperTokenizerFast.tokenizer_file",description:`<strong>tokenizer_file</strong> (<code>str</code>, <em>optional</em>) — | |
| Path to <a href="https://github.com/huggingface/tokenizers" rel="nofollow">tokenizers</a> file (generally has a .json extension) that | |
| contains everything needed to load the tokenizer.`,name:"tokenizer_file"},{anchor:"transformers.WhisperTokenizerFast.unk_token",description:`<strong>unk_token</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"<|endoftext|>"</code>) — | |
| The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this | |
| token instead.`,name:"unk_token"},{anchor:"transformers.WhisperTokenizerFast.bos_token",description:`<strong>bos_token</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"<|endoftext|>"</code>) — | |
| The beginning of sequence token. The <code>decoder_start_token_id</code> is used to set the first token as | |
| <code>"<|startoftranscript|>"</code> when generating.`,name:"bos_token"},{anchor:"transformers.WhisperTokenizerFast.eos_token",description:`<strong>eos_token</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"<|endoftext|>"</code>) — | |
| The end of sequence token.`,name:"eos_token"},{anchor:"transformers.WhisperTokenizerFast.add_prefix_space",description:`<strong>add_prefix_space</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to add an initial space to the input. This allows to treat the leading word just as any | |
| other word. (Whisper tokenizer detect beginning of words by the preceding space).`,name:"add_prefix_space"},{anchor:"transformers.WhisperTokenizerFast.language",description:`<strong>language</strong> (<code>str</code>, <em>optional</em>) — | |
| The language of the transcription text. The corresponding language id token is appended to the start of the | |
| sequence for multilingual speech recognition and speech translation tasks, e.g. for Spanish the token | |
| <code>"<|es|>"</code> is appended to the start of sequence. This should be used for multilingual fine-tuning only.`,name:"language"},{anchor:"transformers.WhisperTokenizerFast.task",description:`<strong>task</strong> (<code>str</code>, <em>optional</em>) — | |
| Task identifier to append at the start of sequence (if any). This should be used for multilingual | |
| fine-tuning, with <code>"transcribe"</code> for speech recognition and <code>"translate"</code> for speech translation.`,name:"task"},{anchor:"transformers.WhisperTokenizerFast.predict_timestamps",description:`<strong>predict_timestamps</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to omit the <code><|notimestamps|></code> token at the start of the sequence.`,name:"predict_timestamps"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/tokenization_whisper_fast.py#L44"}}),Wt=new W({props:{name:"set_prefix_tokens",anchor:"transformers.WhisperTokenizerFast.set_prefix_tokens",parameters:[{name:"language",val:": str = None"},{name:"task",val:": str = None"},{name:"predict_timestamps",val:": bool = None"}],parametersDescription:[{anchor:"transformers.WhisperTokenizerFast.set_prefix_tokens.language",description:`<strong>language</strong> (<code>str</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| The language of the transcription text.`,name:"language"},{anchor:"transformers.WhisperTokenizerFast.set_prefix_tokens.task",description:`<strong>task</strong> (<code>str</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Task identifier to append at the start of sequence (if any).`,name:"task"},{anchor:"transformers.WhisperTokenizerFast.set_prefix_tokens.predict_timestamps",description:`<strong>predict_timestamps</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Whether to omit the <code><|notimestamps|></code> token at the start of the sequence.`,name:"predict_timestamps"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/tokenization_whisper_fast.py#L441"}}),we=new oe({props:{anchor:"transformers.WhisperTokenizerFast.set_prefix_tokens.example",$$slots:{default:[pi]},$$scope:{ctx:T}}}),Mt=new W({props:{name:"build_inputs_with_special_tokens",anchor:"transformers.WhisperTokenizerFast.build_inputs_with_special_tokens",parameters:[{name:"token_ids_0",val:""},{name:"token_ids_1",val:" = None"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/tokenization_whisper_fast.py#L515"}}),Ft=new W({props:{name:"get_special_tokens_mask",anchor:"transformers.WhisperTokenizerFast.get_special_tokens_mask",parameters:[{name:"token_ids_0",val:": List"},{name:"token_ids_1",val:": Optional = None"},{name:"already_has_special_tokens",val:": bool = False"}],parametersDescription:[{anchor:"transformers.WhisperTokenizerFast.get_special_tokens_mask.token_ids_0",description:`<strong>token_ids_0</strong> (<code>List[int]</code>) — | |
| List of IDs.`,name:"token_ids_0"},{anchor:"transformers.WhisperTokenizerFast.get_special_tokens_mask.token_ids_1",description:`<strong>token_ids_1</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Optional second list of IDs for sequence pairs.`,name:"token_ids_1"},{anchor:"transformers.WhisperTokenizerFast.get_special_tokens_mask.already_has_special_tokens",description:`<strong>already_has_special_tokens</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not the token list is already formatted with special tokens for the model.`,name:"already_has_special_tokens"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/tokenization_whisper_fast.py#L523",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[int]</code></p> | |
| `}}),zt=new W({props:{name:"create_token_type_ids_from_sequences",anchor:"transformers.WhisperTokenizerFast.create_token_type_ids_from_sequences",parameters:[{name:"token_ids_0",val:": List"},{name:"token_ids_1",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.WhisperTokenizerFast.create_token_type_ids_from_sequences.token_ids_0",description:"<strong>token_ids_0</strong> (<code>List[int]</code>) — The first tokenized sequence.",name:"token_ids_0"},{anchor:"transformers.WhisperTokenizerFast.create_token_type_ids_from_sequences.token_ids_1",description:"<strong>token_ids_1</strong> (<code>List[int]</code>, <em>optional</em>) — The second tokenized sequence.",name:"token_ids_1"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/tokenization_utils_base.py#L3525",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The token type ids.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[int]</code></p> | |
| `}}),jt=new W({props:{name:"save_vocabulary",anchor:"transformers.WhisperTokenizerFast.save_vocabulary",parameters:[{name:"save_directory",val:": str"},{name:"filename_prefix",val:": Optional = None"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/tokenization_whisper_fast.py#L426"}}),Ct=new U({props:{title:"WhisperFeatureExtractor",local:"whisperfeatureextractor ][ transformers.WhisperFeatureExtractor",headingTag:"h2"}}),Jt=new W({props:{name:"class transformers.WhisperFeatureExtractor",anchor:"transformers.WhisperFeatureExtractor",parameters:[{name:"feature_size",val:" = 80"},{name:"sampling_rate",val:" = 16000"},{name:"hop_length",val:" = 160"},{name:"chunk_length",val:" = 30"},{name:"n_fft",val:" = 400"},{name:"padding_value",val:" = 0.0"},{name:"return_attention_mask",val:" = False"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.WhisperFeatureExtractor.feature_size",description:`<strong>feature_size</strong> (<code>int</code>, <em>optional</em>, defaults to 80) — | |
| The feature dimension of the extracted features.`,name:"feature_size"},{anchor:"transformers.WhisperFeatureExtractor.sampling_rate",description:`<strong>sampling_rate</strong> (<code>int</code>, <em>optional</em>, defaults to 16000) — | |
| The sampling rate at which the audio files should be digitalized expressed in hertz (Hz).`,name:"sampling_rate"},{anchor:"transformers.WhisperFeatureExtractor.hop_length",description:`<strong>hop_length</strong> (<code>int</code>, <em>optional</em>, defaults to 160) — | |
| Length of the overlaping windows for the STFT used to obtain the Mel Frequency coefficients.`,name:"hop_length"},{anchor:"transformers.WhisperFeatureExtractor.chunk_length",description:`<strong>chunk_length</strong> (<code>int</code>, <em>optional</em>, defaults to 30) — | |
| The maximum number of chunks of <code>sampling_rate</code> samples used to trim and pad longer or shorter audio | |
| sequences.`,name:"chunk_length"},{anchor:"transformers.WhisperFeatureExtractor.n_fft",description:`<strong>n_fft</strong> (<code>int</code>, <em>optional</em>, defaults to 400) — | |
| Size of the Fourier transform.`,name:"n_fft"},{anchor:"transformers.WhisperFeatureExtractor.padding_value",description:`<strong>padding_value</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Padding value used to pad the audio. Should correspond to silences.`,name:"padding_value"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/feature_extraction_whisper.py#L36"}}),qt=new W({props:{name:"__call__",anchor:"transformers.WhisperFeatureExtractor.__call__",parameters:[{name:"raw_speech",val:": Union"},{name:"truncation",val:": bool = True"},{name:"pad_to_multiple_of",val:": Optional = None"},{name:"return_tensors",val:": Union = None"},{name:"return_attention_mask",val:": Optional = None"},{name:"padding",val:": Optional = 'max_length'"},{name:"max_length",val:": Optional = None"},{name:"sampling_rate",val:": Optional = None"},{name:"do_normalize",val:": Optional = None"},{name:"device",val:": Optional = 'cpu'"},{name:"return_token_timestamps",val:": Optional = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.WhisperFeatureExtractor.__call__.raw_speech",description:`<strong>raw_speech</strong> (<code>np.ndarray</code>, <code>List[float]</code>, <code>List[np.ndarray]</code>, <code>List[List[float]]</code>) — | |
| The sequence or batch of sequences to be padded. Each sequence can be a numpy array, a list of float | |
| values, a list of numpy arrays or a list of list of float values. Must be mono channel audio, not | |
| stereo, i.e. single float per timestep.`,name:"raw_speech"},{anchor:"transformers.WhisperFeatureExtractor.__call__.truncation",description:`<strong>truncation</strong> (<code>bool</code>, <em>optional</em>, default to <code>True</code>) — | |
| Activates truncation to cut input sequences longer than <em>max_length</em> to <em>max_length</em>.`,name:"truncation"},{anchor:"transformers.WhisperFeatureExtractor.__call__.pad_to_multiple_of",description:`<strong>pad_to_multiple_of</strong> (<code>int</code>, <em>optional</em>, defaults to None) — | |
| If set will pad the sequence to a multiple of the provided value.</p> | |
| <p>This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability | |
| <code>>= 7.5</code> (Volta), or on TPUs which benefit from having sequence lengths be a multiple of 128.`,name:"pad_to_multiple_of"},{anchor:"transformers.WhisperFeatureExtractor.__call__.return_attention_mask",description:`<strong>return_attention_mask</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether to return the attention mask. If left to the default, will return the attention mask according | |
| to the specific feature_extractor’s default.</p> | |
| <p><a href="../glossary#attention-mask">What are attention masks?</a></p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p>For Whisper models, <code>attention_mask</code> should always be passed for batched inference, to avoid subtle | |
| bugs.</p> | |
| </div>`,name:"return_attention_mask"},{anchor:"transformers.WhisperFeatureExtractor.__call__.return_tensors",description:`<strong>return_tensors</strong> (<code>str</code> or <code>TensorType</code>, <em>optional</em>) — | |
| If set, will return tensors instead of list of python integers. Acceptable values are:</p> | |
| <ul> | |
| <li><code>'tf'</code>: Return TensorFlow <code>tf.constant</code> objects.</li> | |
| <li><code>'pt'</code>: Return PyTorch <code>torch.Tensor</code> objects.</li> | |
| <li><code>'np'</code>: Return Numpy <code>np.ndarray</code> objects.</li> | |
| </ul>`,name:"return_tensors"},{anchor:"transformers.WhisperFeatureExtractor.__call__.sampling_rate",description:`<strong>sampling_rate</strong> (<code>int</code>, <em>optional</em>) — | |
| The sampling rate at which the <code>raw_speech</code> input was sampled. It is strongly recommended to pass | |
| <code>sampling_rate</code> at the forward call to prevent silent errors and allow automatic speech recognition | |
| pipeline.`,name:"sampling_rate"},{anchor:"transformers.WhisperFeatureExtractor.__call__.padding_value",description:`<strong>padding_value</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| The value that is used to fill the padding values / vectors.`,name:"padding_value"},{anchor:"transformers.WhisperFeatureExtractor.__call__.do_normalize",description:`<strong>do_normalize</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to zero-mean unit-variance normalize the input. Normalizing can help to significantly | |
| improve the performance of the model.`,name:"do_normalize"},{anchor:"transformers.WhisperFeatureExtractor.__call__.device",description:`<strong>device</strong> (<code>str</code>, <em>optional</em>, defaults to <code>'cpu'</code>) — | |
| Specifies the device for computation of the log-mel spectrogram of audio signals in the | |
| <code>_torch_extract_fbank_features</code> method. (e.g., “cpu”, “cuda”)`,name:"device"},{anchor:"transformers.WhisperFeatureExtractor.__call__.return_token_timestamps",description:`<strong>return_token_timestamps</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Whether or not to return the number of frames of the input raw_speech. | |
| These num_frames can be used by the model to compute word level timestamps.`,name:"return_token_timestamps"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/feature_extraction_whisper.py#L180"}}),It=new U({props:{title:"WhisperProcessor",local:"whisperprocessor ][ transformers.WhisperProcessor",headingTag:"h2"}}),Gt=new W({props:{name:"class transformers.WhisperProcessor",anchor:"transformers.WhisperProcessor",parameters:[{name:"feature_extractor",val:""},{name:"tokenizer",val:""}],parametersDescription:[{anchor:"transformers.WhisperProcessor.feature_extractor",description:`<strong>feature_extractor</strong> (<code>WhisperFeatureExtractor</code>) — | |
| An instance of <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor">WhisperFeatureExtractor</a>. The feature extractor is a required input.`,name:"feature_extractor"},{anchor:"transformers.WhisperProcessor.tokenizer",description:`<strong>tokenizer</strong> (<code>WhisperTokenizer</code>) — | |
| An instance of <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperTokenizer">WhisperTokenizer</a>. The tokenizer is a required input.`,name:"tokenizer"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/processing_whisper.py#L22"}}),Ut=new W({props:{name:"__call__",anchor:"transformers.WhisperProcessor.__call__",parameters:[{name:"*args",val:""},{name:"**kwargs",val:""}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/processing_whisper.py#L48"}}),Zt=new W({props:{name:"from_pretrained",anchor:"transformers.WhisperProcessor.from_pretrained",parameters:[{name:"pretrained_model_name_or_path",val:": Union"},{name:"cache_dir",val:": Union = None"},{name:"force_download",val:": bool = False"},{name:"local_files_only",val:": bool = False"},{name:"token",val:": Union = None"},{name:"revision",val:": str = 'main'"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.WhisperProcessor.from_pretrained.pretrained_model_name_or_path",description:`<strong>pretrained_model_name_or_path</strong> (<code>str</code> or <code>os.PathLike</code>) — | |
| This can be either:</p> | |
| <ul> | |
| <li>a string, the <em>model id</em> of a pretrained feature_extractor hosted inside a model repo on | |
| huggingface.co.</li> | |
| <li>a path to a <em>directory</em> containing a feature extractor file saved using the | |
| <code>save_pretrained()</code> method, e.g., <code>./my_model_directory/</code>.</li> | |
| <li>a path or url to a saved feature extractor JSON <em>file</em>, e.g., | |
| <code>./my_model_directory/preprocessor_config.json</code>. | |
| **kwargs — | |
| Additional keyword arguments passed along to both | |
| <code>from_pretrained()</code> and | |
| <code>~tokenization_utils_base.PreTrainedTokenizer.from_pretrained</code>.</li> | |
| </ul>`,name:"pretrained_model_name_or_path"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/processing_utils.py#L835"}}),We=new ce({props:{$$slots:{default:[hi]},$$scope:{ctx:T}}}),Nt=new W({props:{name:"save_pretrained",anchor:"transformers.WhisperProcessor.save_pretrained",parameters:[{name:"save_directory",val:""},{name:"push_to_hub",val:": bool = False"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.WhisperProcessor.save_pretrained.save_directory",description:`<strong>save_directory</strong> (<code>str</code> or <code>os.PathLike</code>) — | |
| Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will | |
| be created if it does not exist).`,name:"save_directory"},{anchor:"transformers.WhisperProcessor.save_pretrained.push_to_hub",description:`<strong>push_to_hub</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the | |
| repository you want to push to with <code>repo_id</code> (will default to the name of <code>save_directory</code> in your | |
| namespace).`,name:"push_to_hub"},{anchor:"transformers.WhisperProcessor.save_pretrained.kwargs",description:`<strong>kwargs</strong> (<code>Dict[str, Any]</code>, <em>optional</em>) — | |
| Additional key word arguments passed along to the <code>push_to_hub()</code> method.`,name:"kwargs"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/processing_utils.py#L432"}}),Me=new ce({props:{$$slots:{default:[mi]},$$scope:{ctx:T}}}),Xt=new W({props:{name:"batch_decode",anchor:"transformers.WhisperProcessor.batch_decode",parameters:[{name:"*args",val:""},{name:"**kwargs",val:""}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/processing_whisper.py#L82"}}),Rt=new W({props:{name:"decode",anchor:"transformers.WhisperProcessor.decode",parameters:[{name:"*args",val:""},{name:"**kwargs",val:""}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/processing_whisper.py#L89"}}),Vt=new U({props:{title:"WhisperModel",local:"whispermodel ][ transformers.WhisperModel",headingTag:"h2"}}),Lt=new W({props:{name:"class transformers.WhisperModel",anchor:"transformers.WhisperModel",parameters:[{name:"config",val:": WhisperConfig"}],parametersDescription:[{anchor:"transformers.WhisperModel.config",description:`<strong>config</strong> (<a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig">WhisperConfig</a>) — | |
| Model configuration class with all the parameters of the model. Initializing with a config file does not | |
| load the weights associated with the model, only the configuration. Check out the | |
| <code>from_pretrained()</code> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_whisper.py#L1490"}}),Ht=new W({props:{name:"forward",anchor:"transformers.WhisperModel.forward",parameters:[{name:"input_features",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"decoder_input_ids",val:": Optional = None"},{name:"decoder_attention_mask",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"decoder_head_mask",val:": Optional = None"},{name:"cross_attn_head_mask",val:": Optional = None"},{name:"encoder_outputs",val:": Optional = None"},{name:"past_key_values",val:": Union = None"},{name:"decoder_inputs_embeds",val:": Optional = None"},{name:"decoder_position_ids",val:": Optional = None"},{name:"use_cache",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"},{name:"cache_position",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.WhisperModel.forward.input_features",description:`<strong>input_features</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, feature_size, sequence_length)</code>) — | |
| Float values mel features extracted from the raw speech waveform. Raw speech waveform can be obtained by | |
| loading a <code>.flac</code> or <code>.wav</code> audio file into an array of type <code>List[float]</code> or a <code>numpy.ndarray</code>, <em>e.g.</em> via | |
| the soundfile library (<code>pip install soundfile</code>). To prepare the array into <code>input_features</code>, the | |
| <code>AutoFeatureExtractor</code> should be used for extracting the mel features, padding and conversion into a | |
| tensor of type <code>torch.FloatTensor</code>. See <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor.__call__"><strong>call</strong>()</a>`,name:"input_features"},{anchor:"transformers.WhisperModel.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Mask to avoid performing <em>SpecAugment</em> data augmentation on padding token indices. Mask values selected in | |
| <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 for tokens that are <strong>not masked</strong>,</li> | |
| <li>0 for tokens that are <strong>masked</strong>.</li> | |
| </ul> | |
| <p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.WhisperModel.forward.decoder_input_ids",description:`<strong>decoder_input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, target_sequence_length)</code>, <em>optional</em>) — | |
| Indices of decoder input sequence tokens in the vocabulary.</p> | |
| <p>Indices can be obtained using <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperTokenizer">WhisperTokenizer</a>. See <code>PreTrainedTokenizer.encode()</code> and | |
| <code>PreTrainedTokenizer.__call__()</code> for details.</p> | |
| <p><a href="../glossary#decoder-input-ids">What are decoder input IDs?</a></p> | |
| <p>Whisper uses the <code>decoder_start_token_id</code> as the starting token for <code>decoder_input_ids</code> generation. If | |
| <code>past_key_values</code> is used, optionally only the last <code>decoder_input_ids</code> have to be input (see | |
| <code>past_key_values</code>).`,name:"decoder_input_ids"},{anchor:"transformers.WhisperModel.forward.decoder_attention_mask",description:`<strong>decoder_attention_mask</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, target_sequence_length)</code>, <em>optional</em>) — | |
| Default behavior: generate a tensor that ignores pad tokens in <code>decoder_input_ids</code>. Causal mask will also | |
| be used by default.</p> | |
| <p>If you want to change padding behavior, you should read | |
| <code>modeling_whisper._prepare_decoder_attention_mask</code> and modify to your needs. See diagram 1 in <a href="https://arxiv.org/abs/1910.13461" rel="nofollow">the BART | |
| paper</a> for more information on the default strategy.`,name:"decoder_attention_mask"},{anchor:"transformers.WhisperModel.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.Tensor</code> of shape <code>(encoder_layers, encoder_attention_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the attention modules in the encoder. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"head_mask"},{anchor:"transformers.WhisperModel.forward.decoder_head_mask",description:`<strong>decoder_head_mask</strong> (<code>torch.Tensor</code> of shape <code>(decoder_layers, decoder_attention_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the attention modules in the decoder. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"decoder_head_mask"},{anchor:"transformers.WhisperModel.forward.cross_attn_head_mask",description:`<strong>cross_attn_head_mask</strong> (<code>torch.Tensor</code> of shape <code>(decoder_layers, decoder_attention_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the cross-attention modules. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"cross_attn_head_mask"},{anchor:"transformers.WhisperModel.forward.encoder_outputs",description:`<strong>encoder_outputs</strong> (<code>tuple(tuple(torch.FloatTensor)</code>, <em>optional</em>) — | |
| Tuple consists of (<code>last_hidden_state</code>, <em>optional</em>: <code>hidden_states</code>, <em>optional</em>: <code>attentions</code>) | |
| <code>last_hidden_state</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) is a sequence of | |
| hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder.`,name:"encoder_outputs"},{anchor:"transformers.WhisperModel.forward.past_key_values",description:`<strong>past_key_values</strong> (<code>EncoderDecoderCache</code> or <code>tuple(tuple(torch.FloatTensor))</code>, <em>optional</em>) — | |
| Pre-computed hidden-states that can be used to speed up auto-regressive (sequential) decoding. There are | |
| four sets of pre-computed hidden-states: key and values states in the self-attention blocks (2) and | |
| in the cross-attention blocks (2). The <code>past_key_values</code> are returned when <code>use_cache=True</code> is passed or | |
| when <code>config.use_cache=True</code></p> | |
| <p>Two formats are allowed:</p> | |
| <ul> | |
| <li>An <code>EncoderDecoderCache</code> instance;</li> | |
| <li>Tuple of <code>tuple(torch.FloatTensor)</code> of length <code>config.n_layers</code>, with each tuple having 2 tensors of shape | |
| <code>(batch_size, num_heads, sequence_length, embed_size_per_head)</code>) and 2 additional tensors of shape | |
| <code>(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)</code>.</li> | |
| </ul> | |
| <p>If <code>past_key_values</code> are used, the user can optionally input only the last <code>decoder_input_ids</code> (those that | |
| don’t have their past key value states given to this model) of shape <code>(batch_size, 1)</code> instead of all | |
| <code>decoder_input_ids</code> of shape <code>(batch_size, sequence_length)</code>.`,name:"past_key_values"},{anchor:"transformers.WhisperModel.forward.decoder_inputs_embeds",description:`<strong>decoder_inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, target_sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of passing <code>decoder_input_ids</code> you can choose to directly pass an embedded | |
| representation. If <code>past_key_values</code> is used, optionally only the last <code>decoder_inputs_embeds</code> have to be | |
| input (see <code>past_key_values</code>). This is useful if you want more control over how to convert | |
| <code>decoder_input_ids</code> indices into associated vectors than the model’s internal embedding lookup matrix.`,name:"decoder_inputs_embeds"},{anchor:"transformers.WhisperModel.forward.use_cache",description:`<strong>use_cache</strong> (<code>bool</code>, <em>optional</em>) — | |
| If set to <code>True</code>, <code>past_key_values</code> key value states are returned and can be used to speed up decoding (see | |
| <code>past_key_values</code>).`,name:"use_cache"},{anchor:"transformers.WhisperModel.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.WhisperModel.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.WhisperModel.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <code>ModelOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.WhisperModel.forward.cache_position",description:`<strong>cache_position</strong> (<code>torch.LongTensor</code> of shape <code>(sequence_length)</code>, <em>optional</em>) — | |
| Indices depicting the position of the input sequence tokens in the sequence. It is used to update the cache | |
| in the correct position and to infer the complete sequence length.`,name:"cache_position"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_whisper.py#L1565",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <code>transformers.modeling_outputs.Seq2SeqModelOutput</code> or a tuple of | |
| <code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig" | |
| >WhisperConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>) — Sequence of hidden-states at the output of the last layer of the decoder of the model.</p> | |
| <p>If <code>past_key_values</code> is used only the last hidden-state of the sequences of shape <code>(batch_size, 1, hidden_size)</code> is output.</p> | |
| </li> | |
| <li> | |
| <p><strong>past_key_values</strong> (<code>tuple(tuple(torch.FloatTensor))</code>, <em>optional</em>, returned when <code>use_cache=True</code> is passed or when <code>config.use_cache=True</code>) — Tuple of <code>tuple(torch.FloatTensor)</code> of length <code>config.n_layers</code>, with each tuple having 2 tensors of shape | |
| <code>(batch_size, num_heads, sequence_length, embed_size_per_head)</code>) and 2 additional tensors of shape | |
| <code>(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)</code>.</p> | |
| <p>Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention | |
| blocks) that can be used (see <code>past_key_values</code> input) to speed up sequential decoding.</p> | |
| </li> | |
| <li> | |
| <p><strong>decoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, + | |
| one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the decoder at the output of each layer plus the optional initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>decoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in the | |
| self-attention heads.</p> | |
| </li> | |
| <li> | |
| <p><strong>cross_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights of the decoder’s cross-attention layer, after the attention softmax, used to compute the | |
| weighted average in the cross-attention heads.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — Sequence of hidden-states at the output of the last layer of the encoder of the model.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, + | |
| one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the encoder at the output of each layer plus the optional initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in the | |
| self-attention heads.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>transformers.modeling_outputs.Seq2SeqModelOutput</code> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),je=new ce({props:{$$slots:{default:[ui]},$$scope:{ctx:T}}}),Ce=new oe({props:{anchor:"transformers.WhisperModel.forward.example",$$slots:{default:[fi]},$$scope:{ctx:T}}}),Et=new W({props:{name:"_mask_input_features",anchor:"transformers.WhisperModel._mask_input_features",parameters:[{name:"input_features",val:": FloatTensor"},{name:"attention_mask",val:": Optional = None"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_whisper.py#L1522"}}),Bt=new U({props:{title:"WhisperForConditionalGeneration",local:"whisperforconditionalgeneration ][ transformers.WhisperForConditionalGeneration",headingTag:"h2"}}),St=new W({props:{name:"class transformers.WhisperForConditionalGeneration",anchor:"transformers.WhisperForConditionalGeneration",parameters:[{name:"config",val:": WhisperConfig"}],parametersDescription:[{anchor:"transformers.WhisperForConditionalGeneration.config",description:`<strong>config</strong> (<a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig">WhisperConfig</a>) — | |
| Model configuration class with all the parameters of the model. Initializing with a config file does not | |
| load the weights associated with the model, only the configuration. Check out the | |
| <code>from_pretrained()</code> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_whisper.py#L1662"}}),Pt=new W({props:{name:"forward",anchor:"transformers.WhisperForConditionalGeneration.forward",parameters:[{name:"input_features",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"decoder_input_ids",val:": Optional = None"},{name:"decoder_attention_mask",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"decoder_head_mask",val:": Optional = None"},{name:"cross_attn_head_mask",val:": Optional = None"},{name:"encoder_outputs",val:": Optional = None"},{name:"past_key_values",val:": Union = None"},{name:"decoder_inputs_embeds",val:": Optional = None"},{name:"decoder_position_ids",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"use_cache",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"},{name:"cache_position",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.WhisperForConditionalGeneration.forward.input_features",description:`<strong>input_features</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, feature_size, sequence_length)</code>) — | |
| Float values mel features extracted from the raw speech waveform. Raw speech waveform can be obtained by | |
| loading a <code>.flac</code> or <code>.wav</code> audio file into an array of type <code>List[float]</code> or a <code>numpy.ndarray</code>, <em>e.g.</em> via | |
| the soundfile library (<code>pip install soundfile</code>). To prepare the array into <code>input_features</code>, the | |
| <code>AutoFeatureExtractor</code> should be used for extracting the mel features, padding and conversion into a | |
| tensor of type <code>torch.FloatTensor</code>. See <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor.__call__"><strong>call</strong>()</a>`,name:"input_features"},{anchor:"transformers.WhisperForConditionalGeneration.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Mask to avoid performing <em>SpecAugment</em> data augmentation on padding token indices. Mask values selected in | |
| <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 for tokens that are <strong>not masked</strong>,</li> | |
| <li>0 for tokens that are <strong>masked</strong>.</li> | |
| </ul> | |
| <p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.WhisperForConditionalGeneration.forward.decoder_input_ids",description:`<strong>decoder_input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, target_sequence_length)</code>, <em>optional</em>) — | |
| Indices of decoder input sequence tokens in the vocabulary.</p> | |
| <p>Indices can be obtained using <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperTokenizer">WhisperTokenizer</a>. See <code>PreTrainedTokenizer.encode()</code> and | |
| <code>PreTrainedTokenizer.__call__()</code> for details.</p> | |
| <p><a href="../glossary#decoder-input-ids">What are decoder input IDs?</a></p> | |
| <p>Whisper uses the <code>decoder_start_token_id</code> as the starting token for <code>decoder_input_ids</code> generation. If | |
| <code>past_key_values</code> is used, optionally only the last <code>decoder_input_ids</code> have to be input (see | |
| <code>past_key_values</code>).`,name:"decoder_input_ids"},{anchor:"transformers.WhisperForConditionalGeneration.forward.decoder_attention_mask",description:`<strong>decoder_attention_mask</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, target_sequence_length)</code>, <em>optional</em>) — | |
| Default behavior: generate a tensor that ignores pad tokens in <code>decoder_input_ids</code>. Causal mask will also | |
| be used by default.</p> | |
| <p>If you want to change padding behavior, you should read | |
| <code>modeling_whisper._prepare_decoder_attention_mask</code> and modify to your needs. See diagram 1 in <a href="https://arxiv.org/abs/1910.13461" rel="nofollow">the BART | |
| paper</a> for more information on the default strategy.`,name:"decoder_attention_mask"},{anchor:"transformers.WhisperForConditionalGeneration.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.Tensor</code> of shape <code>(encoder_layers, encoder_attention_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the attention modules in the encoder. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"head_mask"},{anchor:"transformers.WhisperForConditionalGeneration.forward.decoder_head_mask",description:`<strong>decoder_head_mask</strong> (<code>torch.Tensor</code> of shape <code>(decoder_layers, decoder_attention_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the attention modules in the decoder. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"decoder_head_mask"},{anchor:"transformers.WhisperForConditionalGeneration.forward.cross_attn_head_mask",description:`<strong>cross_attn_head_mask</strong> (<code>torch.Tensor</code> of shape <code>(decoder_layers, decoder_attention_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the cross-attention modules. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"cross_attn_head_mask"},{anchor:"transformers.WhisperForConditionalGeneration.forward.encoder_outputs",description:`<strong>encoder_outputs</strong> (<code>tuple(tuple(torch.FloatTensor)</code>, <em>optional</em>) — | |
| Tuple consists of (<code>last_hidden_state</code>, <em>optional</em>: <code>hidden_states</code>, <em>optional</em>: <code>attentions</code>) | |
| <code>last_hidden_state</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) is a sequence of | |
| hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder.`,name:"encoder_outputs"},{anchor:"transformers.WhisperForConditionalGeneration.forward.past_key_values",description:`<strong>past_key_values</strong> (<code>EncoderDecoderCache</code> or <code>tuple(tuple(torch.FloatTensor))</code>, <em>optional</em>) — | |
| Pre-computed hidden-states that can be used to speed up auto-regressive (sequential) decoding. There are | |
| four sets of pre-computed hidden-states: key and values states in the self-attention blocks (2) and | |
| in the cross-attention blocks (2). The <code>past_key_values</code> are returned when <code>use_cache=True</code> is passed or | |
| when <code>config.use_cache=True</code></p> | |
| <p>Two formats are allowed:</p> | |
| <ul> | |
| <li>An <code>EncoderDecoderCache</code> instance;</li> | |
| <li>Tuple of <code>tuple(torch.FloatTensor)</code> of length <code>config.n_layers</code>, with each tuple having 2 tensors of shape | |
| <code>(batch_size, num_heads, sequence_length, embed_size_per_head)</code>) and 2 additional tensors of shape | |
| <code>(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)</code>.</li> | |
| </ul> | |
| <p>If <code>past_key_values</code> are used, the user can optionally input only the last <code>decoder_input_ids</code> (those that | |
| don’t have their past key value states given to this model) of shape <code>(batch_size, 1)</code> instead of all | |
| <code>decoder_input_ids</code> of shape <code>(batch_size, sequence_length)</code>.`,name:"past_key_values"},{anchor:"transformers.WhisperForConditionalGeneration.forward.decoder_inputs_embeds",description:`<strong>decoder_inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, target_sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of passing <code>decoder_input_ids</code> you can choose to directly pass an embedded | |
| representation. If <code>past_key_values</code> is used, optionally only the last <code>decoder_inputs_embeds</code> have to be | |
| input (see <code>past_key_values</code>). This is useful if you want more control over how to convert | |
| <code>decoder_input_ids</code> indices into associated vectors than the model’s internal embedding lookup matrix.`,name:"decoder_inputs_embeds"},{anchor:"transformers.WhisperForConditionalGeneration.forward.use_cache",description:`<strong>use_cache</strong> (<code>bool</code>, <em>optional</em>) — | |
| If set to <code>True</code>, <code>past_key_values</code> key value states are returned and can be used to speed up decoding (see | |
| <code>past_key_values</code>).`,name:"use_cache"},{anchor:"transformers.WhisperForConditionalGeneration.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.WhisperForConditionalGeneration.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.WhisperForConditionalGeneration.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <code>ModelOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.WhisperForConditionalGeneration.forward.cache_position",description:`<strong>cache_position</strong> (<code>torch.LongTensor</code> of shape <code>(sequence_length)</code>, <em>optional</em>) — | |
| Indices depicting the position of the input sequence tokens in the sequence. It is used to update the cache | |
| in the correct position and to infer the complete sequence length.`,name:"cache_position"},{anchor:"transformers.WhisperForConditionalGeneration.forward.labels",description:`<strong>labels</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Labels for computing the language modeling loss. Indices should either be in <code>[0, ..., config.vocab_size]</code> | |
| or -100 (see <code>input_ids</code> docstring). Tokens with indices set to <code>-100</code> are ignored (masked), the loss is | |
| only computed for the tokens with labels in <code>[0, ..., config.vocab_size]</code>. <code>sequence_length</code> should be smaller than or equal to <code>config.max_target_positions</code>.`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_whisper.py#L1701",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <code>transformers.modeling_outputs.Seq2SeqLMOutput</code> or a tuple of | |
| <code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig" | |
| >WhisperConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>labels</code> is provided) — Language modeling loss.</p> | |
| </li> | |
| <li> | |
| <p><strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, config.vocab_size)</code>) — Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).</p> | |
| </li> | |
| <li> | |
| <p><strong>past_key_values</strong> (<code>tuple(tuple(torch.FloatTensor))</code>, <em>optional</em>, returned when <code>use_cache=True</code> is passed or when <code>config.use_cache=True</code>) — Tuple of <code>tuple(torch.FloatTensor)</code> of length <code>config.n_layers</code>, with each tuple having 2 tensors of shape | |
| <code>(batch_size, num_heads, sequence_length, embed_size_per_head)</code>) and 2 additional tensors of shape | |
| <code>(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)</code>.</p> | |
| <p>Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention | |
| blocks) that can be used (see <code>past_key_values</code> input) to speed up sequential decoding.</p> | |
| </li> | |
| <li> | |
| <p><strong>decoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, + | |
| one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>decoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in the | |
| self-attention heads.</p> | |
| </li> | |
| <li> | |
| <p><strong>cross_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights of the decoder’s cross-attention layer, after the attention softmax, used to compute the | |
| weighted average in the cross-attention heads.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — Sequence of hidden-states at the output of the last layer of the encoder of the model.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, + | |
| one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attention weights of the encoder, after the attention softmax, used to compute the weighted average in the | |
| self-attention heads.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>transformers.modeling_outputs.Seq2SeqLMOutput</code> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),qe=new ce({props:{$$slots:{default:[_i]},$$scope:{ctx:T}}}),Ie=new oe({props:{anchor:"transformers.WhisperForConditionalGeneration.forward.example",$$slots:{default:[gi]},$$scope:{ctx:T}}}),Yt=new U({props:{title:"WhisperForAudioClassification",local:"whisperforaudioclassification ][ transformers.WhisperForAudioClassification",headingTag:"h2"}}),At=new W({props:{name:"class transformers.WhisperForAudioClassification",anchor:"transformers.WhisperForAudioClassification",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.WhisperForAudioClassification.input_features",description:`<strong>input_features</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, feature_size, sequence_length)</code>) — | |
| Float values of mel features extracted from the raw speech waveform. Raw speech waveform can be obtained by | |
| loading a <code>.flac</code> or <code>.wav</code> audio file into an array of type <code>List[float]</code> or a <code>numpy.ndarray</code>, <em>e.g.</em> via | |
| the soundfile library (<code>pip install soundfile</code>). To prepare the array into <code>input_features</code>, the | |
| <code>AutoFeatureExtractor</code> should be used for extracting the mel features, padding and conversion into a | |
| tensor of type <code>torch.FloatTensor</code>. See <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor.__call__"><strong>call</strong>()</a>`,name:"input_features"},{anchor:"transformers.WhisperForAudioClassification.head_mask",description:`<strong>head_mask</strong> (<code>torch.Tensor</code> of shape <code>(encoder_layers, encoder_attention_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the attention modules in the encoder. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"head_mask"},{anchor:"transformers.WhisperForAudioClassification.encoder_outputs",description:`<strong>encoder_outputs</strong> (<code>tuple(tuple(torch.FloatTensor)</code>, <em>optional</em>) — | |
| Tuple consists of (<code>last_hidden_state</code>, <em>optional</em>: <code>hidden_states</code>, <em>optional</em>: <code>attentions</code>). | |
| <code>last_hidden_state</code> (of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) is a sequence of | |
| hidden-states at the output of the last layer of the encoder.`,name:"encoder_outputs"},{anchor:"transformers.WhisperForAudioClassification.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.WhisperForAudioClassification.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.WhisperForAudioClassification.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <code>ModelOutput</code> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_whisper.py#L2148"}}),Dt=new W({props:{name:"forward",anchor:"transformers.WhisperForAudioClassification.forward",parameters:[{name:"input_features",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"encoder_outputs",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.WhisperForAudioClassification.forward.input_features",description:`<strong>input_features</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, feature_size, sequence_length)</code>) — | |
| Float values of mel features extracted from the raw speech waveform. Raw speech waveform can be obtained by | |
| loading a <code>.flac</code> or <code>.wav</code> audio file into an array of type <code>List[float]</code> or a <code>numpy.ndarray</code>, <em>e.g.</em> via | |
| the soundfile library (<code>pip install soundfile</code>). To prepare the array into <code>input_features</code>, the | |
| <code>AutoFeatureExtractor</code> should be used for extracting the mel features, padding and conversion into a | |
| tensor of type <code>torch.FloatTensor</code>. See <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor.__call__"><strong>call</strong>()</a>`,name:"input_features"},{anchor:"transformers.WhisperForAudioClassification.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.Tensor</code> of shape <code>(encoder_layers, encoder_attention_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the attention modules in the encoder. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"head_mask"},{anchor:"transformers.WhisperForAudioClassification.forward.encoder_outputs",description:`<strong>encoder_outputs</strong> (<code>tuple(tuple(torch.FloatTensor)</code>, <em>optional</em>) — | |
| Tuple consists of (<code>last_hidden_state</code>, <em>optional</em>: <code>hidden_states</code>, <em>optional</em>: <code>attentions</code>). | |
| <code>last_hidden_state</code> (of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) is a sequence of | |
| hidden-states at the output of the last layer of the encoder.`,name:"encoder_outputs"},{anchor:"transformers.WhisperForAudioClassification.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.WhisperForAudioClassification.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.WhisperForAudioClassification.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <code>ModelOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.WhisperForAudioClassification.forward.labels",description:`<strong>labels</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size,)</code>, <em>optional</em>) — | |
| Labels for computing the sequence classification/regression loss. Indices should be in <code>[0, ..., config.num_labels - 1]</code>. If <code>config.num_labels == 1</code> a regression loss is computed (Mean-Square loss); if | |
| <code>config.num_labels > 1</code> a classification loss is computed (Cross-Entropy).`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_whisper.py#L2182",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <code>transformers.modeling_outputs.SequenceClassifierOutput</code> or a tuple of | |
| <code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig" | |
| >WhisperConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>labels</code> is provided) — Classification (or regression if config.num_labels==1) loss.</p> | |
| </li> | |
| <li> | |
| <p><strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, config.num_labels)</code>) — Classification (or regression if config.num_labels==1) scores (before SoftMax).</p> | |
| </li> | |
| <li> | |
| <p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, + | |
| one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attention weights after the attention softmax, used to compute the weighted average in the self-attention | |
| heads.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>transformers.modeling_outputs.SequenceClassifierOutput</code> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),Ge=new ce({props:{$$slots:{default:[bi]},$$scope:{ctx:T}}}),Ue=new oe({props:{anchor:"transformers.WhisperForAudioClassification.forward.example",$$slots:{default:[yi]},$$scope:{ctx:T}}}),Qt=new U({props:{title:"TFWhisperModel",local:"tfwhispermodel ][ transformers.TFWhisperModel",headingTag:"h2"}}),Ot=new W({props:{name:"class transformers.TFWhisperModel",anchor:"transformers.TFWhisperModel",parameters:[{name:"config",val:": WhisperConfig"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.TFWhisperModel.config",description:`<strong>config</strong> (<a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig">WhisperConfig</a>) — | |
| Model configuration class with all the parameters of the model. Initializing with a config file does not | |
| load the weights associated with the model, only the configuration. Check out the | |
| <code>from_pretrained()</code> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_tf_whisper.py#L1225"}}),Kt=new W({props:{name:"call",anchor:"transformers.TFWhisperModel.call",parameters:[{name:"input_features",val:": TFModelInputType | None = None"},{name:"decoder_input_ids",val:": np.ndarray | tf.Tensor | None = None"},{name:"decoder_attention_mask",val:": np.ndarray | tf.Tensor | None = None"},{name:"decoder_position_ids",val:": np.ndarray | tf.Tensor | None = None"},{name:"head_mask",val:": np.ndarray | tf.Tensor | None = None"},{name:"decoder_head_mask",val:": np.ndarray | tf.Tensor | None = None"},{name:"cross_attn_head_mask",val:": np.ndarray | tf.Tensor | None = None"},{name:"encoder_outputs",val:": Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None"},{name:"past_key_values",val:": Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None"},{name:"decoder_inputs_embeds",val:": Optional[Tuple[Union[np.ndarray, tf.Tensor]]] = None"},{name:"use_cache",val:": Optional[bool] = None"},{name:"output_attentions",val:": Optional[bool] = None"},{name:"output_hidden_states",val:": Optional[bool] = None"},{name:"return_dict",val:": Optional[bool] = None"},{name:"training",val:": bool = False"}],parametersDescription:[{anchor:"transformers.TFWhisperModel.call.input_features",description:`<strong>input_features</strong> (<code>tf.Tensor</code> of shape <code>(batch_size, feature_size, sequence_length)</code>) — | |
| Float values of fbank features extracted from the raw speech waveform. Raw speech waveform can be obtained | |
| by loading a <code>.flac</code> or <code>.wav</code> audio file into an array of type <code>List[float]</code> or a <code>numpy.ndarray</code>, <em>e.g.</em> | |
| via the soundfile library (<code>pip install soundfile</code>). To prepare the array into <code>input_features</code>, the | |
| <code>AutoFeatureExtractor</code> should be used for extracting the fbank features, padding and conversion into a | |
| tensor of type <code>tf.Tensor</code>. See <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor.__call__"><strong>call</strong>()</a>`,name:"input_features"},{anchor:"transformers.TFWhisperModel.call.decoder_input_ids",description:`<strong>decoder_input_ids</strong> (<code>tf.Tensor</code> of shape <code>(batch_size, target_sequence_length)</code>, <em>optional</em>) — | |
| Indices of decoder input sequence tokens in the vocabulary.</p> | |
| <p>Indices can be obtained using <code>WhisperTokenizer</code>. See <code>PreTrainedTokenizer.encode()</code> and | |
| <code>PreTrainedTokenizer.__call__()</code> for details.</p> | |
| <p><a href="../glossary#decoder-input-ids">What are decoder input IDs?</a></p> | |
| <p>Whisper uses the <code>decoder_start_token_id</code> as the starting token for <code>decoder_input_ids</code> generation. If | |
| <code>past_key_values</code> is used, optionally only the last <code>decoder_input_ids</code> have to be input (see | |
| <code>past_key_values</code>).`,name:"decoder_input_ids"},{anchor:"transformers.TFWhisperModel.call.decoder_attention_mask",description:`<strong>decoder_attention_mask</strong> (<code>tf.Tensor</code> of shape <code>(batch_size, target_sequence_length)</code>, <em>optional</em>) — | |
| Default behavior: generate a tensor that ignores pad tokens in <code>decoder_input_ids</code>. Causal mask will also | |
| be used by default.</p> | |
| <p>If you want to change padding behavior, you should read | |
| <code>modeling_whisper._prepare_decoder_attention_mask</code> and modify to your needs. See diagram 1 in <a href="https://arxiv.org/abs/1910.13461" rel="nofollow">the | |
| paper</a> for more information on the default strategy.`,name:"decoder_attention_mask"},{anchor:"transformers.TFWhisperModel.call.head_mask",description:`<strong>head_mask</strong> (<code>tf.Tensor</code> of shape <code>(encoder_layers, encoder_attention_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the attention modules in the encoder. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"head_mask"},{anchor:"transformers.TFWhisperModel.call.decoder_head_mask",description:`<strong>decoder_head_mask</strong> (<code>tf.Tensor</code> of shape <code>(decoder_layers, decoder_attention_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the attention modules in the decoder. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"decoder_head_mask"},{anchor:"transformers.TFWhisperModel.call.cross_attn_head_mask",description:`<strong>cross_attn_head_mask</strong> (<code>tf.Tensor</code> of shape <code>(decoder_layers, decoder_attention_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the cross-attention modules. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"cross_attn_head_mask"},{anchor:"transformers.TFWhisperModel.call.encoder_outputs",description:`<strong>encoder_outputs</strong> (<code>tuple(tuple(tf.Tensor)</code>, <em>optional</em>) — | |
| Tuple consists of (<code>last_hidden_state</code>, <em>optional</em>: <code>hidden_states</code>, <em>optional</em>: <code>attentions</code>). | |
| <code>last_hidden_state</code> (of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) is a sequence of | |
| hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder.`,name:"encoder_outputs"},{anchor:"transformers.TFWhisperModel.call.past_key_values",description:`<strong>past_key_values</strong> (<code>tuple(tuple(tf.Tensor))</code>, <em>optional</em>, returned when <code>use_cache=True</code> is passed or when <code>config.use_cache=True</code>) — | |
| Tuple of <code>tuple(tf.Tensor)</code> of length <code>config.n_layers</code>, with each tuple having 2 tensors of shape | |
| <code>(batch_size, num_heads, sequence_length, embed_size_per_head)</code> and 2 additional tensors of shape | |
| <code>(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)</code>.</p> | |
| <p>Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention | |
| blocks) that can be used (see <code>past_key_values</code> input) to speed up sequential decoding.</p> | |
| <p>If <code>past_key_values</code> are used, the user can optionally input only the last <code>decoder_input_ids</code> (those that | |
| don’t have their past key value states given to this model) of shape <code>(batch_size, 1)</code> instead of all | |
| <code>decoder_input_ids</code> of shape <code>(batch_size, sequence_length)</code>.`,name:"past_key_values"},{anchor:"transformers.TFWhisperModel.call.decoder_inputs_embeds",description:`<strong>decoder_inputs_embeds</strong> (<code>tf.Tensor</code> of shape <code>(batch_size, target_sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of passing <code>decoder_input_ids</code> you can choose to directly pass an embedded | |
| representation. If <code>past_key_values</code> is used, optionally only the last <code>decoder_inputs_embeds</code> have to be | |
| input (see <code>past_key_values</code>). This is useful if you want more control over how to convert | |
| <code>decoder_input_ids</code> indices into associated vectors than the model’s internal embedding lookup matrix.`,name:"decoder_inputs_embeds"},{anchor:"transformers.TFWhisperModel.call.use_cache",description:`<strong>use_cache</strong> (<code>bool</code>, <em>optional</em>) — | |
| If set to <code>True</code>, <code>past_key_values</code> key value states are returned and can be used to speed up decoding (see | |
| <code>past_key_values</code>).`,name:"use_cache"},{anchor:"transformers.TFWhisperModel.call.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.TFWhisperModel.call.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.TFWhisperModel.call.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <code>ModelOutput</code> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_tf_whisper.py#L1253",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <code>transformers.modeling_tf_outputs.TFSeq2SeqModelOutput</code> or a tuple of <code>tf.Tensor</code> (if | |
| <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various elements depending on the | |
| configuration (<a | |
| href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig" | |
| >WhisperConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>last_hidden_state</strong> (<code>tf.Tensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>) — Sequence of hidden-states at the output of the last layer of the decoder of the model.</p> | |
| <p>If <code>past_key_values</code> is used only the last hidden-state of the sequences of shape <code>(batch_size, 1, hidden_size)</code> is output.</p> | |
| </li> | |
| <li> | |
| <p><strong>past_key_values</strong> (<code>List[tf.Tensor]</code>, <em>optional</em>, returned when <code>use_cache=True</code> is passed or when <code>config.use_cache=True</code>) — List of <code>tf.Tensor</code> of length <code>config.n_layers</code>, with each tensor of shape <code>(2, batch_size, num_heads, sequence_length, embed_size_per_head)</code>).</p> | |
| <p>Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be | |
| used (see <code>past_key_values</code> input) to speed up sequential decoding.</p> | |
| </li> | |
| <li> | |
| <p><strong>decoder_hidden_states</strong> (<code>tuple(tf.Tensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>tf.Tensor</code> (one for the output of the embeddings + one for the output of each layer) of shape | |
| <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>decoder_attentions</strong> (<code>tuple(tf.Tensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>tf.Tensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attention weights of the decoder, after the attention softmax, used to compute the weighted average in the | |
| self-attention heads.</p> | |
| </li> | |
| <li> | |
| <p><strong>cross_attentions</strong> (<code>tuple(tf.Tensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>tf.Tensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attention weights of the decoder’s cross-attention layer, after the attention softmax, used to compute the | |
| weighted average in the cross-attention heads.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_last_hidden_state</strong> (<code>tf.Tensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — Sequence of hidden-states at the output of the last layer of the encoder of the model.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_hidden_states</strong> (<code>tuple(tf.Tensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>tf.Tensor</code> (one for the output of the embeddings + one for the output of each layer) of shape | |
| <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_attentions</strong> (<code>tuple(tf.Tensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>tf.Tensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attention weights of the encoder, after the attention softmax, used to compute the weighted average in the | |
| self-attention heads.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>transformers.modeling_tf_outputs.TFSeq2SeqModelOutput</code> or <code>tuple(tf.Tensor)</code></p> | |
| `}}),Ze=new ce({props:{$$slots:{default:[ki]},$$scope:{ctx:T}}}),Ne=new oe({props:{anchor:"transformers.TFWhisperModel.call.example",$$slots:{default:[wi]},$$scope:{ctx:T}}}),eo=new U({props:{title:"TFWhisperForConditionalGeneration",local:"tfwhisperforconditionalgeneration ][ transformers.TFWhisperForConditionalGeneration",headingTag:"h2"}}),to=new W({props:{name:"class transformers.TFWhisperForConditionalGeneration",anchor:"transformers.TFWhisperForConditionalGeneration",parameters:[{name:"config",val:": WhisperConfig"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.TFWhisperForConditionalGeneration.config",description:`<strong>config</strong> (<a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig">WhisperConfig</a>) — | |
| Model configuration class with all the parameters of the model. Initializing with a config file does not | |
| load the weights associated with the model, only the configuration. Check out the | |
| <code>from_pretrained()</code> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_tf_whisper.py#L1341"}}),oo=new W({props:{name:"call",anchor:"transformers.TFWhisperForConditionalGeneration.call",parameters:[{name:"input_features",val:": TFModelInputType | None = None"},{name:"decoder_input_ids",val:": np.ndarray | tf.Tensor | None = None"},{name:"decoder_attention_mask",val:": np.ndarray | tf.Tensor | None = None"},{name:"decoder_position_ids",val:": np.ndarray | tf.Tensor | None = None"},{name:"head_mask",val:": np.ndarray | tf.Tensor | None = None"},{name:"decoder_head_mask",val:": np.ndarray | tf.Tensor | None = None"},{name:"cross_attn_head_mask",val:": np.ndarray | tf.Tensor | None = None"},{name:"encoder_outputs",val:": Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None"},{name:"past_key_values",val:": Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None"},{name:"decoder_inputs_embeds",val:": Optional[Tuple[Union[np.ndarray, tf.Tensor]]] = None"},{name:"labels",val:": np.ndarray | tf.Tensor | None = None"},{name:"use_cache",val:": Optional[bool] = None"},{name:"output_attentions",val:": Optional[bool] = None"},{name:"output_hidden_states",val:": Optional[bool] = None"},{name:"return_dict",val:": Optional[bool] = None"},{name:"training",val:": bool = False"}],parametersDescription:[{anchor:"transformers.TFWhisperForConditionalGeneration.call.input_features",description:`<strong>input_features</strong> (<code>tf.Tensor</code> of shape <code>(batch_size, feature_size, sequence_length)</code>) — | |
| Float values of fbank features extracted from the raw speech waveform. Raw speech waveform can be obtained | |
| by loading a <code>.flac</code> or <code>.wav</code> audio file into an array of type <code>List[float]</code> or a <code>numpy.ndarray</code>, <em>e.g.</em> | |
| via the soundfile library (<code>pip install soundfile</code>). To prepare the array into <code>input_features</code>, the | |
| <code>AutoFeatureExtractor</code> should be used for extracting the fbank features, padding and conversion into a | |
| tensor of type <code>tf.Tensor</code>. See <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor.__call__"><strong>call</strong>()</a>`,name:"input_features"},{anchor:"transformers.TFWhisperForConditionalGeneration.call.decoder_input_ids",description:`<strong>decoder_input_ids</strong> (<code>tf.Tensor</code> of shape <code>(batch_size, target_sequence_length)</code>, <em>optional</em>) — | |
| Indices of decoder input sequence tokens in the vocabulary.</p> | |
| <p>Indices can be obtained using <code>WhisperTokenizer</code>. See <code>PreTrainedTokenizer.encode()</code> and | |
| <code>PreTrainedTokenizer.__call__()</code> for details.</p> | |
| <p><a href="../glossary#decoder-input-ids">What are decoder input IDs?</a></p> | |
| <p>Whisper uses the <code>decoder_start_token_id</code> as the starting token for <code>decoder_input_ids</code> generation. If | |
| <code>past_key_values</code> is used, optionally only the last <code>decoder_input_ids</code> have to be input (see | |
| <code>past_key_values</code>).`,name:"decoder_input_ids"},{anchor:"transformers.TFWhisperForConditionalGeneration.call.decoder_attention_mask",description:`<strong>decoder_attention_mask</strong> (<code>tf.Tensor</code> of shape <code>(batch_size, target_sequence_length)</code>, <em>optional</em>) — | |
| Default behavior: generate a tensor that ignores pad tokens in <code>decoder_input_ids</code>. Causal mask will also | |
| be used by default.</p> | |
| <p>If you want to change padding behavior, you should read | |
| <code>modeling_whisper._prepare_decoder_attention_mask</code> and modify to your needs. See diagram 1 in <a href="https://arxiv.org/abs/1910.13461" rel="nofollow">the | |
| paper</a> for more information on the default strategy.`,name:"decoder_attention_mask"},{anchor:"transformers.TFWhisperForConditionalGeneration.call.head_mask",description:`<strong>head_mask</strong> (<code>tf.Tensor</code> of shape <code>(encoder_layers, encoder_attention_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the attention modules in the encoder. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"head_mask"},{anchor:"transformers.TFWhisperForConditionalGeneration.call.decoder_head_mask",description:`<strong>decoder_head_mask</strong> (<code>tf.Tensor</code> of shape <code>(decoder_layers, decoder_attention_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the attention modules in the decoder. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"decoder_head_mask"},{anchor:"transformers.TFWhisperForConditionalGeneration.call.cross_attn_head_mask",description:`<strong>cross_attn_head_mask</strong> (<code>tf.Tensor</code> of shape <code>(decoder_layers, decoder_attention_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the cross-attention modules. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"cross_attn_head_mask"},{anchor:"transformers.TFWhisperForConditionalGeneration.call.encoder_outputs",description:`<strong>encoder_outputs</strong> (<code>tuple(tuple(tf.Tensor)</code>, <em>optional</em>) — | |
| Tuple consists of (<code>last_hidden_state</code>, <em>optional</em>: <code>hidden_states</code>, <em>optional</em>: <code>attentions</code>). | |
| <code>last_hidden_state</code> (of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) is a sequence of | |
| hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder.`,name:"encoder_outputs"},{anchor:"transformers.TFWhisperForConditionalGeneration.call.past_key_values",description:`<strong>past_key_values</strong> (<code>tuple(tuple(tf.Tensor))</code>, <em>optional</em>, returned when <code>use_cache=True</code> is passed or when <code>config.use_cache=True</code>) — | |
| Tuple of <code>tuple(tf.Tensor)</code> of length <code>config.n_layers</code>, with each tuple having 2 tensors of shape | |
| <code>(batch_size, num_heads, sequence_length, embed_size_per_head)</code> and 2 additional tensors of shape | |
| <code>(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)</code>.</p> | |
| <p>Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention | |
| blocks) that can be used (see <code>past_key_values</code> input) to speed up sequential decoding.</p> | |
| <p>If <code>past_key_values</code> are used, the user can optionally input only the last <code>decoder_input_ids</code> (those that | |
| don’t have their past key value states given to this model) of shape <code>(batch_size, 1)</code> instead of all | |
| <code>decoder_input_ids</code> of shape <code>(batch_size, sequence_length)</code>.`,name:"past_key_values"},{anchor:"transformers.TFWhisperForConditionalGeneration.call.decoder_inputs_embeds",description:`<strong>decoder_inputs_embeds</strong> (<code>tf.Tensor</code> of shape <code>(batch_size, target_sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of passing <code>decoder_input_ids</code> you can choose to directly pass an embedded | |
| representation. If <code>past_key_values</code> is used, optionally only the last <code>decoder_inputs_embeds</code> have to be | |
| input (see <code>past_key_values</code>). This is useful if you want more control over how to convert | |
| <code>decoder_input_ids</code> indices into associated vectors than the model’s internal embedding lookup matrix.`,name:"decoder_inputs_embeds"},{anchor:"transformers.TFWhisperForConditionalGeneration.call.use_cache",description:`<strong>use_cache</strong> (<code>bool</code>, <em>optional</em>) — | |
| If set to <code>True</code>, <code>past_key_values</code> key value states are returned and can be used to speed up decoding (see | |
| <code>past_key_values</code>).`,name:"use_cache"},{anchor:"transformers.TFWhisperForConditionalGeneration.call.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.TFWhisperForConditionalGeneration.call.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.TFWhisperForConditionalGeneration.call.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <code>ModelOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.TFWhisperForConditionalGeneration.call.labels",description:`<strong>labels</strong> (<code>tf.Tensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Labels for computing the language modeling loss. Indices should either be in <code>[0, ..., config.vocab_size]</code> | |
| or -100 (see <code>input_ids</code> docstring). Tokens with indices set to <code>-100</code> are ignored (masked), the loss is | |
| only computed for the tokens with labels in <code>[0, ..., config.vocab_size]</code>.`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_tf_whisper.py#L1376",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <code>transformers.modeling_tf_outputs.TFSeq2SeqLMOutput</code> or a tuple of <code>tf.Tensor</code> (if | |
| <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various elements depending on the | |
| configuration (<a | |
| href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig" | |
| >WhisperConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>loss</strong> (<code>tf.Tensor</code> of shape <code>(n,)</code>, <em>optional</em>, where n is the number of non-masked labels, returned when <code>labels</code> is provided) — Language modeling loss.</p> | |
| </li> | |
| <li> | |
| <p><strong>logits</strong> (<code>tf.Tensor</code> of shape <code>(batch_size, sequence_length, config.vocab_size)</code>) — Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).</p> | |
| </li> | |
| <li> | |
| <p><strong>past_key_values</strong> (<code>List[tf.Tensor]</code>, <em>optional</em>, returned when <code>use_cache=True</code> is passed or when <code>config.use_cache=True</code>) — List of <code>tf.Tensor</code> of length <code>config.n_layers</code>, with each tensor of shape <code>(2, batch_size, num_heads, sequence_length, embed_size_per_head)</code>.</p> | |
| <p>Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be | |
| used (see <code>past_key_values</code> input) to speed up sequential decoding.</p> | |
| </li> | |
| <li> | |
| <p><strong>decoder_hidden_states</strong> (<code>tuple(tf.Tensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>tf.Tensor</code> (one for the output of the embeddings + one for the output of each layer) of shape | |
| <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>decoder_attentions</strong> (<code>tuple(tf.Tensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>tf.Tensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attention weights of the decoder, after the attention softmax, used to compute the weighted average in the | |
| self-attention heads.</p> | |
| </li> | |
| <li> | |
| <p><strong>cross_attentions</strong> (<code>tuple(tf.Tensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>tf.Tensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attention weights of the decoder’s cross-attention layer, after the attention softmax, used to compute the | |
| weighted average in the cross-attention heads.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_last_hidden_state</strong> (<code>tf.Tensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — Sequence of hidden-states at the output of the last layer of the encoder of the model.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_hidden_states</strong> (<code>tuple(tf.Tensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>tf.Tensor</code> (one for the output of the embeddings + one for the output of each layer) of shape | |
| <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_attentions</strong> (<code>tuple(tf.Tensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>tf.Tensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attention weights of the encoder, after the attention softmax, used to compute the weighted average in the | |
| self-attention heads.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>transformers.modeling_tf_outputs.TFSeq2SeqLMOutput</code> or <code>tuple(tf.Tensor)</code></p> | |
| `}}),Xe=new ce({props:{$$slots:{default:[vi]},$$scope:{ctx:T}}}),Re=new oe({props:{anchor:"transformers.TFWhisperForConditionalGeneration.call.example",$$slots:{default:[Ti]},$$scope:{ctx:T}}}),no=new U({props:{title:"FlaxWhisperModel",local:"flaxwhispermodel ][ transformers.FlaxWhisperModel",headingTag:"h2"}}),so=new W({props:{name:"class transformers.FlaxWhisperModel",anchor:"transformers.FlaxWhisperModel",parameters:[{name:"config",val:": WhisperConfig"},{name:"input_shape",val:": Tuple = None"},{name:"seed",val:": int = 0"},{name:"dtype",val:": dtype = <class 'jax.numpy.float32'>"},{name:"_do_init",val:": bool = True"},{name:"gradient_checkpointing",val:": bool = False"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.FlaxWhisperModel.config",description:`<strong>config</strong> (<a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig">WhisperConfig</a>) — Model configuration class with all the parameters of the model. | |
| Initializing with a config file does not load the weights associated with the model, only the | |
| configuration. Check out the <code>from_pretrained()</code> method to load the model weights.`,name:"config"},{anchor:"transformers.FlaxWhisperModel.dtype",description:`<strong>dtype</strong> (<code>jax.numpy.dtype</code>, <em>optional</em>, defaults to <code>jax.numpy.float32</code>) — | |
| The data type of the computation. Can be one of <code>jax.numpy.float32</code>, <code>jax.numpy.float16</code> (on GPUs) and | |
| <code>jax.numpy.bfloat16</code> (on TPUs). This can be used to enable mixed-precision training or half-precision | |
| inference on GPUs or TPUs. If specified all the computation will be performed with the given <code>dtype</code>. | |
| <strong>Note that this only specifies the dtype of the computation and does not influence the dtype of model | |
| parameters.</strong> If you wish to change the dtype of the model parameters, see <code>to_fp16()</code> | |
| and <code>to_bf16()</code>.`,name:"dtype"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_flax_whisper.py#L1185"}}),ao=new W({props:{name:"__call__",anchor:"transformers.FlaxWhisperModel.__call__",parameters:[{name:"input_features",val:": Array"},{name:"decoder_input_ids",val:": Array"},{name:"attention_mask",val:": Optional = None"},{name:"decoder_attention_mask",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"decoder_position_ids",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"},{name:"train",val:": bool = False"},{name:"params",val:": dict = None"},{name:"dropout_rng",val:": PRNGKey = None"}],parametersDescription:[{anchor:"transformers.FlaxWhisperModel.__call__.input_features",description:`<strong>input_features</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, feature_size, sequence_length)</code>) — | |
| Float values mel features extracted from the raw speech waveform. Raw speech waveform can be obtained by | |
| loading a <code>.flac</code> or <code>.wav</code> audio file into an array of type <code>List[float]</code> or a <code>numpy.ndarray</code>, <em>e.g.</em> via | |
| the soundfile library (<code>pip install soundfile</code>). To prepare the array into <code>input_features</code>, the | |
| <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor">WhisperFeatureExtractor</a> should be used for extracting the features, padding and conversion into a | |
| tensor of type <code>numpy.ndarray</code>. See <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor.__call__"><strong>call</strong>()</a>`,name:"input_features"},{anchor:"transformers.FlaxWhisperModel.__call__.attention_mask",description:`<strong>attention_mask</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Whisper does not support masking of the <code>input_features</code>, this argument is preserved for compatibility, but | |
| is not used. By default the silence in the input log mel spectrogram are ignored.`,name:"attention_mask"},{anchor:"transformers.FlaxWhisperModel.__call__.decoder_input_ids",description:`<strong>decoder_input_ids</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, target_sequence_length)</code>, <em>optional</em>) — | |
| Indices of decoder input sequence tokens in the vocabulary. Indices can be obtained using | |
| <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperTokenizer">WhisperTokenizer</a>. See <code>PreTrainedTokenizer.encode()</code> and <code>PreTrainedTokenizer.__call__()</code> for details. | |
| <a href="../glossary#decoder-input-ids">What are decoder input IDs?</a> Whisper uses the <code>decoder_start_token_id</code> as | |
| the starting token for <code>decoder_input_ids</code> generation.`,name:"decoder_input_ids"},{anchor:"transformers.FlaxWhisperModel.__call__.decoder_attention_mask",description:`<strong>decoder_attention_mask</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, target_sequence_length)</code>, <em>optional</em>) — | |
| Default behavior: generate a tensor that ignores pad tokens in <code>decoder_input_ids</code>. Causal mask will also | |
| be used by default. If you want to change padding behavior, you should modify to your needs. See diagram 1 | |
| in <a href="https://arxiv.org/abs/1910.13461" rel="nofollow">the paper</a> for more information on the default strategy.`,name:"decoder_attention_mask"},{anchor:"transformers.FlaxWhisperModel.__call__.position_ids",description:`<strong>position_ids</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Whisper does not use <code>position_ids</code> in the encoder as <code>input_features</code> is always the same size and doesn’t | |
| use masking, but this argument is preserved for compatibility. By default the silence in the input log mel | |
| spectrogram are ignored.`,name:"position_ids"},{anchor:"transformers.FlaxWhisperModel.__call__.decoder_position_ids",description:`<strong>decoder_position_ids</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the | |
| range <code>[0, config.max_position_embeddings - 1]</code>.`,name:"decoder_position_ids"},{anchor:"transformers.FlaxWhisperModel.__call__.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.FlaxWhisperModel.__call__.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.FlaxWhisperModel.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <code>ModelOutput</code> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_flax_whisper.py#L1134",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <code>transformers.modeling_flax_outputs.FlaxSeq2SeqModelOutput</code> or a tuple of | |
| <code>jnp.ndarray</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig" | |
| >WhisperConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>last_hidden_state</strong> (<code>jnp.ndarray</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>) — Sequence of hidden-states at the output of the last layer of the decoder of the model.</p> | |
| <p>If <code>past_key_values</code> is used only the last hidden-state of the sequences of shape <code>(batch_size, 1, hidden_size)</code> is output.</p> | |
| </li> | |
| <li> | |
| <p><strong>past_key_values</strong> (<code>tuple(tuple(jnp.ndarray))</code>, <em>optional</em>, returned when <code>use_cache=True</code> is passed or when <code>config.use_cache=True</code>) — Tuple of <code>tuple(jnp.ndarray)</code> of length <code>config.n_layers</code>, with each tuple having 2 tensors of shape | |
| <code>(batch_size, num_heads, sequence_length, embed_size_per_head)</code> and 2 additional tensors of shape | |
| <code>(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)</code>.</p> | |
| <p>Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention | |
| blocks) that can be used (see <code>past_key_values</code> input) to speed up sequential decoding.</p> | |
| </li> | |
| <li> | |
| <p><strong>decoder_hidden_states</strong> (<code>tuple(jnp.ndarray)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>jnp.ndarray</code> (one for the output of the embeddings + one for the output of each layer) of shape | |
| <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>decoder_attentions</strong> (<code>tuple(jnp.ndarray)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>jnp.ndarray</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attention weights of the decoder, after the attention softmax, used to compute the weighted average in the | |
| self-attention heads.</p> | |
| </li> | |
| <li> | |
| <p><strong>cross_attentions</strong> (<code>tuple(jnp.ndarray)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>jnp.ndarray</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attention weights of the decoder’s cross-attention layer, after the attention softmax, used to compute the | |
| weighted average in the cross-attention heads.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_last_hidden_state</strong> (<code>jnp.ndarray</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — Sequence of hidden-states at the output of the last layer of the encoder of the model.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_hidden_states</strong> (<code>tuple(jnp.ndarray)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>jnp.ndarray</code> (one for the output of the embeddings + one for the output of each layer) of shape | |
| <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_attentions</strong> (<code>tuple(jnp.ndarray)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>jnp.ndarray</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attention weights of the encoder, after the attention softmax, used to compute the weighted average in the | |
| self-attention heads.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>transformers.modeling_flax_outputs.FlaxSeq2SeqModelOutput</code> or <code>tuple(jnp.ndarray)</code></p> | |
| `}}),Ve=new ce({props:{$$slots:{default:[xi]},$$scope:{ctx:T}}}),Le=new oe({props:{anchor:"transformers.FlaxWhisperModel.__call__.example",$$slots:{default:[$i]},$$scope:{ctx:T}}}),ro=new U({props:{title:"FlaxWhisperForConditionalGeneration",local:"flaxwhisperforconditionalgeneration ][ transformers.FlaxWhisperForConditionalGeneration",headingTag:"h2"}}),io=new W({props:{name:"class transformers.FlaxWhisperForConditionalGeneration",anchor:"transformers.FlaxWhisperForConditionalGeneration",parameters:[{name:"config",val:": WhisperConfig"},{name:"input_shape",val:": Tuple = None"},{name:"seed",val:": int = 0"},{name:"dtype",val:": dtype = <class 'jax.numpy.float32'>"},{name:"_do_init",val:": bool = True"},{name:"gradient_checkpointing",val:": bool = False"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.FlaxWhisperForConditionalGeneration.config",description:`<strong>config</strong> (<a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig">WhisperConfig</a>) — Model configuration class with all the parameters of the model. | |
| Initializing with a config file does not load the weights associated with the model, only the | |
| configuration. Check out the <code>from_pretrained()</code> method to load the model weights.`,name:"config"},{anchor:"transformers.FlaxWhisperForConditionalGeneration.dtype",description:`<strong>dtype</strong> (<code>jax.numpy.dtype</code>, <em>optional</em>, defaults to <code>jax.numpy.float32</code>) — | |
| The data type of the computation. Can be one of <code>jax.numpy.float32</code>, <code>jax.numpy.float16</code> (on GPUs) and | |
| <code>jax.numpy.bfloat16</code> (on TPUs). This can be used to enable mixed-precision training or half-precision | |
| inference on GPUs or TPUs. If specified all the computation will be performed with the given <code>dtype</code>. | |
| <strong>Note that this only specifies the dtype of the computation and does not influence the dtype of model | |
| parameters.</strong> If you wish to change the dtype of the model parameters, see <code>to_fp16()</code> | |
| and <code>to_bf16()</code>.`,name:"dtype"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_flax_whisper.py#L1267"}}),lo=new W({props:{name:"__call__",anchor:"transformers.FlaxWhisperForConditionalGeneration.__call__",parameters:[{name:"input_features",val:": Array"},{name:"decoder_input_ids",val:": Array"},{name:"attention_mask",val:": Optional = None"},{name:"decoder_attention_mask",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"decoder_position_ids",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"},{name:"train",val:": bool = False"},{name:"params",val:": dict = None"},{name:"dropout_rng",val:": PRNGKey = None"}],parametersDescription:[{anchor:"transformers.FlaxWhisperForConditionalGeneration.__call__.input_features",description:`<strong>input_features</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, feature_size, sequence_length)</code>) — | |
| Float values mel features extracted from the raw speech waveform. Raw speech waveform can be obtained by | |
| loading a <code>.flac</code> or <code>.wav</code> audio file into an array of type <code>List[float]</code> or a <code>numpy.ndarray</code>, <em>e.g.</em> via | |
| the soundfile library (<code>pip install soundfile</code>). To prepare the array into <code>input_features</code>, the | |
| <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor">WhisperFeatureExtractor</a> should be used for extracting the features, padding and conversion into a | |
| tensor of type <code>numpy.ndarray</code>. See <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor.__call__"><strong>call</strong>()</a>`,name:"input_features"},{anchor:"transformers.FlaxWhisperForConditionalGeneration.__call__.attention_mask",description:`<strong>attention_mask</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Whisper does not support masking of the <code>input_features</code>, this argument is preserved for compatibility, but | |
| is not used. By default the silence in the input log mel spectrogram are ignored.`,name:"attention_mask"},{anchor:"transformers.FlaxWhisperForConditionalGeneration.__call__.decoder_input_ids",description:`<strong>decoder_input_ids</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, target_sequence_length)</code>, <em>optional</em>) — | |
| Indices of decoder input sequence tokens in the vocabulary. Indices can be obtained using | |
| <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperTokenizer">WhisperTokenizer</a>. See <code>PreTrainedTokenizer.encode()</code> and <code>PreTrainedTokenizer.__call__()</code> for details. | |
| <a href="../glossary#decoder-input-ids">What are decoder input IDs?</a> Whisper uses the <code>decoder_start_token_id</code> as | |
| the starting token for <code>decoder_input_ids</code> generation.`,name:"decoder_input_ids"},{anchor:"transformers.FlaxWhisperForConditionalGeneration.__call__.decoder_attention_mask",description:`<strong>decoder_attention_mask</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, target_sequence_length)</code>, <em>optional</em>) — | |
| Default behavior: generate a tensor that ignores pad tokens in <code>decoder_input_ids</code>. Causal mask will also | |
| be used by default. If you want to change padding behavior, you should modify to your needs. See diagram 1 | |
| in <a href="https://arxiv.org/abs/1910.13461" rel="nofollow">the paper</a> for more information on the default strategy.`,name:"decoder_attention_mask"},{anchor:"transformers.FlaxWhisperForConditionalGeneration.__call__.position_ids",description:`<strong>position_ids</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Whisper does not use <code>position_ids</code> in the encoder as <code>input_features</code> is always the same size and doesn’t | |
| use masking, but this argument is preserved for compatibility. By default the silence in the input log mel | |
| spectrogram are ignored.`,name:"position_ids"},{anchor:"transformers.FlaxWhisperForConditionalGeneration.__call__.decoder_position_ids",description:`<strong>decoder_position_ids</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the | |
| range <code>[0, config.max_position_embeddings - 1]</code>.`,name:"decoder_position_ids"},{anchor:"transformers.FlaxWhisperForConditionalGeneration.__call__.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.FlaxWhisperForConditionalGeneration.__call__.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.FlaxWhisperForConditionalGeneration.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <code>ModelOutput</code> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_flax_whisper.py#L1134",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <code>transformers.modeling_flax_outputs.FlaxSeq2SeqLMOutput</code> or a tuple of | |
| <code>jnp.ndarray</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig" | |
| >WhisperConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>logits</strong> (<code>jnp.ndarray</code> of shape <code>(batch_size, sequence_length, config.vocab_size)</code>) — Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).</p> | |
| </li> | |
| <li> | |
| <p><strong>past_key_values</strong> (<code>tuple(tuple(jnp.ndarray))</code>, <em>optional</em>, returned when <code>use_cache=True</code> is passed or when <code>config.use_cache=True</code>) — Tuple of <code>tuple(jnp.ndarray)</code> of length <code>config.n_layers</code>, with each tuple having 2 tensors of shape | |
| <code>(batch_size, num_heads, sequence_length, embed_size_per_head)</code> and 2 additional tensors of shape | |
| <code>(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)</code>.</p> | |
| <p>Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention | |
| blocks) that can be used (see <code>past_key_values</code> input) to speed up sequential decoding.</p> | |
| </li> | |
| <li> | |
| <p><strong>decoder_hidden_states</strong> (<code>tuple(jnp.ndarray)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>jnp.ndarray</code> (one for the output of the embeddings + one for the output of each layer) of shape | |
| <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>decoder_attentions</strong> (<code>tuple(jnp.ndarray)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>jnp.ndarray</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in the | |
| self-attention heads.</p> | |
| </li> | |
| <li> | |
| <p><strong>cross_attentions</strong> (<code>tuple(jnp.ndarray)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>jnp.ndarray</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights of the decoder’s cross-attention layer, after the attention softmax, used to compute the | |
| weighted average in the cross-attention heads.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_last_hidden_state</strong> (<code>jnp.ndarray</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — Sequence of hidden-states at the output of the last layer of the encoder of the model.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_hidden_states</strong> (<code>tuple(jnp.ndarray)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>jnp.ndarray</code> (one for the output of the embeddings + one for the output of each layer) of shape | |
| <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>encoder_attentions</strong> (<code>tuple(jnp.ndarray)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>jnp.ndarray</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in the | |
| self-attention heads.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>transformers.modeling_flax_outputs.FlaxSeq2SeqLMOutput</code> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),He=new ce({props:{$$slots:{default:[Wi]},$$scope:{ctx:T}}}),Ee=new oe({props:{anchor:"transformers.FlaxWhisperForConditionalGeneration.__call__.example",$$slots:{default:[Mi]},$$scope:{ctx:T}}}),co=new U({props:{title:"FlaxWhisperForAudioClassification",local:"flaxwhisperforaudioclassification ][ transformers.FlaxWhisperForAudioClassification",headingTag:"h2"}}),po=new W({props:{name:"class transformers.FlaxWhisperForAudioClassification",anchor:"transformers.FlaxWhisperForAudioClassification",parameters:[{name:"config",val:": WhisperConfig"},{name:"input_shape",val:": Tuple = None"},{name:"seed",val:": int = 0"},{name:"dtype",val:": dtype = <class 'jax.numpy.float32'>"},{name:"_do_init",val:": bool = True"},{name:"gradient_checkpointing",val:": bool = False"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.FlaxWhisperForAudioClassification.config",description:`<strong>config</strong> (<a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig">WhisperConfig</a>) — Model configuration class with all the parameters of the model. | |
| Initializing with a config file does not load the weights associated with the model, only the | |
| configuration. Check out the <code>from_pretrained()</code> method to load the model weights.`,name:"config"},{anchor:"transformers.FlaxWhisperForAudioClassification.dtype",description:`<strong>dtype</strong> (<code>jax.numpy.dtype</code>, <em>optional</em>, defaults to <code>jax.numpy.float32</code>) — | |
| The data type of the computation. Can be one of <code>jax.numpy.float32</code>, <code>jax.numpy.float16</code> (on GPUs) and | |
| <code>jax.numpy.bfloat16</code> (on TPUs). This can be used to enable mixed-precision training or half-precision | |
| inference on GPUs or TPUs. If specified all the computation will be performed with the given <code>dtype</code>. | |
| <strong>Note that this only specifies the dtype of the computation and does not influence the dtype of model | |
| parameters.</strong> If you wish to change the dtype of the model parameters, see <code>to_fp16()</code> | |
| and <code>to_bf16()</code>.`,name:"dtype"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_flax_whisper.py#L1597"}}),ho=new W({props:{name:"__call__",anchor:"transformers.FlaxWhisperForAudioClassification.__call__",parameters:[{name:"input_features",val:": Array"},{name:"attention_mask",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"},{name:"train",val:": bool = False"},{name:"params",val:": dict = None"},{name:"dropout_rng",val:": PRNGKey = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.FlaxWhisperForAudioClassification.__call__.input_features",description:`<strong>input_features</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, feature_size, sequence_length)</code>) — | |
| Float values mel features extracted from the raw speech waveform. Raw speech waveform can be obtained by | |
| loading a <code>.flac</code> or <code>.wav</code> audio file into an array of type <code>List[float]</code> or a <code>numpy.ndarray</code>, <em>e.g.</em> via | |
| the soundfile library (<code>pip install soundfile</code>). To prepare the array into <code>input_features</code>, the | |
| <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor">WhisperFeatureExtractor</a> should be used for extracting the features, padding and conversion into a | |
| tensor of type <code>numpy.ndarray</code>. See <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperFeatureExtractor.__call__"><strong>call</strong>()</a>`,name:"input_features"},{anchor:"transformers.FlaxWhisperForAudioClassification.__call__.attention_mask",description:`<strong>attention_mask</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Whisper does not support masking of the <code>input_features</code>, this argument is preserved for compatibility, but | |
| is not used. By default the silence in the input log mel spectrogram are ignored.`,name:"attention_mask"},{anchor:"transformers.FlaxWhisperForAudioClassification.__call__.decoder_input_ids",description:`<strong>decoder_input_ids</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, target_sequence_length)</code>, <em>optional</em>) — | |
| Indices of decoder input sequence tokens in the vocabulary. Indices can be obtained using | |
| <a href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperTokenizer">WhisperTokenizer</a>. See <code>PreTrainedTokenizer.encode()</code> and <code>PreTrainedTokenizer.__call__()</code> for details. | |
| <a href="../glossary#decoder-input-ids">What are decoder input IDs?</a> Whisper uses the <code>decoder_start_token_id</code> as | |
| the starting token for <code>decoder_input_ids</code> generation.`,name:"decoder_input_ids"},{anchor:"transformers.FlaxWhisperForAudioClassification.__call__.decoder_attention_mask",description:`<strong>decoder_attention_mask</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, target_sequence_length)</code>, <em>optional</em>) — | |
| Default behavior: generate a tensor that ignores pad tokens in <code>decoder_input_ids</code>. Causal mask will also | |
| be used by default. If you want to change padding behavior, you should modify to your needs. See diagram 1 | |
| in <a href="https://arxiv.org/abs/1910.13461" rel="nofollow">the paper</a> for more information on the default strategy.`,name:"decoder_attention_mask"},{anchor:"transformers.FlaxWhisperForAudioClassification.__call__.position_ids",description:`<strong>position_ids</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Whisper does not use <code>position_ids</code> in the encoder as <code>input_features</code> is always the same size and doesn’t | |
| use masking, but this argument is preserved for compatibility. By default the silence in the input log mel | |
| spectrogram are ignored.`,name:"position_ids"},{anchor:"transformers.FlaxWhisperForAudioClassification.__call__.decoder_position_ids",description:`<strong>decoder_position_ids</strong> (<code>numpy.ndarray</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the | |
| range <code>[0, config.max_position_embeddings - 1]</code>.`,name:"decoder_position_ids"},{anchor:"transformers.FlaxWhisperForAudioClassification.__call__.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.FlaxWhisperForAudioClassification.__call__.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.FlaxWhisperForAudioClassification.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <code>ModelOutput</code> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_flax_whisper.py#L1625",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <code>transformers.modeling_flax_outputs.FlaxSequenceClassifierOutput</code> or a tuple of | |
| <code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/main/ko/model_doc/whisper#transformers.WhisperConfig" | |
| >WhisperConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>logits</strong> (<code>jnp.ndarray</code> of shape <code>(batch_size, config.num_labels)</code>) — Classification (or regression if config.num_labels==1) scores (before SoftMax).</p> | |
| </li> | |
| <li> | |
| <p><strong>hidden_states</strong> (<code>tuple(jnp.ndarray)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>jnp.ndarray</code> (one for the output of the embeddings + one for the output of each layer) of shape | |
| <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the model at the output of each layer plus the initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>attentions</strong> (<code>tuple(jnp.ndarray)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>jnp.ndarray</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights after the attention softmax, used to compute the weighted average in the self-attention | |
| heads.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>transformers.modeling_flax_outputs.FlaxSequenceClassifierOutput</code> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),Be=new ce({props:{$$slots:{default:[Fi]},$$scope:{ctx:T}}}),Se=new oe({props:{anchor:"transformers.FlaxWhisperForAudioClassification.__call__.example",$$slots:{default:[zi]},$$scope:{ctx:T}}}),mo=new di({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/ko/model_doc/whisper.md"}}),{c(){t=l("meta"),k=s(),d=l("p"),p=s(),m(w.$$.fragment),n=s(),m(v.$$.fragment),Wn=s(),it=l("p"),it.innerHTML=Ka,Mn=s(),dt=l("p"),dt.textContent=er,Fn=s(),lt=l("p"),lt.innerHTML=tr,zn=s(),ct=l("p"),ct.textContent=or,jn=s(),pt=l("ul"),pt.innerHTML=nr,Cn=s(),m(ht.$$.fragment),Jn=s(),mt=l("p"),mt.innerHTML=sr,qn=s(),ut=l("p"),ut.innerHTML=ar,In=s(),m(ft.$$.fragment),Gn=s(),Z=l("div"),m(_t.$$.fragment),cs=s(),vo=l("p"),vo.innerHTML=rr,ps=s(),To=l("p"),To.innerHTML=ir,hs=s(),m(ge.$$.fragment),Un=s(),m(gt.$$.fragment),Zn=s(),M=l("div"),m(bt.$$.fragment),ms=s(),xo=l("p"),xo.textContent=dr,us=s(),$o=l("p"),$o.innerHTML=lr,fs=s(),ne=l("div"),m(yt.$$.fragment),_s=s(),Wo=l("p"),Wo.textContent=cr,gs=s(),m(be.$$.fragment),bs=s(),ye=l("div"),m(kt.$$.fragment),ys=s(),Mo=l("p"),Mo.textContent=pr,ks=s(),ke=l("div"),m(wt.$$.fragment),ws=s(),Fo=l("p"),Fo.innerHTML=hr,vs=s(),se=l("div"),m(vt.$$.fragment),Ts=s(),zo=l("p"),zo.innerHTML=mr,xs=s(),jo=l("p"),jo.textContent=ur,$s=s(),Co=l("div"),m(Tt.$$.fragment),Nn=s(),m(xt.$$.fragment),Xn=s(),F=l("div"),m($t.$$.fragment),Ws=s(),Jo=l("p"),Jo.innerHTML=fr,Ms=s(),qo=l("p"),qo.innerHTML=_r,Fs=s(),ae=l("div"),m(Wt.$$.fragment),zs=s(),Io=l("p"),Io.textContent=gr,js=s(),m(we.$$.fragment),Cs=s(),ve=l("div"),m(Mt.$$.fragment),Js=s(),Go=l("p"),Go.textContent=br,qs=s(),Te=l("div"),m(Ft.$$.fragment),Is=s(),Uo=l("p"),Uo.innerHTML=yr,Gs=s(),re=l("div"),m(zt.$$.fragment),Us=s(),Zo=l("p"),Zo.innerHTML=kr,Zs=s(),No=l("p"),No.textContent=wr,Ns=s(),Xo=l("div"),m(jt.$$.fragment),Rn=s(),m(Ct.$$.fragment),Vn=s(),I=l("div"),m(Jt.$$.fragment),Xs=s(),Ro=l("p"),Ro.textContent=vr,Rs=s(),Vo=l("p"),Vo.innerHTML=Tr,Vs=s(),Lo=l("p"),Lo.innerHTML=xr,Ls=s(),xe
=l("div"),m(qt.$$.fragment),Hs=s(),Ho=l("p"),Ho.textContent=$r,Ln=s(),m(It.$$.fragment),Hn=s(),z=l("div"),m(Gt.$$.fragment),Es=s(),Eo=l("p"),Eo.textContent=Wr,Bs=s(),Bo=l("p"),Bo.innerHTML=Mr,Ss=s(),$e=l("div"),m(Ut.$$.fragment),Ps=s(),So=l("p"),So.innerHTML=Fr,Ys=s(),ie=l("div"),m(Zt.$$.fragment),As=s(),Po=l("p"),Po.textContent=zr,Ds=s(),m(We.$$.fragment),Qs=s(),de=l("div"),m(Nt.$$.fragment),Os=s(),Yo=l("p"),Yo.innerHTML=jr,Ks=s(),m(Me.$$.fragment),ea=s(),Fe=l("div"),m(Xt.$$.fragment),ta=s(),Ao=l("p"),Ao.innerHTML=Cr,oa=s(),ze=l("div"),m(Rt.$$.fragment),na=s(),Do=l("p"),Do.innerHTML=Jr,En=s(),m(Vt.$$.fragment),Bn=s(),G=l("div"),m(Lt.$$.fragment),sa=s(),Qo=l("p"),Qo.innerHTML=qr,aa=s(),Oo=l("p"),Oo.innerHTML=Ir,ra=s(),B=l("div"),m(Ht.$$.fragment),ia=s(),Ko=l("p"),Ko.innerHTML=Gr,da=s(),m(je.$$.fragment),la=s(),m(Ce.$$.fragment),ca=s(),Je=l("div"),m(Et.$$.fragment),pa=s(),en=l("p"),en.innerHTML=Ur,Sn=s(),m(Bt.$$.fragment),Pn=s(),N=l("div"),m(St.$$.fragment),ha=s(),tn=l("p"),tn.innerHTML=Zr,ma=s(),on=l("p"),on.innerHTML=Nr,ua=s(),S=l("div"),m(Pt.$$.fragment),fa=s(),nn=l("p"),nn.innerHTML=Xr,_a=s(),m(qe.$$.fragment),ga=s(),m(Ie.$$.fragment),Yn=s(),m(Yt.$$.fragment),An=s(),ee=l("div"),m(At.$$.fragment),ba=s(),sn=l("p"),sn.textContent=Rr,ya=s(),P=l("div"),m(Dt.$$.fragment),ka=s(),an=l("p"),an.innerHTML=Vr,wa=s(),m(Ge.$$.fragment),va=s(),m(Ue.$$.fragment),Dn=s(),m(Qt.$$.fragment),Qn=s(),X=l("div"),m(Ot.$$.fragment),Ta=s(),rn=l("p"),rn.innerHTML=Lr,xa=s(),dn=l("p"),dn.innerHTML=Hr,$a=s(),Y=l("div"),m(Kt.$$.fragment),Wa=s(),ln=l("p"),ln.innerHTML=Er,Ma=s(),m(Ze.$$.fragment),Fa=s(),m(Ne.$$.fragment),On=s(),m(eo.$$.fragment),Kn=s(),R=l("div"),m(to.$$.fragment),za=s(),cn=l("p"),cn.innerHTML=Br,ja=s(),pn=l("p"),pn.innerHTML=Sr,Ca=s(),A=l("div"),m(oo.$$.fragment),Ja=s(),hn=l("p"),hn.innerHTML=Pr,qa=s(),m(Xe.$$.fragment),Ia=s(),m(Re.$$.fragment),es=s(),m(no.$$.fragment),ts=s(),V=l("div"),m(so.$$.fragment),Ga=s(),mn=l("p"),mn.innerHTML=Yr,Ua=s(),un=l("ul"),un.innerHTML=Ar,Za=s(),D
=l("div"),m(ao.$$.fragment),Na=s(),fn=l("p"),fn.innerHTML=Dr,Xa=s(),m(Ve.$$.fragment),Ra=s(),m(Le.$$.fragment),os=s(),m(ro.$$.fragment),ns=s(),L=l("div"),m(io.$$.fragment),Va=s(),_n=l("p"),_n.innerHTML=Qr,La=s(),gn=l("ul"),gn.innerHTML=Or,Ha=s(),Q=l("div"),m(lo.$$.fragment),Ea=s(),bn=l("p"),bn.innerHTML=Kr,Ba=s(),m(He.$$.fragment),Sa=s(),m(Ee.$$.fragment),ss=s(),m(co.$$.fragment),as=s(),H=l("div"),m(po.$$.fragment),Pa=s(),yn=l("p"),yn.innerHTML=ei,Ya=s(),kn=l("ul"),kn.innerHTML=ti,Aa=s(),O=l("div"),m(ho.$$.fragment),Da=s(),wn=l("p"),wn.innerHTML=oi,Qa=s(),m(Be.$$.fragment),Oa=s(),m(Se.$$.fragment),rs=s(),m(mo.$$.fragment),is=s(),Tn=l("p"),this.h()},l(e){const i=ii("svelte-u9bgzb",document.head);t=c(i,"META",{name:!0,content:!0}),i.forEach(r),k=a(e),d=c(e,"P",{}),x(d).forEach(r),p=a(e),u(w.$$.fragment,e),n=a(e),u(v.$$.fragment,e),Wn=a(e),it=c(e,"P",{"data-svelte-h":!0}),y(it)!=="svelte-jbylng"&&(it.innerHTML=Ka),Mn=a(e),dt=c(e,"P",{"data-svelte-h":!0}),y(dt)!=="svelte-e5r8wp"&&(dt.textContent=er),Fn=a(e),lt=c(e,"P",{"data-svelte-h":!0}),y(lt)!=="svelte-miykgl"&&(lt.innerHTML=tr),zn=a(e),ct=c(e,"P",{"data-svelte-h":!0}),y(ct)!=="svelte-k6v9m1"&&(ct.textContent=or),jn=a(e),pt=c(e,"UL",{"data-svelte-h":!0}),y(pt)!=="svelte-1yp0qfj"&&(pt.innerHTML=nr),Cn=a(e),u(ht.$$.fragment,e),Jn=a(e),mt=c(e,"P",{"data-svelte-h":!0}),y(mt)!=="svelte-13fvt52"&&(mt.innerHTML=sr),qn=a(e),ut=c(e,"P",{"data-svelte-h":!0}),y(ut)!=="svelte-x8w7b0"&&(ut.innerHTML=ar),In=a(e),u(ft.$$.fragment,e),Gn=a(e),Z=c(e,"DIV",{class:!0});var te=x(Z);u(_t.$$.fragment,te),cs=a(te),vo=c(te,"P",{"data-svelte-h":!0}),y(vo)!=="svelte-1vvndk6"&&(vo.innerHTML=rr),ps=a(te),To=c(te,"P",{"data-svelte-h":!0}),y(To)!=="svelte-huu8ef"&&(To.innerHTML=ir),hs=a(te),u(ge.$$.fragment,te),te.forEach(r),Un=a(e),u(gt.$$.fragment,e),Zn=a(e),M=c(e,"DIV",{class:!0});var 
C=x(M);u(bt.$$.fragment,C),ms=a(C),xo=c(C,"P",{"data-svelte-h":!0}),y(xo)!=="svelte-1996rkv"&&(xo.textContent=dr),us=a(C),$o=c(C,"P",{"data-svelte-h":!0}),y($o)!=="svelte-xbd6w0"&&($o.innerHTML=lr),fs=a(C),ne=c(C,"DIV",{class:!0});var pe=x(ne);u(yt.$$.fragment,pe),_s=a(pe),Wo=c(pe,"P",{"data-svelte-h":!0}),y(Wo)!=="svelte-8in46s"&&(Wo.textContent=cr),gs=a(pe),u(be.$$.fragment,pe),pe.forEach(r),bs=a(C),ye=c(C,"DIV",{class:!0});var uo=x(ye);u(kt.$$.fragment,uo),ys=a(uo),Mo=c(uo,"P",{"data-svelte-h":!0}),y(Mo)!=="svelte-wv4s2m"&&(Mo.textContent=pr),uo.forEach(r),ks=a(C),ke=c(C,"DIV",{class:!0});var fo=x(ke);u(wt.$$.fragment,fo),ws=a(fo),Fo=c(fo,"P",{"data-svelte-h":!0}),y(Fo)!=="svelte-1f4f5kp"&&(Fo.innerHTML=hr),fo.forEach(r),vs=a(C),se=c(C,"DIV",{class:!0});var he=x(se);u(vt.$$.fragment,he),Ts=a(he),zo=c(he,"P",{"data-svelte-h":!0}),y(zo)!=="svelte-zj1vf1"&&(zo.innerHTML=mr),xs=a(he),jo=c(he,"P",{"data-svelte-h":!0}),y(jo)!=="svelte-9vptpw"&&(jo.textContent=ur),he.forEach(r),$s=a(C),Co=c(C,"DIV",{class:!0});var xn=x(Co);u(Tt.$$.fragment,xn),xn.forEach(r),C.forEach(r),Nn=a(e),u(xt.$$.fragment,e),Xn=a(e),F=c(e,"DIV",{class:!0});var J=x(F);u($t.$$.fragment,J),Ws=a(J),Jo=c(J,"P",{"data-svelte-h":!0}),y(Jo)!=="svelte-14ct2lo"&&(Jo.innerHTML=fr),Ms=a(J),qo=c(J,"P",{"data-svelte-h":!0}),y(qo)!=="svelte-1ndfe3e"&&(qo.innerHTML=_r),Fs=a(J),ae=c(J,"DIV",{class:!0});var me=x(ae);u(Wt.$$.fragment,me),zs=a(me),Io=c(me,"P",{"data-svelte-h":!0}),y(Io)!=="svelte-8in46s"&&(Io.textContent=gr),js=a(me),u(we.$$.fragment,me),me.forEach(r),Cs=a(J),ve=c(J,"DIV",{class:!0});var _o=x(ve);u(Mt.$$.fragment,_o),Js=a(_o),Go=c(_o,"P",{"data-svelte-h":!0}),y(Go)!=="svelte-wv4s2m"&&(Go.textContent=br),_o.forEach(r),qs=a(J),Te=c(J,"DIV",{class:!0});var go=x(Te);u(Ft.$$.fragment,go),Is=a(go),Uo=c(go,"P",{"data-svelte-h":!0}),y(Uo)!=="svelte-1f4f5kp"&&(Uo.innerHTML=yr),go.forEach(r),Gs=a(J),re=c(J,"DIV",{class:!0});var 
ue=x(re);u(zt.$$.fragment,ue),Us=a(ue),Zo=c(ue,"P",{"data-svelte-h":!0}),y(Zo)!=="svelte-zj1vf1"&&(Zo.innerHTML=kr),Zs=a(ue),No=c(ue,"P",{"data-svelte-h":!0}),y(No)!=="svelte-9vptpw"&&(No.textContent=wr),ue.forEach(r),Ns=a(J),Xo=c(J,"DIV",{class:!0});var $n=x(Xo);u(jt.$$.fragment,$n),$n.forEach(r),J.forEach(r),Rn=a(e),u(Ct.$$.fragment,e),Vn=a(e),I=c(e,"DIV",{class:!0});var E=x(I);u(Jt.$$.fragment,E),Xs=a(E),Ro=c(E,"P",{"data-svelte-h":!0}),y(Ro)!=="svelte-1xbhurt"&&(Ro.textContent=vr),Rs=a(E),Vo=c(E,"P",{"data-svelte-h":!0}),y(Vo)!=="svelte-19915nz"&&(Vo.innerHTML=Tr),Vs=a(E),Lo=c(E,"P",{"data-svelte-h":!0}),y(Lo)!=="svelte-1lv9ra7"&&(Lo.innerHTML=xr),Ls=a(E),xe=c(E,"DIV",{class:!0});var bo=x(xe);u(qt.$$.fragment,bo),Hs=a(bo),Ho=c(bo,"P",{"data-svelte-h":!0}),y(Ho)!=="svelte-1o1r06v"&&(Ho.textContent=$r),bo.forEach(r),E.forEach(r),Ln=a(e),u(It.$$.fragment,e),Hn=a(e),z=c(e,"DIV",{class:!0});var q=x(z);u(Gt.$$.fragment,q),Es=a(q),Eo=c(q,"P",{"data-svelte-h":!0}),y(Eo)!=="svelte-1g1myb6"&&(Eo.textContent=Wr),Bs=a(q),Bo=c(q,"P",{"data-svelte-h":!0}),y(Bo)!=="svelte-tjn15i"&&(Bo.innerHTML=Mr),Ss=a(q),$e=c(q,"DIV",{class:!0});var yo=x($e);u(Ut.$$.fragment,yo),Ps=a(yo),So=c(yo,"P",{"data-svelte-h":!0}),y(So)!=="svelte-7qeqk6"&&(So.innerHTML=Fr),yo.forEach(r),Ys=a(q),ie=c(q,"DIV",{class:!0});var fe=x(ie);u(Zt.$$.fragment,fe),As=a(fe),Po=c(fe,"P",{"data-svelte-h":!0}),y(Po)!=="svelte-1cj8dcb"&&(Po.textContent=zr),Ds=a(fe),u(We.$$.fragment,fe),fe.forEach(r),Qs=a(q),de=c(q,"DIV",{class:!0});var _e=x(de);u(Nt.$$.fragment,_e),Os=a(_e),Yo=c(_e,"P",{"data-svelte-h":!0}),y(Yo)!=="svelte-fb31l0"&&(Yo.innerHTML=jr),Ks=a(_e),u(Me.$$.fragment,_e),_e.forEach(r),ea=a(q),Fe=c(q,"DIV",{class:!0});var ko=x(Fe);u(Xt.$$.fragment,ko),ta=a(ko),Ao=c(ko,"P",{"data-svelte-h":!0}),y(Ao)!=="svelte-b7b5eq"&&(Ao.innerHTML=Cr),ko.forEach(r),oa=a(q),ze=c(q,"DIV",{class:!0});var 
wo=x(ze);u(Rt.$$.fragment,wo),na=a(wo),Do=c(wo,"P",{"data-svelte-h":!0}),y(Do)!=="svelte-u0jgov"&&(Do.innerHTML=Jr),wo.forEach(r),q.forEach(r),En=a(e),u(Vt.$$.fragment,e),Bn=a(e),G=c(e,"DIV",{class:!0});var le=x(G);u(Lt.$$.fragment,le),sa=a(le),Qo=c(le,"P",{"data-svelte-h":!0}),y(Qo)!=="svelte-1wogdtk"&&(Qo.innerHTML=qr),aa=a(le),Oo=c(le,"P",{"data-svelte-h":!0}),y(Oo)!=="svelte-hswkmf"&&(Oo.innerHTML=Ir),ra=a(le),B=c(le,"DIV",{class:!0});var Pe=x(B);u(Ht.$$.fragment,Pe),ia=a(Pe),Ko=c(Pe,"P",{"data-svelte-h":!0}),y(Ko)!=="svelte-1mwet9q"&&(Ko.innerHTML=Gr),da=a(Pe),u(je.$$.fragment,Pe),la=a(Pe),u(Ce.$$.fragment,Pe),Pe.forEach(r),ca=a(le),Je=c(le,"DIV",{class:!0});var ls=x(Je);u(Et.$$.fragment,ls),pa=a(ls),en=c(ls,"P",{"data-svelte-h":!0}),y(en)!=="svelte-1iyovru"&&(en.innerHTML=Ur),ls.forEach(r),le.forEach(r),Sn=a(e),u(Bt.$$.fragment,e),Pn=a(e),N=c(e,"DIV",{class:!0});var Ye=x(N);u(St.$$.fragment,Ye),ha=a(Ye),tn=c(Ye,"P",{"data-svelte-h":!0}),y(tn)!=="svelte-175y3np"&&(tn.innerHTML=Zr),ma=a(Ye),on=c(Ye,"P",{"data-svelte-h":!0}),y(on)!=="svelte-hswkmf"&&(on.innerHTML=Nr),ua=a(Ye),S=c(Ye,"DIV",{class:!0});var Ae=x(S);u(Pt.$$.fragment,Ae),fa=a(Ae),nn=c(Ae,"P",{"data-svelte-h":!0}),y(nn)!=="svelte-fjrdgw"&&(nn.innerHTML=Xr),_a=a(Ae),u(qe.$$.fragment,Ae),ga=a(Ae),u(Ie.$$.fragment,Ae),Ae.forEach(r),Ye.forEach(r),Yn=a(e),u(Yt.$$.fragment,e),An=a(e),ee=c(e,"DIV",{class:!0});var vn=x(ee);u(At.$$.fragment,vn),ba=a(vn),sn=c(vn,"P",{"data-svelte-h":!0}),y(sn)!=="svelte-1y2nev0"&&(sn.textContent=Rr),ya=a(vn),P=c(vn,"DIV",{class:!0});var De=x(P);u(Dt.$$.fragment,De),ka=a(De),an=c(De,"P",{"data-svelte-h":!0}),y(an)!=="svelte-l6rpvo"&&(an.innerHTML=Vr),wa=a(De),u(Ge.$$.fragment,De),va=a(De),u(Ue.$$.fragment,De),De.forEach(r),vn.forEach(r),Dn=a(e),u(Qt.$$.fragment,e),Qn=a(e),X=c(e,"DIV",{class:!0});var 
Qe=x(X);u(Ot.$$.fragment,Qe),Ta=a(Qe),rn=c(Qe,"P",{"data-svelte-h":!0}),y(rn)!=="svelte-vkbfyy"&&(rn.innerHTML=Lr),xa=a(Qe),dn=c(Qe,"P",{"data-svelte-h":!0}),y(dn)!=="svelte-1be7e3c"&&(dn.innerHTML=Hr),$a=a(Qe),Y=c(Qe,"DIV",{class:!0});var Oe=x(Y);u(Kt.$$.fragment,Oe),Wa=a(Oe),ln=c(Oe,"P",{"data-svelte-h":!0}),y(ln)!=="svelte-az9qey"&&(ln.innerHTML=Er),Ma=a(Oe),u(Ze.$$.fragment,Oe),Fa=a(Oe),u(Ne.$$.fragment,Oe),Oe.forEach(r),Qe.forEach(r),On=a(e),u(eo.$$.fragment,e),Kn=a(e),R=c(e,"DIV",{class:!0});var Ke=x(R);u(to.$$.fragment,Ke),za=a(Ke),cn=c(Ke,"P",{"data-svelte-h":!0}),y(cn)!=="svelte-160h77"&&(cn.innerHTML=Br),ja=a(Ke),pn=c(Ke,"P",{"data-svelte-h":!0}),y(pn)!=="svelte-1be7e3c"&&(pn.innerHTML=Sr),Ca=a(Ke),A=c(Ke,"DIV",{class:!0});var et=x(A);u(oo.$$.fragment,et),Ja=a(et),hn=c(et,"P",{"data-svelte-h":!0}),y(hn)!=="svelte-sgyxxc"&&(hn.innerHTML=Pr),qa=a(et),u(Xe.$$.fragment,et),Ia=a(et),u(Re.$$.fragment,et),et.forEach(r),Ke.forEach(r),es=a(e),u(no.$$.fragment,e),ts=a(e),V=c(e,"DIV",{class:!0});var tt=x(V);u(so.$$.fragment,tt),Ga=a(tt),mn=c(tt,"P",{"data-svelte-h":!0}),y(mn)!=="svelte-1fs2ctq"&&(mn.innerHTML=Yr),Ua=a(tt),un=c(tt,"UL",{"data-svelte-h":!0}),y(un)!=="svelte-1w7z84m"&&(un.innerHTML=Ar),Za=a(tt),D=c(tt,"DIV",{class:!0});var ot=x(D);u(ao.$$.fragment,ot),Na=a(ot),fn=c(ot,"P",{"data-svelte-h":!0}),y(fn)!=="svelte-13ych8p"&&(fn.innerHTML=Dr),Xa=a(ot),u(Ve.$$.fragment,ot),Ra=a(ot),u(Le.$$.fragment,ot),ot.forEach(r),tt.forEach(r),os=a(e),u(ro.$$.fragment,e),ns=a(e),L=c(e,"DIV",{class:!0});var nt=x(L);u(io.$$.fragment,nt),Va=a(nt),_n=c(nt,"P",{"data-svelte-h":!0}),y(_n)!=="svelte-1im0573"&&(_n.innerHTML=Qr),La=a(nt),gn=c(nt,"UL",{"data-svelte-h":!0}),y(gn)!=="svelte-1w7z84m"&&(gn.innerHTML=Or),Ha=a(nt),Q=c(nt,"DIV",{class:!0});var 
st=x(Q);u(lo.$$.fragment,st),Ea=a(st),bn=c(st,"P",{"data-svelte-h":!0}),y(bn)!=="svelte-13ych8p"&&(bn.innerHTML=Kr),Ba=a(st),u(He.$$.fragment,st),Sa=a(st),u(Ee.$$.fragment,st),st.forEach(r),nt.forEach(r),ss=a(e),u(co.$$.fragment,e),as=a(e),H=c(e,"DIV",{class:!0});var at=x(H);u(po.$$.fragment,at),Pa=a(at),yn=c(at,"P",{"data-svelte-h":!0}),y(yn)!=="svelte-jvvbzk"&&(yn.innerHTML=ei),Ya=a(at),kn=c(at,"UL",{"data-svelte-h":!0}),y(kn)!=="svelte-1w7z84m"&&(kn.innerHTML=ti),Aa=a(at),O=c(at,"DIV",{class:!0});var rt=x(O);u(ho.$$.fragment,rt),Da=a(rt),wn=c(rt,"P",{"data-svelte-h":!0}),y(wn)!=="svelte-1namxjo"&&(wn.innerHTML=oi),Qa=a(rt),u(Be.$$.fragment,rt),Oa=a(rt),u(Se.$$.fragment,rt),rt.forEach(r),at.forEach(r),rs=a(e),u(mo.$$.fragment,e),is=a(e),Tn=c(e,"P",{}),x(Tn).forEach(r),this.h()},h(){$(t,"name","hf:doc:metadata"),$(t,"content",Ci),$(Z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(ne,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(ye,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(ke,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(se,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(Co,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(M,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(ae,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(ve,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(Te,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(re,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(Xo,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 
border-gray-100 rounded-tl-xl mb-6 mt-8"),$(F,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(xe,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(I,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$($e,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(ie,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(de,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(Fe,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(ze,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(Je,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(ee,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(Y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(X,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(A,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 
border-gray-100 rounded-tl-xl mb-6 mt-8"),$(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(V,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(Q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(O,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(H,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,i){o(document.head,t),h(e,k,i),h(e,d,i),h(e,p,i),f(w,e,i),h(e,n,i),f(v,e,i),h(e,Wn,i),h(e,it,i),h(e,Mn,i),h(e,dt,i),h(e,Fn,i),h(e,lt,i),h(e,zn,i),h(e,ct,i),h(e,jn,i),h(e,pt,i),h(e,Cn,i),f(ht,e,i),h(e,Jn,i),h(e,mt,i),h(e,qn,i),h(e,ut,i),h(e,In,i),f(ft,e,i),h(e,Gn,i),h(e,Z,i),f(_t,Z,null),o(Z,cs),o(Z,vo),o(Z,ps),o(Z,To),o(Z,hs),f(ge,Z,null),h(e,Un,i),f(gt,e,i),h(e,Zn,i),h(e,M,i),f(bt,M,null),o(M,ms),o(M,xo),o(M,us),o(M,$o),o(M,fs),o(M,ne),f(yt,ne,null),o(ne,_s),o(ne,Wo),o(ne,gs),f(be,ne,null),o(M,bs),o(M,ye),f(kt,ye,null),o(ye,ys),o(ye,Mo),o(M,ks),o(M,ke),f(wt,ke,null),o(ke,ws),o(ke,Fo),o(M,vs),o(M,se),f(vt,se,null),o(se,Ts),o(se,zo),o(se,xs),o(se,jo),o(M,$s),o(M,Co),f(Tt,Co,null),h(e,Nn,i),f(xt,e,i),h(e,Xn,i),h(e,F,i),f($t,F,null),o(F,Ws),o(F,Jo),o(F,Ms),o(F,qo),o(F,Fs),o(F,ae),f(Wt,ae,null),o(ae,zs),o(ae,Io),o(ae,js),f(we,ae,null),o(F,Cs),o(F,ve),f(Mt,ve,null),o(ve,Js),o(ve,Go),o(F,qs),o(F,Te),f(Ft,Te,null),o(Te,Is),o(Te,Uo),o(F,Gs),o(F,re),f(zt,re,null),o(re,Us),o(re,Zo),o(re,Zs),o(re,No),o(F,Ns),o(F,Xo),f(jt,Xo,null),h(e,Rn,i),f(Ct,e,i),h(e,Vn,i),h(e,I,i),f(Jt,I,null),o(I,Xs),o(I,Ro),o(I,Rs),o(I,Vo),o(I,Vs),o(I,Lo),o(I,Ls),o(I,xe),f(qt,xe,null),o(xe,Hs),o(xe,Ho),h(e,Ln,i),f(It,e,i),h(e,Hn,i),h(e,z,i),f(Gt,z,null),o(z,Es),o(z,Eo),o(z,Bs),o(z,Bo),o(z,Ss),o(z,$e),f(Ut,$e,null),o($e,Ps),o($e,So),o(z,Ys),o(z,ie),f(Zt,ie,null),o(ie,As),o(ie,
Po),o(ie,Ds),f(We,ie,null),o(z,Qs),o(z,de),f(Nt,de,null),o(de,Os),o(de,Yo),o(de,Ks),f(Me,de,null),o(z,ea),o(z,Fe),f(Xt,Fe,null),o(Fe,ta),o(Fe,Ao),o(z,oa),o(z,ze),f(Rt,ze,null),o(ze,na),o(ze,Do),h(e,En,i),f(Vt,e,i),h(e,Bn,i),h(e,G,i),f(Lt,G,null),o(G,sa),o(G,Qo),o(G,aa),o(G,Oo),o(G,ra),o(G,B),f(Ht,B,null),o(B,ia),o(B,Ko),o(B,da),f(je,B,null),o(B,la),f(Ce,B,null),o(G,ca),o(G,Je),f(Et,Je,null),o(Je,pa),o(Je,en),h(e,Sn,i),f(Bt,e,i),h(e,Pn,i),h(e,N,i),f(St,N,null),o(N,ha),o(N,tn),o(N,ma),o(N,on),o(N,ua),o(N,S),f(Pt,S,null),o(S,fa),o(S,nn),o(S,_a),f(qe,S,null),o(S,ga),f(Ie,S,null),h(e,Yn,i),f(Yt,e,i),h(e,An,i),h(e,ee,i),f(At,ee,null),o(ee,ba),o(ee,sn),o(ee,ya),o(ee,P),f(Dt,P,null),o(P,ka),o(P,an),o(P,wa),f(Ge,P,null),o(P,va),f(Ue,P,null),h(e,Dn,i),f(Qt,e,i),h(e,Qn,i),h(e,X,i),f(Ot,X,null),o(X,Ta),o(X,rn),o(X,xa),o(X,dn),o(X,$a),o(X,Y),f(Kt,Y,null),o(Y,Wa),o(Y,ln),o(Y,Ma),f(Ze,Y,null),o(Y,Fa),f(Ne,Y,null),h(e,On,i),f(eo,e,i),h(e,Kn,i),h(e,R,i),f(to,R,null),o(R,za),o(R,cn),o(R,ja),o(R,pn),o(R,Ca),o(R,A),f(oo,A,null),o(A,Ja),o(A,hn),o(A,qa),f(Xe,A,null),o(A,Ia),f(Re,A,null),h(e,es,i),f(no,e,i),h(e,ts,i),h(e,V,i),f(so,V,null),o(V,Ga),o(V,mn),o(V,Ua),o(V,un),o(V,Za),o(V,D),f(ao,D,null),o(D,Na),o(D,fn),o(D,Xa),f(Ve,D,null),o(D,Ra),f(Le,D,null),h(e,os,i),f(ro,e,i),h(e,ns,i),h(e,L,i),f(io,L,null),o(L,Va),o(L,_n),o(L,La),o(L,gn),o(L,Ha),o(L,Q),f(lo,Q,null),o(Q,Ea),o(Q,bn),o(Q,Ba),f(He,Q,null),o(Q,Sa),f(Ee,Q,null),h(e,ss,i),f(co,e,i),h(e,as,i),h(e,H,i),f(po,H,null),o(H,Pa),o(H,yn),o(H,Ya),o(H,kn),o(H,Aa),o(H,O),f(ho,O,null),o(O,Da),o(O,wn),o(O,Qa),f(Be,O,null),o(O,Oa),f(Se,O,null),h(e,rs,i),f(mo,e,i),h(e,is,i),h(e,Tn,i),ds=!0},p(e,[i]){const te={};i&2&&(te.$$scope={dirty:i,ctx:e}),ge.$set(te);const C={};i&2&&(C.$$scope={dirty:i,ctx:e}),be.$set(C);const pe={};i&2&&(pe.$$scope={dirty:i,ctx:e}),we.$set(pe);const uo={};i&2&&(uo.$$scope={dirty:i,ctx:e}),We.$set(uo);const fo={};i&2&&(fo.$$scope={dirty:i,ctx:e}),Me.$set(fo);const he={};i&2&&(he.$$scope={dirty:i,ctx:e}),je.$set(he);const 
xn={};i&2&&(xn.$$scope={dirty:i,ctx:e}),Ce.$set(xn);const J={};i&2&&(J.$$scope={dirty:i,ctx:e}),qe.$set(J);const me={};i&2&&(me.$$scope={dirty:i,ctx:e}),Ie.$set(me);const _o={};i&2&&(_o.$$scope={dirty:i,ctx:e}),Ge.$set(_o);const go={};i&2&&(go.$$scope={dirty:i,ctx:e}),Ue.$set(go);const ue={};i&2&&(ue.$$scope={dirty:i,ctx:e}),Ze.$set(ue);const $n={};i&2&&($n.$$scope={dirty:i,ctx:e}),Ne.$set($n);const E={};i&2&&(E.$$scope={dirty:i,ctx:e}),Xe.$set(E);const bo={};i&2&&(bo.$$scope={dirty:i,ctx:e}),Re.$set(bo);const q={};i&2&&(q.$$scope={dirty:i,ctx:e}),Ve.$set(q);const yo={};i&2&&(yo.$$scope={dirty:i,ctx:e}),Le.$set(yo);const fe={};i&2&&(fe.$$scope={dirty:i,ctx:e}),He.$set(fe);const _e={};i&2&&(_e.$$scope={dirty:i,ctx:e}),Ee.$set(_e);const ko={};i&2&&(ko.$$scope={dirty:i,ctx:e}),Be.$set(ko);const wo={};i&2&&(wo.$$scope={dirty:i,ctx:e}),Se.$set(wo)},i(e){ds||(_(w.$$.fragment,e),_(v.$$.fragment,e),_(ht.$$.fragment,e),_(ft.$$.fragment,e),_(_t.$$.fragment,e),_(ge.$$.fragment,e),_(gt.$$.fragment,e),_(bt.$$.fragment,e),_(yt.$$.fragment,e),_(be.$$.fragment,e),_(kt.$$.fragment,e),_(wt.$$.fragment,e),_(vt.$$.fragment,e),_(Tt.$$.fragment,e),_(xt.$$.fragment,e),_($t.$$.fragment,e),_(Wt.$$.fragment,e),_(we.$$.fragment,e),_(Mt.$$.fragment,e),_(Ft.$$.fragment,e),_(zt.$$.fragment,e),_(jt.$$.fragment,e),_(Ct.$$.fragment,e),_(Jt.$$.fragment,e),_(qt.$$.fragment,e),_(It.$$.fragment,e),_(Gt.$$.fragment,e),_(Ut.$$.fragment,e),_(Zt.$$.fragment,e),_(We.$$.fragment,e),_(Nt.$$.fragment,e),_(Me.$$.fragment,e),_(Xt.$$.fragment,e),_(Rt.$$.fragment,e),_(Vt.$$.fragment,e),_(Lt.$$.fragment,e),_(Ht.$$.fragment,e),_(je.$$.fragment,e),_(Ce.$$.fragment,e),_(Et.$$.fragment,e),_(Bt.$$.fragment,e),_(St.$$.fragment,e),_(Pt.$$.fragment,e),_(qe.$$.fragment,e),_(Ie.$$.fragment,e),_(Yt.$$.fragment,e),_(At.$$.fragment,e),_(Dt.$$.fragment,e),_(Ge.$$.fragment,e),_(Ue.$$.fragment,e),_(Qt.$$.fragment,e),_(Ot.$$.fragment,e),_(Kt.$$.fragment,e),_(Ze.$$.fragment,e),_(Ne.$$.fragment,e),_(eo.$$.fragment,e),_(to.$$.fragment
,e),_(oo.$$.fragment,e),_(Xe.$$.fragment,e),_(Re.$$.fragment,e),_(no.$$.fragment,e),_(so.$$.fragment,e),_(ao.$$.fragment,e),_(Ve.$$.fragment,e),_(Le.$$.fragment,e),_(ro.$$.fragment,e),_(io.$$.fragment,e),_(lo.$$.fragment,e),_(He.$$.fragment,e),_(Ee.$$.fragment,e),_(co.$$.fragment,e),_(po.$$.fragment,e),_(ho.$$.fragment,e),_(Be.$$.fragment,e),_(Se.$$.fragment,e),_(mo.$$.fragment,e),ds=!0)},o(e){g(w.$$.fragment,e),g(v.$$.fragment,e),g(ht.$$.fragment,e),g(ft.$$.fragment,e),g(_t.$$.fragment,e),g(ge.$$.fragment,e),g(gt.$$.fragment,e),g(bt.$$.fragment,e),g(yt.$$.fragment,e),g(be.$$.fragment,e),g(kt.$$.fragment,e),g(wt.$$.fragment,e),g(vt.$$.fragment,e),g(Tt.$$.fragment,e),g(xt.$$.fragment,e),g($t.$$.fragment,e),g(Wt.$$.fragment,e),g(we.$$.fragment,e),g(Mt.$$.fragment,e),g(Ft.$$.fragment,e),g(zt.$$.fragment,e),g(jt.$$.fragment,e),g(Ct.$$.fragment,e),g(Jt.$$.fragment,e),g(qt.$$.fragment,e),g(It.$$.fragment,e),g(Gt.$$.fragment,e),g(Ut.$$.fragment,e),g(Zt.$$.fragment,e),g(We.$$.fragment,e),g(Nt.$$.fragment,e),g(Me.$$.fragment,e),g(Xt.$$.fragment,e),g(Rt.$$.fragment,e),g(Vt.$$.fragment,e),g(Lt.$$.fragment,e),g(Ht.$$.fragment,e),g(je.$$.fragment,e),g(Ce.$$.fragment,e),g(Et.$$.fragment,e),g(Bt.$$.fragment,e),g(St.$$.fragment,e),g(Pt.$$.fragment,e),g(qe.$$.fragment,e),g(Ie.$$.fragment,e),g(Yt.$$.fragment,e),g(At.$$.fragment,e),g(Dt.$$.fragment,e),g(Ge.$$.fragment,e),g(Ue.$$.fragment,e),g(Qt.$$.fragment,e),g(Ot.$$.fragment,e),g(Kt.$$.fragment,e),g(Ze.$$.fragment,e),g(Ne.$$.fragment,e),g(eo.$$.fragment,e),g(to.$$.fragment,e),g(oo.$$.fragment,e),g(Xe.$$.fragment,e),g(Re.$$.fragment,e),g(no.$$.fragment,e),g(so.$$.fragment,e),g(ao.$$.fragment,e),g(Ve.$$.fragment,e),g(Le.$$.fragment,e),g(ro.$$.fragment,e),g(io.$$.fragment,e),g(lo.$$.fragment,e),g(He.$$.fragment,e),g(Ee.$$.fragment,e),g(co.$$.fragment,e),g(po.$$.fragment,e),g(ho.$$.fragment,e),g(Be.$$.fragment,e),g(Se.$$.fragment,e),g(mo.$$.fragment,e),ds=!1},d(e){e&&(r(k),r(d),r(p),r(n),r(Wn),r(it),r(Mn),r(dt),r(Fn),r(lt),r(zn),r(ct),r
(jn),r(pt),r(Cn),r(Jn),r(mt),r(qn),r(ut),r(In),r(Gn),r(Z),r(Un),r(Zn),r(M),r(Nn),r(Xn),r(F),r(Rn),r(Vn),r(I),r(Ln),r(Hn),r(z),r(En),r(Bn),r(G),r(Sn),r(Pn),r(N),r(Yn),r(An),r(ee),r(Dn),r(Qn),r(X),r(On),r(Kn),r(R),r(es),r(ts),r(V),r(os),r(ns),r(L),r(ss),r(as),r(H),r(rs),r(is),r(Tn)),r(t),b(w,e),b(v,e),b(ht,e),b(ft,e),b(_t),b(ge),b(gt,e),b(bt),b(yt),b(be),b(kt),b(wt),b(vt),b(Tt),b(xt,e),b($t),b(Wt),b(we),b(Mt),b(Ft),b(zt),b(jt),b(Ct,e),b(Jt),b(qt),b(It,e),b(Gt),b(Ut),b(Zt),b(We),b(Nt),b(Me),b(Xt),b(Rt),b(Vt,e),b(Lt),b(Ht),b(je),b(Ce),b(Et),b(Bt,e),b(St),b(Pt),b(qe),b(Ie),b(Yt,e),b(At),b(Dt),b(Ge),b(Ue),b(Qt,e),b(Ot),b(Kt),b(Ze),b(Ne),b(eo,e),b(to),b(oo),b(Xe),b(Re),b(no,e),b(so),b(ao),b(Ve),b(Le),b(ro,e),b(io),b(lo),b(He),b(Ee),b(co,e),b(po),b(ho),b(Be),b(Se),b(mo,e)}}}const Ci='{"title":"Whisper","local":"whisper","sections":[{"title":"개요","local":"overview","sections":[],"depth":2},{"title":"WhisperConfig","local":"whisperconfig ][ transformers.WhisperConfig","sections":[],"depth":2},{"title":"WhisperTokenizer","local":"whispertokenizer ][ transformers.WhisperTokenizer","sections":[],"depth":2},{"title":"WhisperTokenizerFast","local":"whispertokenizerfast ][ transformers.WhisperTokenizerFast","sections":[],"depth":2},{"title":"WhisperFeatureExtractor","local":"whisperfeatureextractor ][ transformers.WhisperFeatureExtractor","sections":[],"depth":2},{"title":"WhisperProcessor","local":"whisperprocessor ][ transformers.WhisperProcessor","sections":[],"depth":2},{"title":"WhisperModel","local":"whispermodel ][ transformers.WhisperModel","sections":[],"depth":2},{"title":"WhisperForConditionalGeneration","local":"whisperforconditionalgeneration ][ transformers.WhisperForConditionalGeneration","sections":[],"depth":2},{"title":"WhisperForAudioClassification","local":"whisperforaudioclassification ][ transformers.WhisperForAudioClassification","sections":[],"depth":2},{"title":"TFWhisperModel","local":"tfwhispermodel ][ 
transformers.TFWhisperModel","sections":[],"depth":2},{"title":"TFWhisperForConditionalGeneration","local":"tfwhisperforconditionalgeneration ][ transformers.TFWhisperForConditionalGeneration","sections":[],"depth":2},{"title":"FlaxWhisperModel","local":"flaxwhispermodel ][ transformers.FlaxWhisperModel","sections":[],"depth":2},{"title":"FlaxWhisperForConditionalGeneration","local":"flaxwhisperforconditionalgeneration ][ transformers.FlaxWhisperForConditionalGeneration","sections":[],"depth":2},{"title":"FlaxWhisperForAudioClassification","local":"flaxwhisperforaudioclassification ][ transformers.FlaxWhisperForAudioClassification","sections":[],"depth":2}],"depth":1}';function Ji(T){return si(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Xi extends ai{constructor(t){super(),ri(this,t,Ji,ji,ni,{})}}export{Xi as component}; | |
Xet Storage Details
- Size: 233 kB
- Xet hash: 5d205dab7b7ff4638a4d41454b80ab0d282d8bc066fc48eae6174b5ca6736792
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.