Buckets:

hf-doc-build/doc-dev / inference-providers /pr_1821 /en /tasks /audio-classification.html
rtrm's picture
download
raw
14.1 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Audio Classification&quot;,&quot;local&quot;:&quot;audio-classification&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Recommended models&quot;,&quot;local&quot;:&quot;recommended-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Using the API&quot;,&quot;local&quot;:&quot;using-the-api&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;API specification&quot;,&quot;local&quot;:&quot;api-specification&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Request&quot;,&quot;local&quot;:&quot;request&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Response&quot;,&quot;local&quot;:&quot;response&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3}],&quot;depth&quot;:2}">
<link href="/docs/inference-providers/pr_1821/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/inference-providers/pr_1821/en/_app/immutable/entry/start.652aa0ea.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1821/en/_app/immutable/chunks/scheduler.2427eaa0.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1821/en/_app/immutable/chunks/singletons.342bf38a.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1821/en/_app/immutable/chunks/index.2f106b1a.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1821/en/_app/immutable/chunks/paths.fbcfdda0.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1821/en/_app/immutable/entry/app.8c7cd661.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1821/en/_app/immutable/chunks/index.cf54e274.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1821/en/_app/immutable/nodes/0.f29beee7.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1821/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1821/en/_app/immutable/nodes/24.cfccb30d.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1821/en/_app/immutable/chunks/Tip.ba8beabf.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1821/en/_app/immutable/chunks/getInferenceSnippets.f04a0048.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Audio Classification&quot;,&quot;local&quot;:&quot;audio-classification&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Recommended models&quot;,&quot;local&quot;:&quot;recommended-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Using the API&quot;,&quot;local&quot;:&quot;using-the-api&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;API specification&quot;,&quot;local&quot;:&quot;api-specification&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Request&quot;,&quot;local&quot;:&quot;request&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Response&quot;,&quot;local&quot;:&quot;response&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3}],&quot;depth&quot;:2}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h2 class="relative group"><a id="audio-classification" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#audio-classification"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 
11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Audio Classification</span></h2> <p data-svelte-h="svelte-1ny8y9l">Audio classification is the task of assigning a label or class to a given audio.</p> <p data-svelte-h="svelte-1iml56d">Example applications:</p> <ul data-svelte-h="svelte-zjmuse"><li>Recognizing which command a user is giving</li> <li>Identifying a speaker</li> <li>Detecting the genre of a song</li></ul> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1upmpac">For more details about the <code>audio-classification</code> task, check out its <a href="https://huggingface.co/tasks/audio-classification" rel="nofollow">dedicated page</a>! You will find examples and related materials.</p></div> <h3 class="relative group"><a id="recommended-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#recommended-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>Recommended models</span></h3> <p data-svelte-h="svelte-1n75j2e">Explore all available models and find the one that suits you best <a href="https://huggingface.co/models?inference=warm&pipeline_tag=audio-classification&sort=trending" rel="nofollow">here</a>.</p> <h3 class="relative group"><a id="using-the-api" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-the-api"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using the API</span></h3> <p data-svelte-h="svelte-1lxh3d7">There are currently no snippet examples for the <strong>audio-classification</strong> task, as no providers support it yet.</p> <h3 class="relative group"><a id="api-specification" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#api-specification"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 
67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>API specification</span></h3> <h4 class="relative group"><a id="request" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#request"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Request</span></h4> <table data-svelte-h="svelte-1rgocha"><thead><tr><th align="left">Headers</th> <th align="left"></th> <th align="left"></th></tr></thead> <tbody><tr><td align="left"><strong>authorization</strong></td> <td align="left"><em>string</em></td> <td align="left">Authentication header in the form <code>&#39;Bearer hf_****&#39;</code> where <code>hf_****</code> is a personal user access token with “Inference Providers” permission. 
You can generate one from <a href="https://huggingface.co/settings/tokens/new?ownUserPermissions=inference.serverless.write&tokenType=fineGrained" rel="nofollow">your settings page</a>.</td></tr></tbody></table> <table data-svelte-h="svelte-2n8vhn"><thead><tr><th align="left">Payload</th> <th align="left"></th> <th align="left"></th></tr></thead> <tbody><tr><td align="left"><strong>inputs*</strong></td> <td align="left"><em>string</em></td> <td align="left">The input audio data as a base64-encoded string. If no <code>parameters</code> are provided, you can also provide the audio data as a raw bytes payload.</td></tr> <tr><td align="left"><strong>parameters</strong></td> <td align="left"><em>object</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        function_to_apply</strong></td> <td align="left"><em>enum</em></td> <td align="left">Possible values: sigmoid, softmax, none.</td></tr> <tr><td align="left"><strong>        top_k</strong></td> <td align="left"><em>integer</em></td> <td align="left">When specified, limits the output to the top K most probable classes.</td></tr></tbody></table> <h4 class="relative group"><a id="response" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#response"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 
56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Response</span></h4> <table data-svelte-h="svelte-1mk6wu5"><thead><tr><th align="left">Body</th> <th align="left"></th> <th align="left"></th></tr></thead> <tbody><tr><td align="left"><strong>(array)</strong></td> <td align="left"><em>object[]</em></td> <td align="left">Output is an array of objects.</td></tr> <tr><td align="left"><strong>        label</strong></td> <td align="left"><em>string</em></td> <td align="left">The predicted class label.</td></tr> <tr><td align="left"><strong>        score</strong></td> <td align="left"><em>number</em></td> <td align="left">The corresponding probability.</td></tr></tbody></table> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hub-docs/blob/main/docs/inference-providers/tasks/audio-classification.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
  // Both the asset path and the base path point at the same docs root.
  const docsRoot = "/docs/inference-providers/pr_1821/en";

  // Assigned without a declaration on purpose: in a classic script this
  // creates a global that code outside this block can read.
  __sveltekit_tpxpzz = { assets: docsRoot, base: docsRoot, env: {} };

  // Must be read synchronously, before any promise callback runs:
  // document.currentScript is only set while this script is executing.
  const mountPoint = document.currentScript.parentElement;

  // One entry per id listed in node_ids below; both are null here.
  const pageData = [null, null];

  // Load the two entry modules in parallel; the order fixes which resolved
  // value is `kit` and which is `app` in the callback.
  const entryModules = [
    import("/docs/inference-providers/pr_1821/en/_app/immutable/entry/start.652aa0ea.js"),
    import("/docs/inference-providers/pr_1821/en/_app/immutable/entry/app.8c7cd661.js")
  ];

  Promise.all(entryModules).then((loaded) => {
    const [kit, app] = loaded;
    // Hand the app module, the mount element and the page options to the
    // start entry point (presumably the SvelteKit client bootstrap — confirm
    // against the framework version in use).
    kit.start(app, mountPoint, {
      node_ids: [0, 24],
      data: pageData,
      form: null,
      error: null
    });
  });
}
</script>

Xet Storage Details

Size:
14.1 kB
·
Xet hash:
82d79198e33a279f21ffa67498f946a55a6a40522121ed5c984d3580359f2660

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.