Buckets:

rtrm's picture
download
raw
34.6 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Inference Providers&quot;,&quot;local&quot;:&quot;inference-providers&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Why use Inference Providers?&quot;,&quot;local&quot;:&quot;why-use-inference-providers&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Key Features&quot;,&quot;local&quot;:&quot;key-features&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inference Playground&quot;,&quot;local&quot;:&quot;inference-playground&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Get Started&quot;,&quot;local&quot;:&quot;get-started&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Authentication&quot;,&quot;local&quot;:&quot;authentication&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;cURL&quot;,&quot;local&quot;:&quot;curl&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Python&quot;,&quot;local&quot;:&quot;python&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;JavaScript&quot;,&quot;local&quot;:&quot;javascript&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Next Steps&quot;,&quot;local&quot;:&quot;next-steps&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/inference-providers/pr_1663/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/entry/start.d5f15666.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/scheduler.ddb4e551.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/singletons.0f5b782d.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.ce98237b.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/paths.b324c1e2.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/entry/app.68b4644d.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.e16e4efa.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/nodes/0.80863911.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/nodes/4.41c5181c.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/CodeBlock.754e6cfc.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.e108c5ed.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Inference Providers&quot;,&quot;local&quot;:&quot;inference-providers&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Why use Inference Providers?&quot;,&quot;local&quot;:&quot;why-use-inference-providers&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Key Features&quot;,&quot;local&quot;:&quot;key-features&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inference Playground&quot;,&quot;local&quot;:&quot;inference-playground&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Get Started&quot;,&quot;local&quot;:&quot;get-started&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Authentication&quot;,&quot;local&quot;:&quot;authentication&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;cURL&quot;,&quot;local&quot;:&quot;curl&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Python&quot;,&quot;local&quot;:&quot;python&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;JavaScript&quot;,&quot;local&quot;:&quot;javascript&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Next Steps&quot;,&quot;local&quot;:&quot;next-steps&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="inference-providers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference-providers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference Providers</span></h1> <div class="flex justify-center" data-svelte-h="svelte-11agwm4"><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/inference-providers/Inference-providers-banner-light.png"> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/inference-providers/Inference-providers-banner-dark.png"></div> <p data-svelte-h="svelte-1a4io10">Hugging Face Inference Providers simplify and unify how developers access and run machine learning models by offering a unified, flexible interface to multiple serverless inference providers. 
This new approach extends our previous Serverless Inference API, providing more models, increased performance and better reliability thanks to our inference partners.</p> <p data-svelte-h="svelte-my20pe">To learn more about the launch of Inference Providers, check out our <a href="https://huggingface.co/blog/inference-providers" rel="nofollow">announcement blog post</a>.</p> <h2 class="relative group"><a id="why-use-inference-providers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#why-use-inference-providers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Why use Inference Providers?</span></h2> <p data-svelte-h="svelte-3l2dil">Inference Providers offers a fast and simple way to explore thousands of models for a variety of tasks. 
Whether you’re experimenting with ML capabilities or building a new application, this API gives you instant access to high-performing models across multiple domains:</p> <ul data-svelte-h="svelte-r6lxgx"><li><strong>Text Generation:</strong> Including large language models and tool-calling prompts, generate and experiment with high-quality responses.</li> <li><strong>Image and Video Generation:</strong> Easily create customized images, including LoRAs for your own styles.</li> <li><strong>Document Embeddings:</strong> Build search and retrieval systems with SOTA embeddings.</li> <li><strong>Classical AI Tasks:</strong> Ready-to-use models for text classification, image classification, speech recognition, and more.</li></ul> <p data-svelte-h="svelte-owiwcw"><strong>Fast and Free to Get Started</strong>: Inference Providers comes with a free-tier and additional included credits for <a href="https://hf.co/subscribe/pro" rel="nofollow">PRO users</a>, as well as <a href="https://huggingface.co/enterprise" rel="nofollow">Enterprise Hub organizations</a>.</p> <h2 class="relative group"><a id="key-features" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#key-features"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 
11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Key Features</span></h2> <ul data-svelte-h="svelte-1hpjin0"><li><strong>🎯 All-in-One API</strong>: A single API for text generation, image generation, document embeddings, NER, summarization, image classification, and more.</li> <li><strong>🔀 Multi-Provider Support</strong>: Easily run models from top-tier providers like fal, Replicate, Sambanova, Together AI, and others.</li> <li><strong>🚀 Scalable &amp; Reliable</strong>: Built for high availability and low-latency performance in production environments.</li> <li><strong>🔧 Developer-Friendly</strong>: Simple requests, fast responses, and a consistent developer experience across Python and JavaScript clients.</li> <li><strong>💰 Cost-Effective</strong>: No extra markup on provider rates.</li></ul> <h2 class="relative group"><a id="inference-playground" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference-playground"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference Playground</span></h2> <p data-svelte-h="svelte-u2apxl">To get started quickly with <a 
href="https://huggingface.co/models?inference_provider=all&amp;sort=trending&amp;other=conversational" rel="nofollow">Chat Completion models</a>, use the <a href="https://huggingface.co/playground" rel="nofollow">Inference Playground</a> to easily test and compare models with your prompts.</p> <a href="https://huggingface.co/playground" target="_blank" data-svelte-h="svelte-1yefxq"><img src="https://cdn-uploads.huggingface.co/production/uploads/5f17f0a0925b9863e28ad517/9_Tgf0Tv65srhBirZQMTp.png" alt="Inference Playground thumbnail" style="max-width: 550px; width: 100%;"></a> <h2 class="relative group"><a id="get-started" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#get-started"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Get Started</span></h2> <p data-svelte-h="svelte-hl02sf">You can use Inference Providers with your preferred tools, such as Python, JavaScript, or cURL. 
To simplify integration, we offer both a Python SDK (<code>huggingface_hub</code>) and a JavaScript SDK (<code>huggingface.js</code>).</p> <p data-svelte-h="svelte-7vavmh">In this section, we will demonstrate a simple example using <a href="https://huggingface.co/deepseek-ai/DeepSeek-V3-0324" rel="nofollow">deepseek-ai/DeepSeek-V3-0324</a>, a conversational Large Language Model. For the example, we will use <a href="https://novita.ai/" rel="nofollow">Novita AI</a> as Inference Provider.</p> <h3 class="relative group"><a id="authentication" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#authentication"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Authentication</span></h3> <p data-svelte-h="svelte-czvqq9">Inference Providers requires passing a user token in the request headers. You can generate a token by signing up on the Hugging Face website and going to the <a href="https://huggingface.co/settings/tokens/new?ownUserPermissions=inference.serverless.write&tokenType=fineGrained" rel="nofollow">settings page</a>. 
We recommend creating a <code>fine-grained</code> token with the scope to <code>Make calls to Inference Providers</code>.</p> <p data-svelte-h="svelte-14h1l7k">For more details about user tokens, check out <a href="https://huggingface.co/docs/hub/en/security-tokens" rel="nofollow">this guide</a>.</p> <h3 class="relative group"><a id="curl" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#curl"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>cURL</span></h3> <p data-svelte-h="svelte-rx3iua">Let’s start with a cURL command highlighting the raw HTTP request. 
You can adapt this request to be run with the tool of your choice.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->curl https://router.huggingface.co/novita/v3/openai/chat/completions \
-H <span class="hljs-string">&quot;Authorization: Bearer <span class="hljs-variable">$HF_TOKEN</span>&quot;</span> \
-H <span class="hljs-string">&#x27;Content-Type: application/json&#x27;</span> \
-d <span class="hljs-string">&#x27;{
&quot;messages&quot;: [
{
&quot;role&quot;: &quot;user&quot;,
&quot;content&quot;: &quot;How many G in huggingface?&quot;
}
],
&quot;model&quot;: &quot;deepseek/deepseek-v3-0324&quot;,
&quot;stream&quot;: false
}&#x27;</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="python" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#python"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Python</span></h3> <p data-svelte-h="svelte-1e368qz">In Python, you can use the <code>requests</code> library to make raw requests to the API:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none 
transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> requests
API_URL = <span class="hljs-string">&quot;https://router.huggingface.co/novita/v3/openai/chat/completions&quot;</span>
headers = {<span class="hljs-string">&quot;Authorization&quot;</span>: <span class="hljs-string">&quot;Bearer hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx&quot;</span>}
payload = {
<span class="hljs-string">&quot;messages&quot;</span>: [
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;How many &#x27;G&#x27;s in &#x27;huggingface&#x27;?&quot;</span>
}
],
<span class="hljs-string">&quot;model&quot;</span>: <span class="hljs-string">&quot;deepseek/deepseek-v3-0324&quot;</span>,
}
response = requests.post(API_URL, headers=headers, json=payload)
<span class="hljs-built_in">print</span>(response.json()[<span class="hljs-string">&quot;choices&quot;</span>][<span class="hljs-number">0</span>][<span class="hljs-string">&quot;message&quot;</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-16da4wg">For convenience, the Python library <code>huggingface_hub</code> provides an <a href="https://huggingface.co/docs/huggingface_hub/guides/inference" rel="nofollow"><code>InferenceClient</code></a> that handles inference for you. Make sure to install it with <code>pip install huggingface_hub</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient
client = InferenceClient(
provider=<span class="hljs-string">&quot;novita&quot;</span>,
api_key=<span class="hljs-string">&quot;hf_xxxxxxxxxxxxxxxxxxxxxxxx&quot;</span>,
)
completion = client.chat.completions.create(
model=<span class="hljs-string">&quot;deepseek-ai/DeepSeek-V3-0324&quot;</span>,
messages=[
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;How many &#x27;G&#x27;s in &#x27;huggingface&#x27;?&quot;</span>
}
],
)
<span class="hljs-built_in">print</span>(completion.choices[<span class="hljs-number">0</span>].message)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="javascript" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#javascript"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>JavaScript</span></h3> <p data-svelte-h="svelte-y5hdls">In JS, you can use the <code>fetch</code> library to make raw requests to the API:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect 
fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> fetch <span class="hljs-keyword">from</span> <span class="hljs-string">&quot;node-fetch&quot;</span>;
<span class="hljs-keyword">const</span> response = <span class="hljs-keyword">await</span> <span class="hljs-title function_">fetch</span>(
<span class="hljs-string">&quot;https://router.huggingface.co/novita/v3/openai/chat/completions&quot;</span>,
{
<span class="hljs-attr">method</span>: <span class="hljs-string">&quot;POST&quot;</span>,
<span class="hljs-attr">headers</span>: {
<span class="hljs-title class_">Authorization</span>: <span class="hljs-string">`Bearer hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`</span>,
<span class="hljs-string">&quot;Content-Type&quot;</span>: <span class="hljs-string">&quot;application/json&quot;</span>,
},
<span class="hljs-attr">body</span>: <span class="hljs-title class_">JSON</span>.<span class="hljs-title function_">stringify</span>({
<span class="hljs-attr">provider</span>: <span class="hljs-string">&quot;novita&quot;</span>,
<span class="hljs-attr">model</span>: <span class="hljs-string">&quot;deepseek/deepseek-v3-0324&quot;</span>,
<span class="hljs-attr">messages</span>: [
{
<span class="hljs-attr">role</span>: <span class="hljs-string">&quot;user&quot;</span>,
<span class="hljs-attr">content</span>: <span class="hljs-string">&quot;How many &#x27;G&#x27;s in &#x27;huggingface&#x27;?&quot;</span>,
},
],
}),
}
);
<span class="hljs-variable language_">console</span>.<span class="hljs-title function_">log</span>(<span class="hljs-keyword">await</span> response.<span class="hljs-title function_">json</span>());<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1qnfqp9">For convenience, the JS library <code>@huggingface/inference</code> provides an <a href="https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient" rel="nofollow"><code>InferenceClient</code></a> that handles inference for you. You can install it with <code>npm install @huggingface/inference</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> { <span class="hljs-title class_">InferenceClient</span> } <span class="hljs-keyword">from</span> <span 
class="hljs-string">&quot;@huggingface/inference&quot;</span>;
<span class="hljs-keyword">const</span> client = <span class="hljs-keyword">new</span> <span class="hljs-title class_">InferenceClient</span>(<span class="hljs-string">&quot;hf_xxxxxxxxxxxxxxxxxxxxxxxx&quot;</span>);
<span class="hljs-keyword">const</span> chatCompletion = <span class="hljs-keyword">await</span> client.<span class="hljs-title function_">chatCompletion</span>({
<span class="hljs-attr">provider</span>: <span class="hljs-string">&quot;novita&quot;</span>,
<span class="hljs-attr">model</span>: <span class="hljs-string">&quot;deepseek-ai/DeepSeek-V3-0324&quot;</span>,
<span class="hljs-attr">messages</span>: [
{
<span class="hljs-attr">role</span>: <span class="hljs-string">&quot;user&quot;</span>,
<span class="hljs-attr">content</span>: <span class="hljs-string">&quot;How many &#x27;G&#x27;s in &#x27;huggingface&#x27;?&quot;</span>,
},
],
});
<span class="hljs-variable language_">console</span>.<span class="hljs-title function_">log</span>(chatCompletion.<span class="hljs-property">choices</span>[<span class="hljs-number">0</span>].<span class="hljs-property">message</span>);<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="next-steps" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#next-steps"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Next Steps</span></h2> <p data-svelte-h="svelte-9ernno">In this introduction, we’ve covered the basics of Inference Providers. 
To learn more about this service, check out our guides and API Reference:</p> <ul data-svelte-h="svelte-16ni88p"><li><a href="./pricing">Pricing and Billing</a>: everything you need to know about billing.</li> <li><a href="./hub-integration">Hub integration</a>: how is Inference Providers integrated with the Hub?</li> <li><a href="./register-as-a-provider">Register as an Inference Provider</a>: everything about how to become an official partner.</li> <li><a href="./hub-api">Hub API</a>: high-level API for Inference Providers.</li> <li><a href="./tasks/index">API Reference</a>: learn more about the parameters and task-specific settings.</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hub-docs/blob/main/docs/inference-providers/index.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
// Inline bootstrap: publishes the path configuration as a global, then loads
// the two entry modules and calls start() on the first with the second.
{
// No declaration keyword: in this classic (non-module, non-strict) script the
// assignment creates a property on the global object.
// Presumably read by the imported bundles to resolve asset/base URLs — TODO confirm.
__sveltekit_1o5mypj = {
assets: "/docs/inference-providers/pr_1663/en",
base: "/docs/inference-providers/pr_1663/en",
env: {}
};
// Must be read synchronously: document.currentScript is only set while this
// script element is executing, so it would be null inside the .then() callback.
const element = document.currentScript.parentElement;
// Two null entries, the same count as node_ids below (presumably one per node — confirm).
const data = [null,null];
// Both entry modules are requested in parallel; start() runs only once both resolve.
// NOTE(review): there is no .catch(), so a failed import surfaces only as an
// unhandled promise rejection.
Promise.all([
import("/docs/inference-providers/pr_1663/en/_app/immutable/entry/start.d5f15666.js"),
import("/docs/inference-providers/pr_1663/en/_app/immutable/entry/app.68b4644d.js")
]).then(([kit, app]) => {
// `element` is the parent of this <script> tag, captured above.
kit.start(app, element, {
// Same numbers as the nodes/0.*.js and nodes/4.*.js modulepreload links earlier
// in this file — presumably the nodes to hydrate (verify).
node_ids: [0, 4],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
34.6 kB
·
Xet hash:
ebea1bedf845ed2e6d549381cc6b65bdfddd614204facb3e5b2b5793c5beeaf2

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.