Buckets:
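| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Inference Providers&quot;,&quot;local&quot;:&quot;inference-providers&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Partners&quot;,&quot;local&quot;:&quot;partners&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Why use Inference Providers?&quot;,&quot;local&quot;:&quot;why-use-inference-providers&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Key Features&quot;,&quot;local&quot;:&quot;key-features&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inference Playground&quot;,&quot;local&quot;:&quot;inference-playground&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Get Started&quot;,&quot;local&quot;:&quot;get-started&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Authentication&quot;,&quot;local&quot;:&quot;authentication&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;cURL&quot;,&quot;local&quot;:&quot;curl&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Python&quot;,&quot;local&quot;:&quot;python&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;JavaScript&quot;,&quot;local&quot;:&quot;javascript&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Next Steps&quot;,&quot;local&quot;:&quot;next-steps&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"> | |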
| <link href="/docs/inference-providers/pr_1721/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1721/en/_app/immutable/entry/start.dc95e1a7.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1721/en/_app/immutable/chunks/scheduler.2427eaa0.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1721/en/_app/immutable/chunks/singletons.2dd1c5f2.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1721/en/_app/immutable/chunks/index.2f106b1a.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1721/en/_app/immutable/chunks/paths.1924696a.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1721/en/_app/immutable/entry/app.64a85c6c.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1721/en/_app/immutable/chunks/index.0f2ff8a3.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1721/en/_app/immutable/nodes/0.094fd6be.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1721/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1721/en/_app/immutable/nodes/4.21502739.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1721/en/_app/immutable/chunks/CodeBlock.1bc84186.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1721/en/_app/immutable/chunks/index.61bead48.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Inference Providers","local":"inference-providers","sections":[{"title":"Partners","local":"partners","sections":[],"depth":2},{"title":"Why use Inference Providers?","local":"why-use-inference-providers","sections":[],"depth":2},{"title":"Key Features","local":"key-features","sections":[],"depth":2},{"title":"Inference Playground","local":"inference-playground","sections":[],"depth":2},{"title":"Get Started","local":"get-started","sections":[{"title":"Authentication","local":"authentication","sections":[],"depth":3},{"title":"cURL","local":"curl","sections":[],"depth":3},{"title":"Python","local":"python","sections":[],"depth":3},{"title":"JavaScript","local":"javascript","sections":[],"depth":3}],"depth":2},{"title":"Next Steps","local":"next-steps","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="inference-providers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference-providers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 
0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference Providers</span></h1> <div class="flex justify-center" data-svelte-h="svelte-11agwm4"><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/inference-providers/Inference-providers-banner-light.png"> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/inference-providers/Inference-providers-banner-dark.png"></div> <p data-svelte-h="svelte-1p105z9">Hugging Face’s Inference Providers give developers streamlined, unified access to hundreds of machine learning models, powered by our serverless inference partners. This new approach builds on our previous Serverless Inference API, offering more models, improved performance, and greater reliability thanks to world-class providers.</p> <p data-svelte-h="svelte-my20pe">To learn more about the launch of Inference Providers, check out our <a href="https://huggingface.co/blog/inference-providers" rel="nofollow">announcement blog post</a>.</p> <h2 class="relative group"><a id="partners" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#partners"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 
11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Partners</span></h2> <p data-svelte-h="svelte-luy5lr">Here is the complete list of partners integrated with Inference Providers, and the supported tasks for each of them:</p> <table data-svelte-h="svelte-pni6em"><thead><tr><th>Provider</th> <th align="center">Chat completion (LLM)</th> <th align="center">Chat completion (VLM)</th> <th align="center">Feature Extraction</th> <th align="center">Text to Image</th> <th align="center">Text to video</th></tr></thead> <tbody><tr><td><a href="./providers/cerebras">Cerebras</a></td> <td align="center">✅</td> <td align="center"></td> <td align="center"></td> <td align="center"></td> <td align="center"></td></tr> <tr><td><a href="./providers/cohere">Cohere</a></td> <td align="center">✅</td> <td align="center">✅</td> <td align="center"></td> <td align="center"></td> <td align="center"></td></tr> <tr><td><a href="./providers/fal-ai">Fal AI</a></td> <td align="center"></td> <td align="center"></td> <td align="center"></td> <td align="center">✅</td> <td align="center">✅</td></tr> <tr><td><a href="./providers/fireworks-ai">Fireworks</a></td> <td align="center">✅</td> <td align="center">✅</td> <td align="center"></td> <td align="center"></td> <td align="center"></td></tr> <tr><td><a href="./providers/hf-inference">HF Inference</a></td> <td align="center">✅</td> <td align="center">✅</td> <td align="center">✅</td> <td align="center">✅</td> <td align="center"></td></tr> <tr><td><a href="./providers/hyperbolic">Hyperbolic</a></td> <td align="center">✅</td> <td align="center">✅</td> <td align="center"></td> <td align="center"></td> <td align="center"></td></tr> <tr><td><a href="./providers/nebius">Nebius</a></td> <td align="center">✅</td> <td align="center">✅</td> <td align="center"></td> <td align="center">✅</td> <td align="center"></td></tr> <tr><td><a href="./providers/novita">Novita</a></td> <td align="center">✅</td> <td 
align="center">✅</td> <td align="center"></td> <td align="center"></td> <td align="center">✅</td></tr> <tr><td><a href="./providers/replicate">Replicate</a></td> <td align="center"></td> <td align="center"></td> <td align="center"></td> <td align="center">✅</td> <td align="center">✅</td></tr> <tr><td><a href="./providers/sambanova">SambaNova</a></td> <td align="center">✅</td> <td align="center"></td> <td align="center">✅</td> <td align="center"></td> <td align="center"></td></tr> <tr><td><a href="./providers/together">Together</a></td> <td align="center">✅</td> <td align="center">✅</td> <td align="center"></td> <td align="center">✅</td> <td align="center"></td></tr></tbody></table> <h2 class="relative group"><a id="why-use-inference-providers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#why-use-inference-providers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Why use Inference Providers?</span></h2> <p data-svelte-h="svelte-3l2dil">Inference Providers offers a fast and simple way to explore thousands of models for a variety of tasks. 
Whether you’re experimenting with ML capabilities or building a new application, this API gives you instant access to high-performing models across multiple domains:</p> <ul data-svelte-h="svelte-r6lxgx"><li><strong>Text Generation:</strong> Including large language models and tool-calling prompts, generate and experiment with high-quality responses.</li> <li><strong>Image and Video Generation:</strong> Easily create customized images, including LoRAs for your own styles.</li> <li><strong>Document Embeddings:</strong> Build search and retrieval systems with SOTA embeddings.</li> <li><strong>Classical AI Tasks:</strong> Ready-to-use models for text classification, image classification, speech recognition, and more.</li></ul> <p data-svelte-h="svelte-owiwcw">⚡ <strong>Fast and Free to Get Started</strong>: Inference Providers comes with a free-tier and additional included credits for <a href="https://hf.co/subscribe/pro" rel="nofollow">PRO users</a>, as well as <a href="https://huggingface.co/enterprise" rel="nofollow">Enterprise Hub organizations</a>.</p> <h2 class="relative group"><a id="key-features" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#key-features"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 
11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Key Features</span></h2> <ul data-svelte-h="svelte-1hpjin0"><li><strong>🎯 All-in-One API</strong>: A single API for text generation, image generation, document embeddings, NER, summarization, image classification, and more.</li> <li><strong>🔀 Multi-Provider Support</strong>: Easily run models from top-tier providers like fal, Replicate, Sambanova, Together AI, and others.</li> <li><strong>🚀 Scalable & Reliable</strong>: Built for high availability and low-latency performance in production environments.</li> <li><strong>🔧 Developer-Friendly</strong>: Simple requests, fast responses, and a consistent developer experience across Python and JavaScript clients.</li> <li><strong>💰 Cost-Effective</strong>: No extra markup on provider rates.</li></ul> <h2 class="relative group"><a id="inference-playground" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference-playground"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference Playground</span></h2> <p data-svelte-h="svelte-u2apxl">To get started quickly with <a 
href="https://huggingface.co/models?inference_provider=all&amp;sort=trending&amp;other=conversational" rel="nofollow">Chat Completion models</a>, use the <a href="https://huggingface.co/playground" rel="nofollow">Inference Playground</a> to easily test and compare models with your prompts.</p> <a href="https://huggingface.co/playground" target="_blank" rel="noopener" data-svelte-h="svelte-1yefxq"><img src="https://cdn-uploads.huggingface.co/production/uploads/5f17f0a0925b9863e28ad517/9_Tgf0Tv65srhBirZQMTp.png" alt="Inference Playground" style="max-width: 550px; width: 100%;"></a> <h2 class="relative group"><a id="get-started" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#get-started"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Get Started</span></h2> <p data-svelte-h="svelte-hl02sf">You can use Inference Providers with your preferred tools, such as Python, JavaScript, or cURL. 
To simplify integration, we offer both a Python SDK (<code>huggingface_hub</code>) and a JavaScript SDK (<code>huggingface.js</code>).</p> <p data-svelte-h="svelte-7vavmh">In this section, we will demonstrate a simple example using <a href="https://huggingface.co/deepseek-ai/DeepSeek-V3-0324" rel="nofollow">deepseek-ai/DeepSeek-V3-0324</a>, a conversational Large Language Model. For the example, we will use <a href="https://novita.ai/" rel="nofollow">Novita AI</a> as Inference Provider.</p> <h3 class="relative group"><a id="authentication" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#authentication"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Authentication</span></h3> <p data-svelte-h="svelte-czvqq9">Inference Providers requires passing a user token in the request headers. You can generate a token by signing up on the Hugging Face website and going to the <a href="https://huggingface.co/settings/tokens/new?ownUserPermissions=inference.serverless.write&tokenType=fineGrained" rel="nofollow">settings page</a>. 
We recommend creating a <code>fine-grained</code> token with the scope to <code>Make calls to Inference Providers</code>.</p> <p data-svelte-h="svelte-14h1l7k">For more details about user tokens, check out <a href="https://huggingface.co/docs/hub/en/security-tokens" rel="nofollow">this guide</a>.</p> <h3 class="relative group"><a id="curl" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#curl"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>cURL</span></h3> <p data-svelte-h="svelte-rx3iua">Let’s start with a cURL command highlighting the raw HTTP request. 
You can adapt this request to be run with the tool of your choice.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->curl https://router.huggingface.co/novita/v3/openai/chat/completions \ | |
| -H <span class="hljs-string">"Authorization: Bearer <span class="hljs-variable">$HF_TOKEN</span>"</span> \ | |
| -H <span class="hljs-string">'Content-Type: application/json'</span> \ | |
| -d <span class="hljs-string">'{ | |
| "messages": [ | |
| { | |
| "role": "user", | |
| "content": "How many G in huggingface?" | |
| } | |
| ], | |
| "model": "deepseek/deepseek-v3-0324", | |
| "stream": false | |
| }'</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="python" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#python"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Python</span></h3> <p data-svelte-h="svelte-1e368qz">In Python, you can use the <code>requests</code> library to make raw requests to the API:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none 
transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> requests | |
| API_URL = <span class="hljs-string">"https://router.huggingface.co/novita/v3/openai/chat/completions"</span> | |
| headers = {<span class="hljs-string">"Authorization"</span>: <span class="hljs-string">"Bearer hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"</span>} | |
| payload = { | |
| <span class="hljs-string">"messages"</span>: [ | |
| { | |
| <span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, | |
| <span class="hljs-string">"content"</span>: <span class="hljs-string">"How many 'G's in 'huggingface'?"</span> | |
| } | |
| ], | |
| <span class="hljs-string">"model"</span>: <span class="hljs-string">"deepseek/deepseek-v3-0324"</span>, | |
| } | |
| response = requests.post(API_URL, headers=headers, json=payload) | |
| <span class="hljs-built_in">print</span>(response.json()[<span class="hljs-string">"choices"</span>][<span class="hljs-number">0</span>][<span class="hljs-string">"message"</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-16da4wg">For convenience, the Python library <code>huggingface_hub</code> provides an <a href="https://huggingface.co/docs/huggingface_hub/guides/inference" rel="nofollow"><code>InferenceClient</code></a> that handles inference for you. Make sure to install it with <code>pip install huggingface_hub</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient | |
| client = InferenceClient( | |
| provider=<span class="hljs-string">"novita"</span>, | |
| api_key=<span class="hljs-string">"hf_xxxxxxxxxxxxxxxxxxxxxxxx"</span>, | |
| ) | |
| completion = client.chat.completions.create( | |
| model=<span class="hljs-string">"deepseek-ai/DeepSeek-V3-0324"</span>, | |
| messages=[ | |
| { | |
| <span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, | |
| <span class="hljs-string">"content"</span>: <span class="hljs-string">"How many 'G's in 'huggingface'?"</span> | |
| } | |
| ], | |
| ) | |
| <span class="hljs-built_in">print</span>(completion.choices[<span class="hljs-number">0</span>].message)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="javascript" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#javascript"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>JavaScript</span></h3> <p data-svelte-h="svelte-y5hdls">In JS, you can use the <code>fetch</code> library to make raw requests to the API:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect 
fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> fetch <span class="hljs-keyword">from</span> <span class="hljs-string">"node-fetch"</span>; | |
| <span class="hljs-keyword">const</span> response = <span class="hljs-keyword">await</span> <span class="hljs-title function_">fetch</span>( | |
| <span class="hljs-string">"https://router.huggingface.co/novita/v3/openai/chat/completions"</span>, | |
| { | |
| <span class="hljs-attr">method</span>: <span class="hljs-string">"POST"</span>, | |
| <span class="hljs-attr">headers</span>: { | |
| <span class="hljs-title class_">Authorization</span>: <span class="hljs-string">`Bearer hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`</span>, | |
| <span class="hljs-string">"Content-Type"</span>: <span class="hljs-string">"application/json"</span>, | |
| }, | |
| <span class="hljs-attr">body</span>: <span class="hljs-title class_">JSON</span>.<span class="hljs-title function_">stringify</span>({ | |
| <span class="hljs-attr">provider</span>: <span class="hljs-string">"novita"</span>, | |
| <span class="hljs-attr">model</span>: <span class="hljs-string">"deepseek/deepseek-v3-0324"</span>, | |
| <span class="hljs-attr">messages</span>: [ | |
| { | |
| <span class="hljs-attr">role</span>: <span class="hljs-string">"user"</span>, | |
| <span class="hljs-attr">content</span>: <span class="hljs-string">"How many 'G's in 'huggingface'?"</span>, | |
| }, | |
| ], | |
| }), | |
| } | |
| ); | |
| <span class="hljs-variable language_">console</span>.<span class="hljs-title function_">log</span>(<span class="hljs-keyword">await</span> response.<span class="hljs-title function_">json</span>());<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1qnfqp9">For convenience, the JS library <code>@huggingface/inference</code> provides an <a href="https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient" rel="nofollow"><code>InferenceClient</code></a> that handles inference for you. You can install it with <code>npm install @huggingface/inference</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> { <span class="hljs-title class_">InferenceClient</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">"@huggingface/inference"</span>; | |
| <span class="hljs-keyword">const</span> client = <span class="hljs-keyword">new</span> <span class="hljs-title class_">InferenceClient</span>(<span class="hljs-string">"hf_xxxxxxxxxxxxxxxxxxxxxxxx"</span>); | |
| <span class="hljs-keyword">const</span> chatCompletion = <span class="hljs-keyword">await</span> client.<span class="hljs-title function_">chatCompletion</span>({ | |
| <span class="hljs-attr">provider</span>: <span class="hljs-string">"novita"</span>, | |
| <span class="hljs-attr">model</span>: <span class="hljs-string">"deepseek-ai/DeepSeek-V3-0324"</span>, | |
| <span class="hljs-attr">messages</span>: [ | |
| { | |
| <span class="hljs-attr">role</span>: <span class="hljs-string">"user"</span>, | |
| <span class="hljs-attr">content</span>: <span class="hljs-string">"How many 'G's in 'huggingface'?"</span>, | |
| }, | |
| ], | |
| }); | |
| <span class="hljs-variable language_">console</span>.<span class="hljs-title function_">log</span>(chatCompletion.<span class="hljs-property">choices</span>[<span class="hljs-number">0</span>].<span class="hljs-property">message</span>);<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="next-steps" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#next-steps"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Next Steps</span></h2> <p data-svelte-h="svelte-9ernno">In this introduction, we’ve covered the basics of Inference Providers. 
To learn more about this service, check out our guides and API Reference:</p> <ul data-svelte-h="svelte-vegp26"><li><a href="./pricing">Pricing and Billing</a>: everything you need to know about billing.</li> <li><a href="./hub-integration">Hub integration</a>: how is Inference Providers integrated with the Hub?</li> <li><a href="./register-as-a-provider">Register as an Inference Provider</a>: everything about how to become an official partner.</li> <li><a href="./hub-api">Hub API</a>: high-level API for Inference Providers.</li> <li><a href="./tasks/index">API Reference</a>: learn more about the parameters and task-specific settings.</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hub-docs/blob/main/docs/inference-providers/index.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_7czhhx = { | |
| assets: "/docs/inference-providers/pr_1721/en", | |
| base: "/docs/inference-providers/pr_1721/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/inference-providers/pr_1721/en/_app/immutable/entry/start.dc95e1a7.js"), | |
| import("/docs/inference-providers/pr_1721/en/_app/immutable/entry/app.64a85c6c.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 4], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 38.4 kB
- Xet hash:
- 3f8c318f407d1953ea1eab98d9dff901f660ab2a5f5f40828d4dfbc8e8a53eed
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.