Buckets:
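| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;How to be registered as an inference provider on the Hub?&quot;,&quot;local&quot;:&quot;how-to-be-registered-as-an-inference-provider-on-the-hub&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;1. Prerequisites&quot;,&quot;local&quot;:&quot;1-prerequisites&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;What is a Task&quot;,&quot;local&quot;:&quot;what-is-a-task&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Task API schema&quot;,&quot;local&quot;:&quot;task-api-schema&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;2. JS Client Integration&quot;,&quot;local&quot;:&quot;2-js-client-integration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Implement the provider helper (JS)&quot;,&quot;local&quot;:&quot;implement-the-provider-helper-js&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Register the provider&quot;,&quot;local&quot;:&quot;register-the-provider&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;3. Model Mapping API&quot;,&quot;local&quot;:&quot;3-model-mapping-api&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Register a mapping item&quot;,&quot;local&quot;:&quot;register-a-mapping-item&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Authentication&quot;,&quot;local&quot;:&quot;authentication&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Validation&quot;,&quot;local&quot;:&quot;validation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Delete a mapping item&quot;,&quot;local&quot;:&quot;delete-a-mapping-item&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Update a mapping item’s status&quot;,&quot;local&quot;:&quot;update-a-mapping-items-status&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;List the whole mapping&quot;,&quot;local&quot;:&quot;list-the-whole-mapping&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;4. Billing&quot;,&quot;local&quot;:&quot;4-billing&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;HTTP API Specs&quot;,&quot;local&quot;:&quot;http-api-specs&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Price Unit&quot;,&quot;local&quot;:&quot;price-unit&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;How to define the request ID&quot;,&quot;local&quot;:&quot;how-to-define-the-request-id&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;5. Python client integration&quot;,&quot;local&quot;:&quot;5-python-client-integration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Implement the provider helper (Python)&quot;,&quot;local&quot;:&quot;implement-the-provider-helper-python&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Register the Provider&quot;,&quot;local&quot;:&quot;register-the-provider&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Add tests&quot;,&quot;local&quot;:&quot;add-tests&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;FAQ&quot;,&quot;local&quot;:&quot;faq&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"> | |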
| <link href="/docs/inference-providers/pr_1663/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/entry/start.d5f15666.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/scheduler.ddb4e551.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/singletons.0f5b782d.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.ce98237b.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/paths.b324c1e2.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/entry/app.68b4644d.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.e16e4efa.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/nodes/0.80863911.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/nodes/6.cc8da116.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/Tip.20abb04f.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/CodeBlock.754e6cfc.js"> | |
| <link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.e108c5ed.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"How to be registered as an inference provider on the Hub?","local":"how-to-be-registered-as-an-inference-provider-on-the-hub","sections":[{"title":"1. Prerequisites","local":"1-prerequisites","sections":[{"title":"What is a Task","local":"what-is-a-task","sections":[],"depth":3},{"title":"Task API schema","local":"task-api-schema","sections":[],"depth":3}],"depth":2},{"title":"2. JS Client Integration","local":"2-js-client-integration","sections":[{"title":"Implement the provider helper (JS)","local":"implement-the-provider-helper-js","sections":[],"depth":3},{"title":"Register the provider","local":"register-the-provider","sections":[],"depth":3}],"depth":2},{"title":"3. Model Mapping API","local":"3-model-mapping-api","sections":[{"title":"Register a mapping item","local":"register-a-mapping-item","sections":[{"title":"Authentication","local":"authentication","sections":[],"depth":4},{"title":"Validation","local":"validation","sections":[],"depth":4}],"depth":3},{"title":"Delete a mapping item","local":"delete-a-mapping-item","sections":[],"depth":3},{"title":"Update a mapping item’s status","local":"update-a-mapping-items-status","sections":[],"depth":3},{"title":"List the whole mapping","local":"list-the-whole-mapping","sections":[],"depth":3}],"depth":2},{"title":"4. Billing","local":"4-billing","sections":[{"title":"HTTP API Specs","local":"http-api-specs","sections":[],"depth":3},{"title":"Price Unit","local":"price-unit","sections":[],"depth":3},{"title":"How to define the request ID","local":"how-to-define-the-request-id","sections":[],"depth":3}],"depth":2},{"title":"5. 
Python client integration","local":"5-python-client-integration","sections":[{"title":"Implement the provider helper (Python)","local":"implement-the-provider-helper-python","sections":[],"depth":3},{"title":"Register the Provider","local":"register-the-provider","sections":[],"depth":3},{"title":"Add tests","local":"add-tests","sections":[],"depth":3}],"depth":2},{"title":"FAQ","local":"faq","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="how-to-be-registered-as-an-inference-provider-on-the-hub" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-to-be-registered-as-an-inference-provider-on-the-hub"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How to be registered as an inference provider on the Hub?</span></h1> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-3qkokv">Want to be listed as an Inference Provider on the Hugging Face Hub? 
Please reach out to us on social networks or <a href="https://huggingface.co/spaces/huggingface/HuggingDiscussions/discussions/49" rel="nofollow">here on the Hub</a>.</p></div> <p data-svelte-h="svelte-eo2v07">This guide details the steps for registering as an inference provider on the Hub and provides implementation guidance.</p> <ol data-svelte-h="svelte-8emskd"><li><strong>Implement standard task APIs</strong> - Follow our task API schemas for compatibility (see <a href="#1-prerequisites">Prerequisites</a>).</li> <li><strong>Submit a PR for JS client integration</strong> - Add your provider to <a href="https://github.com/huggingface/huggingface.js/tree/main/packages/inference" rel="nofollow">huggingface.js</a> (see <a href="#2-js-client-integration">JS Client Integration</a>).</li> <li><strong>Register model mappings</strong> - Use our Model Mapping API to link your models to Hub models (see <a href="#3-model-mapping-api">Model Mapping API</a>).</li> <li><strong>Implement a billing endpoint</strong> - Provide an API for billing (see <a href="#4-billing">Billing</a>).</li> <li><strong>Submit a PR for Python client integration</strong> - Add your provider to <a href="https://github.com/huggingface/huggingface_hub" rel="nofollow">huggingface_hub</a> (see <a href="#5-python-client-integration">Python client integration</a>).</li> <li><strong>Register your provider server-side and provide an icon</strong> - Reach out to us to add your provider server-side and provide your SVG icon.</li> <li><strong>Create documentation on your side</strong> - Add documentation and do a lot of communication on your side.</li> <li><strong>Add a documentation page</strong> - Open a Pull Request in this repo (huggingface/hub-docs) to add a provider-specific page in the documentation.</li> <li><strong>Share share share</strong> do a lot of comms so that your integration is as successful as possible!</li></ol> <h2 class="relative group"><a id="1-prerequisites" class="header-link block 
pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#1-prerequisites"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>1. Prerequisites</span></h2> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-90vl22">If your implementation strictly follows the OpenAI API for LLMs and VLMs, you may be able to skip most of this section. In that case, simply open a PR on <a href="https://github.com/huggingface/huggingface.js/tree/main/packages/inference" rel="nofollow">huggingface.js</a> to register.</p></div> <p data-svelte-h="svelte-iwney4">The first step to understand the integration is to take a look at the JS inference client that lives | |
| inside the <a href="https://github.com/huggingface/huggingface.js/tree/main/packages/inference" rel="nofollow">huggingface.js</a> repo.</p> <p data-svelte-h="svelte-1xoupob">This is the client that powers our Inference widgets on model pages, and is the blueprint | |
| implementation downstream (for Python SDK, to generate code snippets, etc.).</p> <h3 class="relative group"><a id="what-is-a-task" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-is-a-task"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What is a Task</span></h3> <p data-svelte-h="svelte-hjt0h1">You will see that inference methods (<code>textToImage</code>, <code>chatCompletion</code>, etc.) have names that closely | |
| mirror the task names. A task, also known as <code>pipeline_tag</code> in the HF ecosystem, is the type of | |
| model (basically which types of inputs and outputs the model has), for instance “text-generation” | |
| or “text-to-image”. It is indicated prominently on model pages, here:</p> <div class="flex justify-center" data-svelte-h="svelte-1cxy3tx"><picture><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/inference-providers/pipeline-tag-on-model-page-light.png" alt="Pipeline tag shown on a model page (light theme)"> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/inference-providers/pipeline-tag-on-model-page-dark.png" alt="Pipeline tag shown on a model page (dark theme)"></picture></div> <p data-svelte-h="svelte-vmhaok">The list of all possible tasks can be found at <a href="https://huggingface.co/tasks" rel="nofollow">https://huggingface.co/tasks</a> and the list of JS method names is documented in the README at <a href="https://github.com/huggingface/huggingface.js/tree/main/packages/inference" rel="nofollow">https://github.com/huggingface/huggingface.js/tree/main/packages/inference</a>.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-19q0jcc">Note that <code>chatCompletion</code> is an exception as it is not a pipeline_tag, per se. Instead, it | |
| includes models with either <code>pipeline_tag="text-generation"</code> or <code>pipeline_tag="image-text-to-text"</code> | |
| which are tagged as “conversational”.</p></div> <h3 class="relative group"><a id="task-api-schema" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#task-api-schema"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Task API schema</span></h3> <p data-svelte-h="svelte-epfhsj">For each task type, we enforce an API schema to make it easier for end users to use different | |
| models interchangeably. To be compatible, your third-party API must adhere to the “standard” API shape we expect on HF model pages for each pipeline task type.</p> <p data-svelte-h="svelte-n5dyw">This is not an issue for LLMs as everyone converged on the OpenAI API anyway, but it can be | |
| more tricky for other tasks like “text-to-image” or “automatic-speech-recognition” where there | |
| exists no standard API.</p> <p data-svelte-h="svelte-qm2nt8">For example, you can find the expected schema for Text to Speech here: <a href="https://github.com/huggingface/huggingface.js/blob/0a690a14d52041a872dc103846225603599f4a33/packages/tasks/src/tasks/text-to-speech/spec/input.json#L4" rel="nofollow">https://github.com/huggingface/huggingface.js/packages/src/tasks/text-to-speech/spec/input.json#L4</a>, and similarly for other supported tasks. If your API for a given task is different from HF’s, it is not an issue: you can tweak the code in <code>huggingface.js</code> to be able to call your models, i.e., provide some kind of “translation” of parameter names and output names. However, API specs should not be model-specific, only task-specific. Run the JS code and add some <a href="https://github.com/huggingface/huggingface.js/blob/main/packages/inference/test/HfInference.spec.ts" rel="nofollow">tests</a> to make sure it works well. We can help with this step!</p> <h2 class="relative group"><a id="2-js-client-integration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#2-js-client-integration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>2. JS Client Integration</span></h2> <p data-svelte-h="svelte-aqaw0f">Before proceeding with the next steps, ensure you’ve implemented the necessary code to integrate with the JS client and thoroughly tested your implementation. Here are the steps to follow:</p> <h3 class="relative group"><a id="implement-the-provider-helper-js" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#implement-the-provider-helper-js"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Implement the provider helper (JS)</span></h3> <p data-svelte-h="svelte-1ccsiy1">Create a new file under <code>packages/inference/src/providers/{provider_name}.ts</code> and copy-paste the following snippet.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" 
height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> { <span class="hljs-title class_">TaskProviderHelper</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">"./providerHelper"</span>; | |
| <span class="hljs-keyword">export</span> <span class="hljs-keyword">class</span> <span class="hljs-title class_">MyNewProviderTask</span> <span class="hljs-keyword">extends</span> <span class="hljs-title class_ inherited__">TaskProviderHelper</span> { | |
| <span class="hljs-title function_">constructor</span>(<span class="hljs-params"></span>) { | |
| <span class="hljs-variable language_">super</span>(<span class="hljs-string">"your-provider-name"</span>, <span class="hljs-string">"your-api-base-url"</span>, <span class="hljs-string">"task-name"</span>); | |
| } | |
| <span class="hljs-keyword">override</span> <span class="hljs-title function_">prepareHeaders</span>(<span class="hljs-attr">params</span>: <span class="hljs-title class_">HeaderParams</span>, <span class="hljs-attr">binary</span>: <span class="hljs-built_in">boolean</span>): <span class="hljs-title class_">Record</span><<span class="hljs-built_in">string</span>, <span class="hljs-built_in">string</span>> { | |
| <span class="hljs-comment">// Override the headers to use for the request.</span> | |
| <span class="hljs-keyword">return</span> <span class="hljs-variable language_">super</span>.<span class="hljs-title function_">prepareHeaders</span>(params, binary); | |
| } | |
| <span class="hljs-title function_">makeRoute</span>(<span class="hljs-attr">params</span>: <span class="hljs-title class_">UrlParams</span>): <span class="hljs-built_in">string</span> { | |
| <span class="hljs-comment">// Return the route to use for the request. e.g. /v1/chat/completions route is commonly used for chat completion.</span> | |
| <span class="hljs-keyword">throw</span> <span class="hljs-keyword">new</span> <span class="hljs-title class_">Error</span>(<span class="hljs-string">"Needs to be implemented"</span>); | |
| } | |
| <span class="hljs-title function_">preparePayload</span>(<span class="hljs-attr">params</span>: <span class="hljs-title class_">BodyParams</span>): <span class="hljs-title class_">Record</span><<span class="hljs-built_in">string</span>, <span class="hljs-built_in">unknown</span>> { | |
| <span class="hljs-comment">// Return the payload to use for the request, as a dict.</span> | |
| <span class="hljs-keyword">throw</span> <span class="hljs-keyword">new</span> <span class="hljs-title class_">Error</span>(<span class="hljs-string">"Needs to be implemented"</span>); | |
| } | |
| <span class="hljs-title function_">getResponse</span>(<span class="hljs-attr">response</span>: <span class="hljs-built_in">unknown</span>, outputType?: <span class="hljs-string">"url"</span> | <span class="hljs-string">"blob"</span>): <span class="hljs-built_in">string</span> | <span class="hljs-title class_">Promise</span><<span class="hljs-title class_">Blob</span>>{ | |
| <span class="hljs-comment">// Return the response in the expected format.</span> | |
| <span class="hljs-keyword">throw</span> <span class="hljs-keyword">new</span> <span class="hljs-title class_">Error</span>(<span class="hljs-string">"Needs to be implemented"</span>); | |
| } | |
| }<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-qutua8">Implement the methods that require custom handling. Check out the base implementation to see the default behavior. If you don’t need to override a method, just remove it. You have to define at least <code>makeRoute</code>, <code>preparePayload</code> and <code>getResponse</code>.</p> <p data-svelte-h="svelte-uj5o95">If the provider supports multiple tasks that require different implementations, create dedicated subclasses for each task, following the pattern used in the existing provider implementations, e.g. <a href="https://github.com/huggingface/huggingface.js/blob/main/packages/inference/src/providers/together.ts" rel="nofollow">Together AI provider implementation</a>.</p> <p data-svelte-h="svelte-1ozzcb1">For text-generation and conversational tasks, you can just inherit from <code>BaseTextGenerationTask</code> and <code>BaseConversationalTask</code> respectively (defined in <a href="https://github.com/huggingface/huggingface.js/blob/main/packages/inference/src/providers/providerHelper.ts" rel="nofollow">providerHelper.ts</a>) and override the methods if needed.
Examples can be found in <a href="https://github.com/huggingface/huggingface.js/blob/main/packages/inference/src/providers/cerebras.ts" rel="nofollow">Cerebras</a> or <a href="https://github.com/huggingface/huggingface.js/blob/main/packages/inference/src/providers/fireworks.ts" rel="nofollow">Fireworks</a> provider implementations.</p> <h3 class="relative group"><a id="register-the-provider" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#register-the-provider"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Register the provider</span></h3> <p data-svelte-h="svelte-bhaoly">Go to <a href="https://github.com/huggingface/huggingface.js/blob/main/packages/inference/src/lib/getProviderHelper.ts" rel="nofollow">packages/inference/src/lib/getProviderHelper.ts</a> and add your provider to <code>PROVIDERS</code>.
Please try to respect alphabetical order.</p> <h2 class="relative group"><a id="3-model-mapping-api" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#3-model-mapping-api"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>3. Model Mapping API</span></h2> <p data-svelte-h="svelte-1vjzmk">Congratulations! You now have a JS implementation to successfully make inference calls on your infra! Time to integrate with the Hub!</p> <p data-svelte-h="svelte-1147onl">First step is to use the Model Mapping API to register which HF models are supported.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1letj1o">To proceed with this step, we have to enable your account server-side. 
Make sure you have an organization on the Hub for your enterprise.</p></div> <h3 class="relative group"><a id="register-a-mapping-item" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#register-a-mapping-item"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Register a mapping item</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 
leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->POST /api/partners/{provider}/models<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-f1hll8">Create a new mapping item, with the following body (JSON-encoded):</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"task"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"WidgetType"</span><span class="hljs-punctuation">,</span> <span class="hljs-comment">// required</span> | |
| <span class="hljs-attr">"hfModel"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"string"</span><span class="hljs-punctuation">,</span> <span class="hljs-comment">// required: the name of the model on HF: namespace/model-name</span> | |
| <span class="hljs-attr">"providerModel"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"string"</span><span class="hljs-punctuation">,</span> <span class="hljs-comment">// required: the partner's "model id" i.e. id on your side</span> | |
| <span class="hljs-attr">"status"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"live"</span> | <span class="hljs-string">"staging"</span> <span class="hljs-comment">// Optional: defaults to "staging". "staging" models are only available to members of the partner's org, then you switch them to "live" when they're ready to go live</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <ul data-svelte-h="svelte-u6m976"><li><code>task</code>, also known as <code>pipeline_tag</code> in the HF ecosystem, is the type of model / type of API | |
| (examples: “text-to-image”, “text-generation”, but you should use “conversational” for chat models)</li> <li><code>hfModel</code> is the model id on the Hub’s side.</li> <li><code>providerModel</code> is the model id on your side (can be the same or different).</li></ul> <p data-svelte-h="svelte-1h7c5bh">In the future, we will add support for a new parameter (ping us if it’s important to you now):</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"hfFilter"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span><span class="hljs-string">"string"</span><span class="hljs-punctuation">]</span> | |
| <span class="hljs-comment">// ^Power user move: register a "tag" slice of HF in one go.</span> | |
| <span class="hljs-comment">// Example: tag == "base_model:adapter:black-forest-labs/FLUX.1-dev" for all Flux-dev LoRAs</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <h4 class="relative group"><a id="authentication" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#authentication"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Authentication</span></h4> <p data-svelte-h="svelte-1avk6lq">You need to be in the <em>provider</em> Hub organization (e.g. <a href="https://huggingface.co/togethercomputer" rel="nofollow">https://huggingface.co/togethercomputer</a> | |
| for TogetherAI) with <strong>Write</strong> permissions to be able to access this endpoint.</p> <h4 class="relative group"><a id="validation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#validation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Validation</span></h4> <p data-svelte-h="svelte-pwj71q">The endpoint validates that:</p> <ul data-svelte-h="svelte-1i002r6"><li><code>hfModel</code> is indeed of <code>pipeline_tag == task</code> OR <code>task</code> is “conversational” and the model is | |
| compatible (i.e. the <code>pipeline_tag</code> is either “text-generation” or “image-text-to-text” AND the model is tagged as “conversational”).</li> <li>(in the future) we auto-test that the Partner’s API successfully responds to a | |
| huggingface.js/inference call of the corresponding task i.e. the API specs are valid.</li></ul> <h3 class="relative group"><a id="delete-a-mapping-item" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#delete-a-mapping-item"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Delete a mapping item</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 
px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->DELETE /api/partners/{provider}/models?hfModel=namespace/model-name<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="update-a-mapping-items-status" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#update-a-mapping-items-status"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Update a mapping item’s status</span></h3> <p data-svelte-h="svelte-18yjufw">Call this HTTP PUT endpoint:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" 
fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->PUT /api/partners/{provider}/models/status<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ar1qn2">With the following body (JSON-encoded):</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 
transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"hfModel"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"namespace/model-name"</span><span class="hljs-punctuation">,</span> <span class="hljs-comment">// The name of the model on HF</span> | |
| <span class="hljs-attr">"status"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"live"</span> | <span class="hljs-string">"staging"</span> <span class="hljs-comment">// The new status, one of "staging" or "live"</span> | |
| <span class="hljs-punctuation">}</span> <!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="list-the-whole-mapping" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#list-the-whole-mapping"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>List the whole mapping</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight 
rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->GET /api/partners/{provider}/models?status=staging|live<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-16sv3hm">This gets all mapping items from the DB. For clarity, the output is grouped by task.</p> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-94fcay">This is publicly accessible. It’s useful to be transparent by default and it helps debug client SDKs, etc.</p></div> <p data-svelte-h="svelte-1ovcz13">Here is an example of response:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div 
class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"text-to-image"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"black-forest-labs/FLUX.1-Canny-dev"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"providerId"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"black-forest-labs/FLUX.1-canny"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"status"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"live"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"black-forest-labs/FLUX.1-Depth-dev"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"providerId"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"black-forest-labs/FLUX.1-depth"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"status"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"live"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"conversational"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"deepseek-ai/DeepSeek-R1"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"providerId"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"deepseek-ai/DeepSeek-R1"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"status"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"live"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"text-generation"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"meta-llama/Llama-2-70b-hf"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"providerId"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"meta-llama/Llama-2-70b-hf"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"status"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"live"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"mistralai/Mixtral-8x7B-v0.1"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"providerId"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"mistralai/Mixtral-8x7B-v0.1"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"status"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"live"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="4-billing" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#4-billing"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>4. Billing</span></h2> <p data-svelte-h="svelte-1vo10n6">For routed requests (see figure below), i.e. when users authenticate via HF, our intent is that | |
| our users only pay the standard provider API rates. There’s no additional markup from us; we | |
| just pass through the provider costs directly. | |
| More details about the pricing structure can be found on the <a href="./pricing.md">pricing page</a>.</p> <div class="flex justify-center" data-svelte-h="svelte-1roilif"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/inference-providers/types_of_billing.png" alt="Diagram of the types of billing for inference requests, including routed requests"></div> <p data-svelte-h="svelte-1072wo1">We propose an easier way to figure out this cost and charge it to our users, by asking you to | |
| provide the cost for each request via an HTTP API you host on your end.</p> <h3 class="relative group"><a id="http-api-specs" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#http-api-specs"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>HTTP API Specs</span></h3> <p data-svelte-h="svelte-74sgny">We ask that you expose an API that supports an HTTP POST request. | |
| The body of the request is a JSON-encoded object containing a list of request IDs for which we | |
| request the cost.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->POST {your URL here} | |
| <span class="hljs-attribute">Content-Type</span><span class="hljs-punctuation">: </span>application/json | |
| { | |
| "requestIds": [ | |
| "deadbeef0", | |
| "deadbeef1", | |
| "deadbeef2", | |
| "deadbeef3" | |
| ] | |
| }<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-krncuc">The response is also JSON-encoded. The response contains an array of objects specifying the | |
| request’s ID and its cost in nano-USD (10^-9 USD).</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">HTTP/1.1</span> <span class="hljs-number">200</span> OK | |
| <span class="hljs-attribute">Content-Type</span><span class="hljs-punctuation">: </span>application/json | |
| <span class="language-prolog">{ | |
| <span class="hljs-string">"requests"</span>: [ | |
| { <span class="hljs-string">"requestId"</span>: <span class="hljs-string">"deadbeef0"</span>, <span class="hljs-string">"costNanoUsd"</span>: <span class="hljs-number">100</span> }, | |
| { <span class="hljs-string">"requestId"</span>: <span class="hljs-string">"deadbeef1"</span>, <span class="hljs-string">"costNanoUsd"</span>: <span class="hljs-number">100</span> }, | |
| { <span class="hljs-string">"requestId"</span>: <span class="hljs-string">"deadbeef2"</span>, <span class="hljs-string">"costNanoUsd"</span>: <span class="hljs-number">100</span> }, | |
| { <span class="hljs-string">"requestId"</span>: <span class="hljs-string">"deadbeef3"</span>, <span class="hljs-string">"costNanoUsd"</span>: <span class="hljs-number">100</span> } | |
| ] | |
| }</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="price-unit" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#price-unit"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Price Unit</span></h3> <p data-svelte-h="svelte-cj93i4">We require the price to be an <strong>integer</strong> number of <strong>nano-USDs</strong> (10^-9 USD).</p> <h3 class="relative group"><a id="how-to-define-the-request-id" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-to-define-the-request-id"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 
0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How to define the request ID</span></h3> <p data-svelte-h="svelte-speu7d">For each request/generation you serve, you should define a unique request (or response) ID, | |
| and provide it as a response Header. We will use this ID as the request ID for the billing API | |
| above.</p> <p data-svelte-h="svelte-13ler1d">As part of those requirements, please let us know your Header name. If you don’t already have one, we suggest the <code>Inference-Id</code> name for instance, and it should contain a UUID character string.</p> <p data-svelte-h="svelte-ub059v"><strong>Example</strong>: Defining an <code>Inference-Id</code> header in your inference response.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->POST /v1/chat/completions | |
| <span class="hljs-attribute">Content-Type</span><span class="hljs-punctuation">: </span>application/json | |
| [request headers] | |
| [request body] | |
| ------ | |
| <span class="hljs-meta">HTTP/1.1</span> <span class="hljs-number">200</span> OK | |
| <span class="hljs-attribute">Content-Type</span><span class="hljs-punctuation">: </span>application/json | |
| [other response headers] | |
| <span class="hljs-attribute">Inference-Id</span><span class="hljs-punctuation">: </span>unique-id-00131 | |
| [response body]<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="5-python-client-integration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#5-python-client-integration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>5. Python client integration</span></h2> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-33g0sl">Before adding a new provider to the <code>huggingface_hub</code> Python library, make sure that all the previous steps have been completed and everything is working on the Hub. 
Support in the Python library comes as a second step.</p></div> <h3 class="relative group"><a id="implement-the-provider-helper-python" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#implement-the-provider-helper-python"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Implement the provider helper (Python)</span></h3> <p data-svelte-h="svelte-102eqn3">Create a new file under <code>src/huggingface_hub/inference/_providers/{provider_name}.py</code> and copy-paste the following snippet.</p> <p data-svelte-h="svelte-lavds">Implement the methods that require custom handling. Check out the base implementation to check default behavior. If you don’t need to override a method, just remove it. 
At least one of <code>_prepare_payload_as_dict</code> or <code>_prepare_payload_as_bytes</code> must be overwritten.</p> <p data-svelte-h="svelte-f0qt8k">If the provider supports multiple tasks that require different implementations, create dedicated subclasses for each task, following the pattern shown in fal_ai.py.</p> <p data-svelte-h="svelte-w9ft7n">For text-generation and conversational tasks, one can just inherit from BaseTextGenerationTask and BaseConversationalTask respectively (defined in _common.py) and override the methods if needed. Examples can be found in fireworks_ai.py and together.py.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> typing <span class="hljs-keyword">import</span> <span class="hljs-type">Any</span>, <span class="hljs-type">Dict</span>, <span 
class="hljs-type">Optional</span>, <span class="hljs-type">Union</span> | |
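| <span class="hljs-keyword">from</span> huggingface_hub.inference._common <span class="hljs-keyword">import</span> RequestParameters | |
| <span class="hljs-keyword">from</span> ._common <span class="hljs-keyword">import</span> TaskProviderHelper | |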
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">MyNewProviderTaskProviderHelper</span>(<span class="hljs-title class_ inherited__">TaskProviderHelper</span>): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>): | |
| <span class="hljs-string">"""Define high-level parameters."""</span> | |
| <span class="hljs-built_in">super</span>().__init__(provider=..., base_url=..., task=...) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">get_response</span>(<span class="hljs-params"> | |
| self, | |
| response: <span class="hljs-type">Union</span>[<span class="hljs-built_in">bytes</span>, <span class="hljs-type">Dict</span>], | |
| request_params: <span class="hljs-type">Optional</span>[RequestParameters] = <span class="hljs-literal">None</span>, | |
| </span>) -> <span class="hljs-type">Any</span>: | |
| <span class="hljs-string">""" | |
| Return the response in the expected format. | |
| Override this method in subclasses for customized response handling."""</span> | |
| <span class="hljs-keyword">return</span> <span class="hljs-built_in">super</span>().get_response(response, request_params) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">_prepare_headers</span>(<span class="hljs-params">self, headers: <span class="hljs-type">Dict</span>, api_key: <span class="hljs-built_in">str</span></span>) -> <span class="hljs-type">Dict</span>: | |
| <span class="hljs-string">"""Return the headers to use for the request. | |
| Override this method in subclasses for customized headers. | |
| """</span> | |
| <span class="hljs-keyword">return</span> <span class="hljs-built_in">super</span>()._prepare_headers(headers, api_key) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">_prepare_route</span>(<span class="hljs-params">self, mapped_model: <span class="hljs-built_in">str</span>, api_key: <span class="hljs-built_in">str</span></span>) -> <span class="hljs-built_in">str</span>: | |
| <span class="hljs-string">"""Return the route to use for the request. | |
| Override this method in subclasses for customized routes. | |
| """</span> | |
| <span class="hljs-keyword">return</span> <span class="hljs-built_in">super</span>()._prepare_route(mapped_model, api_key) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">_prepare_payload_as_dict</span>(<span class="hljs-params">self, inputs: <span class="hljs-type">Any</span>, parameters: <span class="hljs-type">Dict</span>, mapped_model: <span class="hljs-built_in">str</span></span>) -> <span class="hljs-type">Optional</span>[<span class="hljs-type">Dict</span>]: | |
| <span class="hljs-string">"""Return the payload to use for the request, as a dict. | |
| Override this method in subclasses for customized payloads. | |
| Only one of `_prepare_payload_as_dict` and `_prepare_payload_as_bytes` should return a value. | |
| """</span> | |
| <span class="hljs-keyword">return</span> <span class="hljs-built_in">super</span>()._prepare_payload_as_dict(inputs, parameters, mapped_model) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">_prepare_payload_as_bytes</span>(<span class="hljs-params"> | |
| self, inputs: <span class="hljs-type">Any</span>, parameters: <span class="hljs-type">Dict</span>, mapped_model: <span class="hljs-built_in">str</span>, extra_payload: <span class="hljs-type">Optional</span>[<span class="hljs-type">Dict</span>] | |
| </span>) -> <span class="hljs-type">Optional</span>[<span class="hljs-built_in">bytes</span>]: | |
| <span class="hljs-string">"""Return the body to use for the request, as bytes. | |
| Override this method in subclasses for customized body data. | |
| Only one of `_prepare_payload_as_dict` and `_prepare_payload_as_bytes` should return a value. | |
| """</span> | |
| <span class="hljs-keyword">return</span> <span class="hljs-built_in">super</span>()._prepare_payload_as_bytes(inputs, parameters, mapped_model, extra_payload)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="register-the-provider" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#register-the-provider"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Register the Provider</span></h3> <ul data-svelte-h="svelte-117ozzs"><li>Go to <a href="https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/inference/_providers/__init__.py" rel="nofollow">src/huggingface_hub/inference/_providers/__init__.py</a> and add your provider to <code>PROVIDER_T</code> and <code>PROVIDERS</code>. 
Please try to respect alphabetical order.</li> <li>Go to <a href="https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/inference/_client.py" rel="nofollow">src/huggingface_hub/inference/_client.py</a> and update docstring in <code>InferenceClient.__init__</code> to document your provider.</li></ul> <h3 class="relative group"><a id="add-tests" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#add-tests"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Add tests</span></h3> <ul data-svelte-h="svelte-vl7c7p"><li>Go to <a href="https://github.com/huggingface/huggingface_hub/blob/main/tests/test_inference_providers.py" rel="nofollow">tests/test_inference_providers.py</a> and add static tests for overridden methods.</li></ul> <h2 class="relative group"><a id="faq" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#faq"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>FAQ</span></h2> <p data-svelte-h="svelte-1yfr1yr"><strong>Question:</strong> By default, in which order do we list providers in the settings page?</p> <p data-svelte-h="svelte-1duzsqa"><strong>Answer:</strong> The default sort is by total number of requests routed by HF over the last 7 days. This order defines which provider will be used in priority by the widget on the model page (but the user’s order takes precedence).</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hub-docs/blob/main/docs/inference-providers/register-as-a-provider.md" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
// Client bootstrap for this page: publishes the asset/base paths on a global,
// then loads the two entry modules and starts the app on the element that
// contains this script tag.
// NOTE(review): looks like SvelteKit's generated hydration snippet — confirm
// before hand-editing.
| { | |
// Global read by the client runtime; both paths point at the same docs root.
| __sveltekit_1o5mypj = { | |
| assets: "/docs/inference-providers/pr_1663/en", | |
| base: "/docs/inference-providers/pr_1663/en", | |
| env: {} | |
| }; | |
// Mount target: the parent element of the currently executing script tag.
| const element = document.currentScript.parentElement; | |
// Two null entries, passed through unchanged to kit.start as `data`.
| const data = [null,null]; | |
// Fetch both entry modules in parallel; start only once both have resolved.
| Promise.all([ | |
| import("/docs/inference-providers/pr_1663/en/_app/immutable/entry/start.d5f15666.js"), | |
| import("/docs/inference-providers/pr_1663/en/_app/immutable/entry/app.68b4644d.js") | |
| ]).then(([kit, app]) => { | |
// Hand the app module, the mount target, and the initial state to the
// start entry point.
| kit.start(app, element, { | |
| node_ids: [0, 6], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 83.3 kB
- Xet hash:
- 8fb87767356773b26097c3bd584ae06c9335abacca56b0aebc1f143009a1a6ca
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.