Buckets:

hf-doc-build/doc-dev / inference-providers /pr_1663 /en /register-as-a-provider.html
rtrm's picture
download
raw
83.3 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;How to be registered as an inference provider on the Hub?&quot;,&quot;local&quot;:&quot;how-to-be-registered-as-an-inference-provider-on-the-hub&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;1. Prerequisites&quot;,&quot;local&quot;:&quot;1-prerequisites&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;What is a Task&quot;,&quot;local&quot;:&quot;what-is-a-task&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Task API schema&quot;,&quot;local&quot;:&quot;task-api-schema&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;2. JS Client Integration&quot;,&quot;local&quot;:&quot;2-js-client-integration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Implement the provider helper (JS)&quot;,&quot;local&quot;:&quot;implement-the-provider-helper-js&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Register the provider&quot;,&quot;local&quot;:&quot;register-the-provider&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;3. Model Mapping API&quot;,&quot;local&quot;:&quot;3-model-mapping-api&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Register a mapping item&quot;,&quot;local&quot;:&quot;register-a-mapping-item&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Authentication&quot;,&quot;local&quot;:&quot;authentication&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Validation&quot;,&quot;local&quot;:&quot;validation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Delete a mapping item&quot;,&quot;local&quot;:&quot;delete-a-mapping-item&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Update a mapping item’s status&quot;,&quot;local&quot;:&quot;update-a-mapping-items-status&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;List the whole mapping&quot;,&quot;local&quot;:&quot;list-the-whole-mapping&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;4. Billing&quot;,&quot;local&quot;:&quot;4-billing&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;HTTP API Specs&quot;,&quot;local&quot;:&quot;http-api-specs&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Price Unit&quot;,&quot;local&quot;:&quot;price-unit&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;How to define the request ID&quot;,&quot;local&quot;:&quot;how-to-define-the-request-id&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;5. Python client integration&quot;,&quot;local&quot;:&quot;5-python-client-integration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Implement the provider helper (Python)&quot;,&quot;local&quot;:&quot;implement-the-provider-helper-python&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Register the Provider&quot;,&quot;local&quot;:&quot;register-the-provider&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Add tests&quot;,&quot;local&quot;:&quot;add-tests&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;FAQ&quot;,&quot;local&quot;:&quot;faq&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/inference-providers/pr_1663/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/entry/start.d5f15666.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/scheduler.ddb4e551.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/singletons.0f5b782d.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.ce98237b.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/paths.b324c1e2.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/entry/app.68b4644d.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.e16e4efa.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/nodes/0.80863911.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/nodes/6.cc8da116.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/Tip.20abb04f.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/CodeBlock.754e6cfc.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.e108c5ed.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;How to be registered as an inference provider on the Hub?&quot;,&quot;local&quot;:&quot;how-to-be-registered-as-an-inference-provider-on-the-hub&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;1. Prerequisites&quot;,&quot;local&quot;:&quot;1-prerequisites&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;What is a Task&quot;,&quot;local&quot;:&quot;what-is-a-task&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Task API schema&quot;,&quot;local&quot;:&quot;task-api-schema&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;2. JS Client Integration&quot;,&quot;local&quot;:&quot;2-js-client-integration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Implement the provider helper (JS)&quot;,&quot;local&quot;:&quot;implement-the-provider-helper-js&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Register the provider&quot;,&quot;local&quot;:&quot;register-the-provider&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;3. Model Mapping API&quot;,&quot;local&quot;:&quot;3-model-mapping-api&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Register a mapping item&quot;,&quot;local&quot;:&quot;register-a-mapping-item&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Authentication&quot;,&quot;local&quot;:&quot;authentication&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Validation&quot;,&quot;local&quot;:&quot;validation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Delete a mapping item&quot;,&quot;local&quot;:&quot;delete-a-mapping-item&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Update a mapping item’s status&quot;,&quot;local&quot;:&quot;update-a-mapping-items-status&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;List the whole mapping&quot;,&quot;local&quot;:&quot;list-the-whole-mapping&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;4. Billing&quot;,&quot;local&quot;:&quot;4-billing&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;HTTP API Specs&quot;,&quot;local&quot;:&quot;http-api-specs&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Price Unit&quot;,&quot;local&quot;:&quot;price-unit&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;How to define the request ID&quot;,&quot;local&quot;:&quot;how-to-define-the-request-id&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;5. Python client integration&quot;,&quot;local&quot;:&quot;5-python-client-integration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Implement the provider helper (Python)&quot;,&quot;local&quot;:&quot;implement-the-provider-helper-python&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Register the Provider&quot;,&quot;local&quot;:&quot;register-the-provider&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Add tests&quot;,&quot;local&quot;:&quot;add-tests&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;FAQ&quot;,&quot;local&quot;:&quot;faq&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="how-to-be-registered-as-an-inference-provider-on-the-hub" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-to-be-registered-as-an-inference-provider-on-the-hub"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How to be registered as an inference provider on the Hub?</span></h1> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-3qkokv">Want to be listed as an Inference Provider on the Hugging Face Hub? Please reach out to us on social networks or <a href="https://huggingface.co/spaces/huggingface/HuggingDiscussions/discussions/49" rel="nofollow">here on the Hub</a>.</p></div> <p data-svelte-h="svelte-eo2v07">This guide details the steps for registering as an inference provider on the Hub and provides implementation guidance.</p> <ol data-svelte-h="svelte-8emskd"><li><strong>Implement standard task APIs</strong> - Follow our task API schemas for compatibility (see <a href="#1-prerequisites">Prerequisites</a>).</li> <li><strong>Submit a PR for JS client integration</strong> - Add your provider to <a href="https://github.com/huggingface/huggingface.js/tree/main/packages/inference" rel="nofollow">huggingface.js</a> (see <a href="#2-js-client-integration">JS Client Integration</a>).</li> <li><strong>Register model mappings</strong> - Use our Model Mapping API to link your models to Hub models (see <a href="#3-model-mapping-api">Model Mapping API</a>).</li> <li><strong>Implement a billing endpoint</strong> - Provide an API for billing (see <a href="#4-billing">Billing</a>).</li> <li><strong>Submit a PR for Python client integration</strong> - Add your provider to <a href="https://github.com/huggingface/huggingface_hub" rel="nofollow">huggingface_hub</a> (see <a href="#5-python-client-integration">Python client integration</a>).</li> <li><strong>Register your provider server-side and provide an icon</strong> - Reach out to us to add your provider server-side and provide your SVG icon.</li> <li><strong>Create documentation on your side</strong> - Add documentation and do a lot of communication on your side.</li> <li><strong>Add a documentation page</strong> - Open a Pull Request in this repo (huggingface/hub-docs) to add a provider-specific page in the documentation.</li> <li><strong>Share share share</strong> do a lot of comms so that your integration is as successful as possible!</li></ol> <h2 class="relative group"><a id="1-prerequisites" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#1-prerequisites"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>1. Prerequisites</span></h2> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-90vl22">If your implementation strictly follows the OpenAI API for LLMs and VLMs, you may be able to skip most of this section. In that case, simply open a PR on <a href="https://github.com/huggingface/huggingface.js/tree/main/packages/inference" rel="nofollow">huggingface.js</a> to register.</p></div> <p data-svelte-h="svelte-iwney4">The first step to understand the integration is to take a look at the JS inference client that lives
inside the <a href="https://github.com/huggingface/huggingface.js/tree/main/packages/inference" rel="nofollow">huggingface.js</a> repo.</p> <p data-svelte-h="svelte-1xoupob">This is the client that powers our Inference widgets on model pages, and is the blueprint
implementation downstream (for Python SDK, to generate code snippets, etc.).</p> <h3 class="relative group"><a id="what-is-a-task" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-is-a-task"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What is a Task</span></h3> <p data-svelte-h="svelte-hjt0h1">You will see that inference methods (<code>textToImage</code>, <code>chatCompletion</code>, etc.) have names that closely
mirror the task names. A task, also known as <code>pipeline_tag</code> in the HF ecosystem, is the type of
model (basically which types of inputs and outputs the model has), for instance “text-generation”
or “text-to-image”. It is indicated prominently on model pages, here:</p> <div class="flex justify-center" data-svelte-h="svelte-1cxy3tx"><picture><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/inference-providers/pipeline-tag-on-model-page-light.png"> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/inference-providers/pipeline-tag-on-model-page-dark.png"></picture></div> <p data-svelte-h="svelte-vmhaok">The list of all possible tasks can be found at <a href="https://huggingface.co/tasks" rel="nofollow">https://huggingface.co/tasks</a> and the list of JS method names is documented in the README at <a href="https://github.com/huggingface/huggingface.js/tree/main/packages/inference" rel="nofollow">https://github.com/huggingface/huggingface.js/tree/main/packages/inference</a>.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-19q0jcc">Note that <code>chatCompletion</code> is an exception as it is not a pipeline_tag, per se. Instead, it
includes models with either <code>pipeline_tag=&quot;text-generation&quot;</code> or <code>pipeline_tag=&quot;image-text-to-text&quot;</code>
which are tagged as “conversational”.</p></div> <h3 class="relative group"><a id="task-api-schema" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#task-api-schema"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Task API schema</span></h3> <p data-svelte-h="svelte-epfhsj">For each task type, we enforce an API schema to make it easier for end users to use different
models interchangeably. To be compatible, your third-party API must adhere to a “standard” shape API we expect on HF model pages for each pipeline task type.</p> <p data-svelte-h="svelte-n5dyw">This is not an issue for LLMs as everyone converged on the OpenAI API anyways, but can be
more tricky for other tasks like “text-to-image” or “automatic-speech-recognition” where there
exists no standard API.</p> <p data-svelte-h="svelte-qm2nt8">For example, you can find the expected schema for Text to Speech here: <a href="https://github.com/huggingface/huggingface.js/blob/0a690a14d52041a872dc103846225603599f4a33/packages/tasks/src/tasks/text-to-speech/spec/input.json#L4" rel="nofollow">https://github.com/huggingface/huggingface.js/packages/src/tasks/text-to-speech/spec/input.json#L4</a>, and similarly for other supported tasks. If your API for a given task is different from HF’s, it is not an issue: you can tweak the code in <code>huggingface.js</code> to be able to call your models, i.e., provide some kind of “translation” of parameter names and output names. However, API specs should not be model-specific, only task-specific. Run the JS code and add some <a href="https://github.com/huggingface/huggingface.js/blob/main/packages/inference/test/HfInference.spec.ts" rel="nofollow">tests</a> to make sure it works well. We can help with this step!</p> <h2 class="relative group"><a id="2-js-client-integration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#2-js-client-integration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>2. JS Client Integration</span></h2> <p data-svelte-h="svelte-aqaw0f">Before proceeding with the next steps, ensure you’ve implemented the necessary code to integrate with the JS client and thoroughly tested your implementation. Here are the steps to follow:</p> <h3 class="relative group"><a id="implement-the-provider-helper-js" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#implement-the-provider-helper-js"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Implement the provider helper (JS)</span></h3> <p data-svelte-h="svelte-1ccsiy1">Create a new file under <code>packages/inference/src/providers/{provider_name}.ts</code> and copy-paste the following snippet.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> { <span class="hljs-title class_">TaskProviderHelper</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">&quot;./providerHelper&quot;</span>;
<span class="hljs-keyword">export</span> <span class="hljs-keyword">class</span> <span class="hljs-title class_">MyNewProviderTask</span> <span class="hljs-keyword">extends</span> <span class="hljs-title class_ inherited__">TaskProviderHelper</span> {
<span class="hljs-title function_">constructor</span>(<span class="hljs-params"></span>) {
<span class="hljs-variable language_">super</span>(<span class="hljs-string">&quot;your-provider-name&quot;</span>, <span class="hljs-string">&quot;your-api-base-url&quot;</span>, <span class="hljs-string">&quot;task-name&quot;</span>);
}
<span class="hljs-keyword">override</span> <span class="hljs-title function_">prepareHeaders</span>(<span class="hljs-attr">params</span>: <span class="hljs-title class_">HeaderParams</span>, <span class="hljs-attr">binary</span>: <span class="hljs-built_in">boolean</span>): <span class="hljs-title class_">Record</span>&lt;<span class="hljs-built_in">string</span>, <span class="hljs-built_in">string</span>&gt; {
<span class="hljs-comment">// Override the headers to use for the request.</span>
<span class="hljs-keyword">return</span> <span class="hljs-variable language_">super</span>.<span class="hljs-title function_">prepareHeaders</span>(params, binary);
}
<span class="hljs-title function_">makeRoute</span>(<span class="hljs-attr">params</span>: <span class="hljs-title class_">UrlParams</span>): <span class="hljs-built_in">string</span> {
<span class="hljs-comment">// Return the route to use for the request. e.g. /v1/chat/completions route is commonly use for chat completion.</span>
<span class="hljs-keyword">throw</span> <span class="hljs-keyword">new</span> <span class="hljs-title class_">Error</span>(<span class="hljs-string">&quot;Needs to be implemented&quot;</span>);
}
<span class="hljs-title function_">preparePayload</span>(<span class="hljs-attr">params</span>: <span class="hljs-title class_">BodyParams</span>): <span class="hljs-title class_">Record</span>&lt;<span class="hljs-built_in">string</span>, <span class="hljs-built_in">unknown</span>&gt; {
<span class="hljs-comment">// Return the payload to use for the request, as a dict.</span>
<span class="hljs-keyword">throw</span> <span class="hljs-keyword">new</span> <span class="hljs-title class_">Error</span>(<span class="hljs-string">&quot;Needs to be implemented&quot;</span>);
}
<span class="hljs-title function_">getResponse</span>(<span class="hljs-attr">response</span>: <span class="hljs-built_in">unknown</span>, outputType?: <span class="hljs-string">&quot;url&quot;</span> | <span class="hljs-string">&quot;blob&quot;</span>): <span class="hljs-built_in">string</span> | <span class="hljs-title class_">Promise</span>&lt;<span class="hljs-title class_">Blob</span>&gt;{
<span class="hljs-comment">// Return the response in the expected format.</span>
<span class="hljs-keyword">throw</span> <span class="hljs-keyword">new</span> <span class="hljs-title class_">Error</span>(<span class="hljs-string">&quot;Needs to be implemented&quot;</span>);
}
}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-qutua8">Implement the methods that require custom handling. Check out the base implementation to check default behavior. If you don’t need to override a method, just remove it. You have to define at least <code>makeRoute</code>, <code>preparePayload</code> and <code>getResponse</code>.</p> <p data-svelte-h="svelte-uj5o95">If the provider supports multiple tasks that require different implementations, create dedicated subclasses for each task, following the pattern used in the existing providers implementation, e.g. <a href="https://github.com/huggingface/huggingface.js/blob/main/packages/inference/src/providers/together.ts" rel="nofollow">Together AI provider implementation</a>.</p> <p data-svelte-h="svelte-1ozzcb1">For text-generation and conversational tasks, you can just inherit from <code>BaseTextGenerationTask</code> and <code>BaseConversationalTask</code> respectively (defined in <a href="(https://github.com/huggingface/huggingface.js/blob/main/packages/inference/src/providers/providerHelper.ts)">providerHelper.ts</a>) and override the methods if needed. Examples can be found in <a href="https://github.com/huggingface/huggingface.js/blob/main/packages/inference/src/providers/cerebras.ts" rel="nofollow">Cerebras</a> or <a href="https://github.com/huggingface/huggingface.js/blob/main/packages/inference/src/providers/fireworks.ts" rel="nofollow">Fireworks</a> provider implementations.</p> <h3 class="relative group"><a id="register-the-provider" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#register-the-provider"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Register the provider</span></h3> <p data-svelte-h="svelte-bhaoly">Go to <a href="https://github.com/huggingface/huggingface.js//blob/main/packages/inference/src/lib/getProviderHelper.ts" rel="nofollow">packages/inference/src/lib/getProviderHelper.ts</a> and add your provider to <code>PROVIDERS</code>. Please try to respect alphabetical order.</p> <h2 class="relative group"><a id="3-model-mapping-api" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#3-model-mapping-api"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>3. Model Mapping API</span></h2> <p data-svelte-h="svelte-1vjzmk">Congratulations! You now have a JS implementation to successfully make inference calls on your infra! Time to integrate with the Hub!</p> <p data-svelte-h="svelte-1147onl">First step is to use the Model Mapping API to register which HF models are supported.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1letj1o">To proceed with this step, we have to enable your account server-side. Make sure you have an organization on the Hub for your enterprise.</p></div> <h3 class="relative group"><a id="register-a-mapping-item" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#register-a-mapping-item"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Register a mapping item</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->POST /api/partners/{provider}/models<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-f1hll8">Create a new mapping item, with the following body (JSON-encoded):</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;task&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;WidgetType&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-comment">// required</span>
<span class="hljs-attr">&quot;hfModel&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;string&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-comment">// required: the name of the model on HF: namespace/model-name</span>
<span class="hljs-attr">&quot;providerModel&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;string&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-comment">// required: the partner&#x27;s &quot;model id&quot; i.e. id on your side</span>
<span class="hljs-attr">&quot;status&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;live&quot;</span> | <span class="hljs-string">&quot;staging&quot;</span> <span class="hljs-comment">// Optional: defaults to &quot;staging&quot;. &quot;staging&quot; models are only available to members of the partner&#x27;s org, then you switch them to &quot;live&quot; when they&#x27;re ready to go live</span>
<span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <ul data-svelte-h="svelte-u6m976"><li><code>task</code>, also known as <code>pipeline_tag</code> in the HF ecosystem, is the type of model / type of API
(examples: “text-to-image”, “text-generation”, but you should use “conversational” for chat models)</li> <li><code>hfModel</code> is the model id on the Hub’s side.</li> <li><code>providerModel</code> is the model id on your side (can be the same or different).</li></ul> <p data-svelte-h="svelte-1h7c5bh">In the future, we will add support for a new parameter (ping us if it’s important to you now):</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;hfFilter&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span><span class="hljs-string">&quot;string&quot;</span><span class="hljs-punctuation">]</span>
<span class="hljs-comment">// ^Power user move: register a &quot;tag&quot; slice of HF in one go.</span>
<span class="hljs-comment">// Example: tag == &quot;base_model:adapter:black-forest-labs/FLUX.1-dev&quot; for all Flux-dev LoRAs</span>
<span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <h4 class="relative group"><a id="authentication" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#authentication"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Authentication</span></h4> <p data-svelte-h="svelte-1avk6lq">You need to be in the <em>provider</em> Hub organization (e.g. <a href="https://huggingface.co/togethercomputer" rel="nofollow">https://huggingface.co/togethercomputer</a>
for TogetherAI) with <strong>Write</strong> permissions to be able to access this endpoint.</p> <h4 class="relative group"><a id="validation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#validation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Validation</span></h4> <p data-svelte-h="svelte-pwj71q">The endpoint validates that:</p> <ul data-svelte-h="svelte-1i002r6"><li><code>hfModel</code> is indeed of <code>pipeline_tag == task</code> OR <code>task</code> is “conversational” and the model is
compatible (i.e. the <code>pipeline_tag</code> is either “text-generation” or “image-text-to-text” AND the model is tagged as “conversational”).</li> <li>(in the future) we auto-test that the Partner’s API successfully responds to a
huggingface.js/inference call of the corresponding task i.e. the API specs are valid.</li></ul> <h3 class="relative group"><a id="delete-a-mapping-item" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#delete-a-mapping-item"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Delete a mapping item</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->DELETE /api/partners/{provider}/models?hfModel=namespace/model-name<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="update-a-mapping-items-status" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#update-a-mapping-items-status"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Update a mapping item’s status</span></h3> <p data-svelte-h="svelte-18yjufw">Call this HTTP PUT endpoint:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->PUT /api/partners/{provider}/models/status<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ar1qn2">With the following body (JSON-encoded):</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;hfModel&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;namespace/model-name&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-comment">// The name of the model on HF</span>
<span class="hljs-attr">&quot;status&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;live&quot;</span> | <span class="hljs-string">&quot;staging&quot;</span> <span class="hljs-comment">// The new status, one of &quot;staging&quot; or &quot;live&quot;</span>
<span class="hljs-punctuation">}</span> <!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="list-the-whole-mapping" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#list-the-whole-mapping"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>List the whole mapping</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->GET /api/partners/{provider}/models?status=staging|live<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-16sv3hm">This gets all mapping items from the DB. For clarity, the output is grouped by task.</p> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-94fcay">This is publicly accessible. It’s useful to be transparent by default and it helps debug client SDKs, etc.</p></div> <p data-svelte-h="svelte-1ovcz13">Here is an example of response:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;text-to-image&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;black-forest-labs/FLUX.1-Canny-dev&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;providerId&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;black-forest-labs/FLUX.1-canny&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;status&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;live&quot;</span>
<span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;black-forest-labs/FLUX.1-Depth-dev&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;providerId&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;black-forest-labs/FLUX.1-depth&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;status&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;live&quot;</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;conversational&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;deepseek-ai/DeepSeek-R1&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;providerId&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;deepseek-ai/DeepSeek-R1&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;status&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;live&quot;</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;text-generation&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;meta-llama/Llama-2-70b-hf&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;providerId&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;meta-llama/Llama-2-70b-hf&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;status&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;live&quot;</span>
<span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;mistralai/Mixtral-8x7B-v0.1&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;providerId&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;mistralai/Mixtral-8x7B-v0.1&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;status&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;live&quot;</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="4-billing" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#4-billing"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>4. Billing</span></h2> <p data-svelte-h="svelte-1vo10n6">For routed requests (see figure below), i.e. when users authenticate via HF, our intent is that
our users only pay the standard provider API rates. There’s no additional markup from us, we
just pass through the provider costs directly.
More details about the pricing structure can be found on the <a href="./pricing.md">pricing page</a>.</p> <div class="flex justify-center" data-svelte-h="svelte-1roilif"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/inference-providers/types_of_billing.png"></div> <p data-svelte-h="svelte-1072wo1">We propose an easier way to figure out this cost and charge it to our users, by asking you to
provide the cost for each request via an HTTP API you host on your end.</p> <h3 class="relative group"><a id="http-api-specs" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#http-api-specs"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>HTTP API Specs</span></h3> <p data-svelte-h="svelte-74sgny">We ask that you expose an API that supports a HTTP POST request.
The body of the request is a JSON-encoded object containing a list of request IDs for which we
request the cost.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->POST {your URL here}
<span class="hljs-attribute">Content-Type</span><span class="hljs-punctuation">: </span>application/json
{
&quot;requestIds&quot;: [
&quot;deadbeef0&quot;,
&quot;deadbeef1&quot;,
&quot;deadbeef2&quot;,
&quot;deadbeef3&quot;
]
}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-krncuc">The response is also JSON-encoded. The response contains an array of objects specifying the
request’s ID and its cost in nano-USD (10^-9 USD).</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">HTTP/1.1</span> <span class="hljs-number">200</span> OK
<span class="hljs-attribute">Content-Type</span><span class="hljs-punctuation">: </span>application/json
<span class="language-prolog">{
<span class="hljs-string">&quot;requests&quot;</span>: [
{ <span class="hljs-string">&quot;requestId&quot;</span>: <span class="hljs-string">&quot;deadbeef0&quot;</span>, <span class="hljs-string">&quot;costNanoUsd&quot;</span>: <span class="hljs-number">100</span> },
{ <span class="hljs-string">&quot;requestId&quot;</span>: <span class="hljs-string">&quot;deadbeef1&quot;</span>, <span class="hljs-string">&quot;costNanoUsd&quot;</span>: <span class="hljs-number">100</span> },
{ <span class="hljs-string">&quot;requestId&quot;</span>: <span class="hljs-string">&quot;deadbeef2&quot;</span>, <span class="hljs-string">&quot;costNanoUsd&quot;</span>: <span class="hljs-number">100</span> },
{ <span class="hljs-string">&quot;requestId&quot;</span>: <span class="hljs-string">&quot;deadbeef3&quot;</span>, <span class="hljs-string">&quot;costNanoUsd&quot;</span>: <span class="hljs-number">100</span> }
]
}</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="price-unit" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#price-unit"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Price Unit</span></h3> <p data-svelte-h="svelte-cj93i4">We require the price to be an <strong>integer</strong> number of <strong>nano-USDs</strong> (10^-9 USD).</p> <h3 class="relative group"><a id="how-to-define-the-request-id" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-to-define-the-request-id"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How to define the request ID</span></h3> <p data-svelte-h="svelte-speu7d">For each request/generation you serve, you should define a unique request (or response) ID,
and provide it as a response Header. We will use this ID as the request ID for the billing API
above.</p> <p data-svelte-h="svelte-13ler1d">As part of those requirements, please let us know your Header name. If you don’t already have one, we suggest the <code>Inference-Id</code> name for instance, and it should contain a UUID character string.</p> <p data-svelte-h="svelte-ub059v"><strong>Example</strong>: Defining an <code>Inference-Id</code> header in your inference response.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->POST /v1/chat/completions
<span class="hljs-attribute">Content-Type</span><span class="hljs-punctuation">: </span>application/json
[request headers]
[request body]
------
<span class="hljs-meta">HTTP/1.1</span> <span class="hljs-number">200</span> OK
<span class="hljs-attribute">Content-Type</span><span class="hljs-punctuation">: </span>application/json
[other request headers]
<span class="hljs-attribute">Inference-Id</span><span class="hljs-punctuation">: </span>unique-id-00131
[response body]<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="5-python-client-integration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#5-python-client-integration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>5. Python client integration</span></h2> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-33g0sl">Before adding a new provider to the <code>huggingface_hub</code> Python library, make sure that all the previous steps have been completed and everything is working on the Hub. Support in the Python library comes as a second step.</p></div> <h3 class="relative group"><a id="implement-the-provider-helper-python" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#implement-the-provider-helper-python"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Implement the provider helper (Python)</span></h3> <p data-svelte-h="svelte-102eqn3">Create a new file under <code>src/huggingface_hub/inference/_providers/{provider_name}.py</code> and copy-paste the following snippet.</p> <p data-svelte-h="svelte-lavds">Implement the methods that require custom handling. Check out the base implementation to check default behavior. If you don’t need to override a method, just remove it. At least one of <code>_prepare_payload_as_dict</code> or <code>_prepare_payload_as_bytes</code> must be overwritten.</p> <p data-svelte-h="svelte-f0qt8k">If the provider supports multiple tasks that require different implementations, create dedicated subclasses for each task, following the pattern shown in fal_ai.py.</p> <p data-svelte-h="svelte-w9ft7n">For text-generation and conversational tasks, one can just inherit from BaseTextGenerationTask and BaseConversationalTask respectively (defined in _common.py) and override the methods if needed. Examples can be found in fireworks_ai.py and together.py.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> typing <span class="hljs-keyword">import</span> <span class="hljs-type">Any</span>, <span class="hljs-type">Dict</span>, <span class="hljs-type">Optional</span>, <span class="hljs-type">Union</span>
<span class="hljs-keyword">from</span> ._common <span class="hljs-keyword">import</span> TaskProviderHelper
<span class="hljs-keyword">class</span> <span class="hljs-title class_">MyNewProviderTaskProviderHelper</span>(<span class="hljs-title class_ inherited__">TaskProviderHelper</span>):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>):
<span class="hljs-string">&quot;&quot;&quot;Define high-level parameters.&quot;&quot;&quot;</span>
<span class="hljs-built_in">super</span>().__init__(provider=..., base_url=..., task=...)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">get_response</span>(<span class="hljs-params">
self,
response: <span class="hljs-type">Union</span>[<span class="hljs-built_in">bytes</span>, <span class="hljs-type">Dict</span>],
request_params: <span class="hljs-type">Optional</span>[RequestParameters] = <span class="hljs-literal">None</span>,
</span>) -&gt; <span class="hljs-type">Any</span>:
<span class="hljs-string">&quot;&quot;&quot;
Return the response in the expected format.
Override this method in subclasses for customized response handling.&quot;&quot;&quot;</span>
<span class="hljs-keyword">return</span> <span class="hljs-built_in">super</span>().get_response(response)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">_prepare_headers</span>(<span class="hljs-params">self, headers: <span class="hljs-type">Dict</span>, api_key: <span class="hljs-built_in">str</span></span>) -&gt; <span class="hljs-type">Dict</span>:
<span class="hljs-string">&quot;&quot;&quot;Return the headers to use for the request.
Override this method in subclasses for customized headers.
&quot;&quot;&quot;</span>
<span class="hljs-keyword">return</span> <span class="hljs-built_in">super</span>()._prepare_headers(headers, api_key)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">_prepare_route</span>(<span class="hljs-params">self, mapped_model: <span class="hljs-built_in">str</span>, api_key: <span class="hljs-built_in">str</span></span>) -&gt; <span class="hljs-built_in">str</span>:
<span class="hljs-string">&quot;&quot;&quot;Return the route to use for the request.
Override this method in subclasses for customized routes.
&quot;&quot;&quot;</span>
<span class="hljs-keyword">return</span> <span class="hljs-built_in">super</span>()._prepare_route(mapped_model)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">_prepare_payload_as_dict</span>(<span class="hljs-params">self, inputs: <span class="hljs-type">Any</span>, parameters: <span class="hljs-type">Dict</span>, mapped_model: <span class="hljs-built_in">str</span></span>) -&gt; <span class="hljs-type">Optional</span>[<span class="hljs-type">Dict</span>]:
<span class="hljs-string">&quot;&quot;&quot;Return the payload to use for the request, as a dict.
Override this method in subclasses for customized payloads.
Only one of `_prepare_payload_as_dict` and `_prepare_payload_as_bytes` should return a value.
&quot;&quot;&quot;</span>
<span class="hljs-keyword">return</span> <span class="hljs-built_in">super</span>()._prepare_payload_as_dict(inputs, parameters, mapped_model)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">_prepare_payload_as_bytes</span>(<span class="hljs-params">
self, inputs: <span class="hljs-type">Any</span>, parameters: <span class="hljs-type">Dict</span>, mapped_model: <span class="hljs-built_in">str</span>, extra_payload: <span class="hljs-type">Optional</span>[<span class="hljs-type">Dict</span>]
</span>) -&gt; <span class="hljs-type">Optional</span>[<span class="hljs-built_in">bytes</span>]:
<span class="hljs-string">&quot;&quot;&quot;Return the body to use for the request, as bytes.
Override this method in subclasses for customized body data.
Only one of `_prepare_payload_as_dict` and `_prepare_payload_as_bytes` should return a value.
&quot;&quot;&quot;</span>
<span class="hljs-keyword">return</span> <span class="hljs-built_in">super</span>()._prepare_payload_as_bytes(inputs, parameters, mapped_model, extra_payload)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="register-the-provider" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#register-the-provider"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Register the Provider</span></h3> <ul data-svelte-h="svelte-117ozzs"><li>Go to <a href="https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/inference/_providers/__init__.py" rel="nofollow">src/huggingface_hub/inference/_providers/<strong>init</strong>.py</a> and add your provider to <code>PROVIDER_T</code> and <code>PROVIDERS</code>. Please try to respect alphabetical order.</li> <li>Go to <a href="https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/inference/_client.py" rel="nofollow">src/huggingface_hub/inference/_client.py</a> and update docstring in <code>InferenceClient.__init__</code> to document your provider.</li></ul> <h3 class="relative group"><a id="add-tests" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#add-tests"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Add tests</span></h3> <ul data-svelte-h="svelte-vl7c7p"><li>Go to <a href="https://github.com/huggingface/huggingface_hub/blob/main/tests/test_inference_providers.py" rel="nofollow">tests/test_inference_providers.py</a> and add static tests for overridden methods.</li></ul> <h2 class="relative group"><a id="faq" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#faq"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>FAQ</span></h2> <p data-svelte-h="svelte-1yfr1yr"><strong>Question:</strong> By default, in which order do we list providers in the settings page?</p> <p data-svelte-h="svelte-1duzsqa"><strong>Answer:</strong> The default sort is by total number of requests routed by HF over the last 7 days. This order defines which provider will be used in priority by the widget on the model page (but the user’s order takes precedence).</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hub-docs/blob/main/docs/inference-providers/register-as-a-provider.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_1o5mypj = {
assets: "/docs/inference-providers/pr_1663/en",
base: "/docs/inference-providers/pr_1663/en",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/inference-providers/pr_1663/en/_app/immutable/entry/start.d5f15666.js"),
import("/docs/inference-providers/pr_1663/en/_app/immutable/entry/app.68b4644d.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 6],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
83.3 kB
·
Xet hash:
8fb87767356773b26097c3bd584ae06c9335abacca56b0aebc1f143009a1a6ca

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.