Buckets:

rtrm's picture
download
raw
21.1 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Pricing and Billing&quot;,&quot;local&quot;:&quot;pricing-and-billing&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Pay-as-you-Go&quot;,&quot;local&quot;:&quot;pay-as-you-go&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Routed requests vs direct calls&quot;,&quot;local&quot;:&quot;routed-requests-vs-direct-calls&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;HF-Inference cost&quot;,&quot;local&quot;:&quot;hf-inference-cost&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Organization billing&quot;,&quot;local&quot;:&quot;organization-billing&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/inference-providers/pr_1663/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/entry/start.d5f15666.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/scheduler.ddb4e551.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/singletons.0f5b782d.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.ce98237b.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/paths.b324c1e2.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/entry/app.68b4644d.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.e16e4efa.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/nodes/0.80863911.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/nodes/5.b31e2126.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/Tip.20abb04f.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/CodeBlock.754e6cfc.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.e108c5ed.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Pricing and Billing&quot;,&quot;local&quot;:&quot;pricing-and-billing&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Pay-as-you-Go&quot;,&quot;local&quot;:&quot;pay-as-you-go&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Routed requests vs direct calls&quot;,&quot;local&quot;:&quot;routed-requests-vs-direct-calls&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;HF-Inference cost&quot;,&quot;local&quot;:&quot;hf-inference-cost&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Organization billing&quot;,&quot;local&quot;:&quot;organization-billing&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="pricing-and-billing" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pricing-and-billing"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Pricing 
and Billing</span></h1> <p data-svelte-h="svelte-w3jifx">Inference Providers is a production-ready service involving external partners and is therefore a paid product. However, as a Hugging Face user, you get monthly credits to run experiments. The amount of credits you get depends on your type of account:</p> <table data-svelte-h="svelte-l6ir4w"><thead><tr><th>Tier</th> <th>Included monthly credits</th></tr></thead> <tbody><tr><td>Free Users</td> <td>subject to change, less than $0.10</td></tr> <tr><td>PRO Users</td> <td>$2.00</td></tr> <tr><td>Enterprise Hub Organizations</td> <td>$2.00 per seat, shared among members</td></tr></tbody></table> <p data-svelte-h="svelte-1ugzym0">To benefit from Enterprise Hub included credits, you need to explicitly specify the organization to be billed when performing the inference requests.
See the <a href="#organization-billing">Organization Billing section</a> below for more details.</p> <h2 class="relative group"><a id="pay-as-you-go" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pay-as-you-go"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Pay-as-you-Go</span></h2> <p data-svelte-h="svelte-u3yxol"><strong>PRO users and Enterprise Hub organizations</strong> can continue using the API once their monthly included credits are exhausted. This billing model, known as “Pay-as-you-Go” (PAYG), is charged on top of the monthly subscription. PAYG is only available for providers that are integrated with our billing system. We’re actively working to integrate all providers, but in the meantime, any providers that are not yet integrated will be blocked once the free-tier limit is reached.</p> <p data-svelte-h="svelte-hbomte">If you have remaining credits, we estimate costs for providers that aren’t fully integrated with our billing system. 
These estimates are usually higher than the actual cost to prevent abuse, which is why PAYG is currently disabled for those providers.</p> <p data-svelte-h="svelte-1yz6ra5">You can track your spending on your <a href="https://huggingface.co/settings/billing" rel="nofollow">billing page</a>.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-fcw0wf">Hugging Face charges you the same rates as the provider, with no additional fees. We just pass through the provider costs directly.</p></div> <h2 class="relative group"><a id="routed-requests-vs-direct-calls" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#routed-requests-vs-direct-calls"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Routed requests vs direct calls</span></h2> <p data-svelte-h="svelte-1yrvom4">The documentation above assumes you are making routed requests to external providers. 
In practice, there are 3 different ways to run inference, each with unique billing implications:</p> <ul data-svelte-h="svelte-3k5n7f"><li><p><strong>Routed Request</strong>: This is the default method for using Inference Providers. Simply use the JavaScript or Python <code>InferenceClient</code>, or make raw HTTP requests with your Hugging Face User Access Token. Your request is automatically routed through Hugging Face to the provider’s platform. No separate provider account is required, and billing is managed directly by Hugging Face. This approach lets you seamlessly switch between providers without additional setup.</p></li> <li><p><strong>Routed Request with Custom Key</strong>: In your <a href="https://huggingface.co/settings/inference-providers" rel="nofollow">settings page</a> on the Hub, you can configure a custom key for each provider. To use this option, you’ll need to create an account on the provider’s platform, and billing will be handled directly by that provider. Hugging Face won’t charge you for the call. This method gives you more control over billing when experimenting with models on the Hub. When making a routed request with a custom key, your code remains unchanged—you’ll still pass your Hugging Face User Access Token. Hugging Face will automatically swap the authentication when routing the request.</p></li> <li><p><strong>Direct Calls</strong>: If you provide a custom key when using the JavaScript or Python <code>InferenceClient</code>, the call will be made directly to the provider’s platform. Billing is managed by the provider, and Hugging Face is not notified of the request. 
This option is ideal if you want to use the unified <code>InferenceClient</code> interface without routing through Hugging Face.</p></li></ul> <p data-svelte-h="svelte-1b2o2p1">Here is a table that sums up what we’ve seen so far:</p> <table data-svelte-h="svelte-3yjka0"><thead><tr><th></th> <th>HF routing</th> <th>Billed by</th> <th>Free-tier included</th> <th>Pay-as-you-go</th> <th>Integration</th></tr></thead> <tbody><tr><td><strong>Routed request</strong></td> <td>Yes</td> <td>Hugging Face</td> <td>Yes</td> <td>Only for PRO users and for integrated providers</td> <td>SDKs, Playground, widgets, Data AI Studio</td></tr> <tr><td><strong>Routed request with custom key</strong></td> <td>Yes</td> <td>Provider</td> <td>No</td> <td>Yes</td> <td>SDKs, Playground, widgets, Data AI Studio</td></tr> <tr><td><strong>Direct call</strong></td> <td>No</td> <td>Provider</td> <td>No</td> <td>Yes</td> <td>SDKs only</td></tr></tbody></table> <h2 class="relative group"><a id="hf-inference-cost" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#hf-inference-cost"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>HF-Inference cost</span></h2> <p 
data-svelte-h="svelte-1yovgaz">As you may have noticed, you can choose to work with the <code>&quot;hf-inference&quot;</code> provider. This service used to be called “Inference API (serverless)” prior to Inference Providers. From a user’s point of view, working with HF Inference is the same as with any other provider. Past the free-tier credits, you get charged for every inference request based on the compute time × price of the underlying hardware.</p> <p data-svelte-h="svelte-xg5589">For instance, a request to <a href="https://huggingface.co/black-forest-labs/FLUX.1-dev" rel="nofollow">black-forest-labs/FLUX.1-dev</a> that takes 10 seconds to complete on a GPU machine that costs $0.00012 per second to run will be billed $0.0012.</p> <p data-svelte-h="svelte-o2p8qd">The <code>&quot;hf-inference&quot;</code> provider is currently the default provider when working with the JavaScript and Python SDKs. Note that this default might change in the future.</p> <h2 class="relative group"><a id="organization-billing" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#organization-billing"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>Organization billing</span></h2> <p data-svelte-h="svelte-tvaw01">For Enterprise Hub organizations, it is possible to centralize billing for all of your users. Each user still uses their own User Access Token, but the requests are billed to your organization. This can be done by passing <code>&quot;X-HF-Bill-To: my-org-name&quot;</code> as a header in your HTTP requests.</p> <p data-svelte-h="svelte-1r9q8ej">Enterprise Hub organizations receive a pool of free usage credits based on the number of seats in the subscription. Inference Providers usage can be tracked on the organization’s billing page. Enterprise Hub organization administrators can also set a spending limit and disable a set of Inference Providers from the organization’s settings.</p> <div class="flex justify-center" data-svelte-h="svelte-inql4"><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/inference-providers/enterprise-org-settings-light.png" alt="Inference Providers settings page of an Enterprise Hub organization"> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/inference-providers/enterprise-org-settings-dark.png" alt="Inference Providers settings page of an Enterprise Hub organization"></div> <p data-svelte-h="svelte-1di7aer">If you are using the JavaScript <code>InferenceClient</code>, you can set the <code>billTo</code> attribute at a client level:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" 
transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> { <span class="hljs-title class_">InferenceClient</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">&quot;@huggingface/inference&quot;</span>;
<span class="hljs-keyword">const</span> client = <span class="hljs-keyword">new</span> <span class="hljs-title class_">InferenceClient</span>(<span class="hljs-string">&quot;hf_token&quot;</span>, { <span class="hljs-attr">billTo</span>: <span class="hljs-string">&quot;my-org-name&quot;</span> });
<span class="hljs-keyword">const</span> image = <span class="hljs-keyword">await</span> client.<span class="hljs-title function_">textToImage</span>({
<span class="hljs-attr">model</span>: <span class="hljs-string">&quot;black-forest-labs/FLUX.1-schnell&quot;</span>,
<span class="hljs-attr">inputs</span>: <span class="hljs-string">&quot;A majestic lion in a fantasy forest&quot;</span>,
<span class="hljs-attr">provider</span>: <span class="hljs-string">&quot;fal-ai&quot;</span>,
});
<span class="hljs-comment">/// Use the generated image (it&#x27;s a Blob)</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1og7foy">And similarly in Python:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient
client = InferenceClient(provider=<span class="hljs-string">&quot;fal-ai&quot;</span>, bill_to=<span class="hljs-string">&quot;my-org-name&quot;</span>)
image = client.text_to_image(
<span class="hljs-string">&quot;A majestic lion in a fantasy forest&quot;</span>,
model=<span class="hljs-string">&quot;black-forest-labs/FLUX.1-schnell&quot;</span>,
)
image.save(<span class="hljs-string">&quot;lion.png&quot;</span>)<!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hub-docs/blob/main/docs/inference-providers/pricing.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
	// Bootstrap for this page: publish the path configuration, remember where
	// this script lives in the DOM, then load the two entry modules and hand
	// control to the `start` export of the first one.

	// Assigned without a declaration, so the name lands on the global object.
	// Both paths point at the same docs prefix; `env` is empty.
	__sveltekit_1o5mypj = {
		assets: "/docs/inference-providers/pr_1663/en",
		base: "/docs/inference-providers/pr_1663/en",
		env: {}
	};

	// The element that contains this <script> tag; passed to `start` below
	// (presumably as the mount/hydration target; confirm against the kit entry).
	const hostElement = document.currentScript.parentElement;

	// One slot per entry in `node_ids`; both are null for this page.
	const nodeData = [null, null];

	// Kick off both dynamic imports up front so they are fetched in parallel.
	const startModule = import("/docs/inference-providers/pr_1663/en/_app/immutable/entry/start.d5f15666.js");
	const appModule = import("/docs/inference-providers/pr_1663/en/_app/immutable/entry/app.68b4644d.js");

	Promise.all([startModule, appModule]).then((loaded) => {
		const kit = loaded[0];
		const app = loaded[1];

		// The ids line up with the nodes/0 and nodes/5 chunks preloaded in the
		// markup above (presumably layout and page; verify against the build).
		const startOptions = {
			node_ids: [0, 5],
			data: nodeData,
			form: null,
			error: null
		};

		kit.start(app, hostElement, startOptions);
	});
}
</script>

Xet Storage Details

Size:
21.1 kB
·
Xet hash:
c186e67506061415d4e83f7899d69ec8e42b8cd414eaac4f04acfad1bc2a008b

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.