Buckets:

rtrm's picture
download
raw
52.6 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;FAQs&quot;,&quot;local&quot;:&quot;faqs&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;General questions&quot;,&quot;local&quot;:&quot;general-questions&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;In which regions can I deploy an Inference Endpoints?&quot;,&quot;local&quot;:&quot;in-which-regions-can-i-deploy-an-inference-endpoints&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Can I access the instance my Endpoint is running on?&quot;,&quot;local&quot;:&quot;can-i-access-the-instance-my-endpoint-is-running-on&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;What’s the difference between Inference Providers and Inference Endpoints?&quot;,&quot;local&quot;:&quot;whats-the-difference-between-inference-providers-and-inference-endpoints&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;How much does it cost to run my Endpoint?&quot;,&quot;local&quot;:&quot;how-much-does-it-cost-to-run-my-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;How do I monitor my deployed Endpoint?&quot;,&quot;local&quot;:&quot;how-do-i-monitor-my-deployed-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Security&quot;,&quot;local&quot;:&quot;security&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Is the data transiting to the Endpoint encrypted?&quot;,&quot;local&quot;:&quot;is-the-data-transiting-to-the-endpoint-encrypted&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;I accidentally leaked my token. Do I need to delete my endpoint?&quot;,&quot;local&quot;:&quot;i-accidentally-leaked-my-token-do-i-need-to-delete-my-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Can I see my Private Endpoint running on my VPC account?&quot;,&quot;local&quot;:&quot;can-i-see-my-private-endpoint-running-on-my-vpc-account&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Configuration&quot;,&quot;local&quot;:&quot;configuration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;How can I scale my deployment?&quot;,&quot;local&quot;:&quot;how-can-i-scale-my-deployment&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Will my endpoint still be running if no more requests are processed?&quot;,&quot;local&quot;:&quot;will-my-endpoint-still-be-running-if-no-more-requests-are-processed&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;I would like to deploy a model which is not in the supported tasks, is this possible?&quot;,&quot;local&quot;:&quot;i-would-like-to-deploy-a-model-which-is-not-in-the-supported-tasks-is-this-possible&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;What if I would like to deploy to a different instance type that is not listed?&quot;,&quot;local&quot;:&quot;what-if-i-would-like-to-deploy-to-a-different-instance-type-that-is-not-listed&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;I need to add a custom environment variable (default or secrets) to my endpoint. How can I do this?&quot;,&quot;local&quot;:&quot;i-need-to-add-a-custom-environment-variable-default-or-secrets-to-my-endpoint-how-can-i-do-this&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inference Engines&quot;,&quot;local&quot;:&quot;inference-engines&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Can I run inference in batches?&quot;,&quot;local&quot;:&quot;can-i-run-inference-in-batches&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;I’m using a specific Inference Engine type for my Endpoint. Is there more information about how to use it?&quot;,&quot;local&quot;:&quot;im-using-a-specific-inference-engine-type-for-my-endpoint-is-there-more-information-about-how-to-use-it&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Debugging&quot;,&quot;local&quot;:&quot;debugging&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;I can see from the logs that my endpoint is running but the status is stuck at “initializing”&quot;,&quot;local&quot;:&quot;i-can-see-from-the-logs-that-my-endpoint-is-running-but-the-status-is-stuck-at-initializing&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;I’m getting a 500 response in the beginning of my endpoint deployment or when scaling is happening&quot;,&quot;local&quot;:&quot;im-getting-a-500-response-in-the-beginning-of-my-endpoint-deployment-or-when-scaling-is-happening&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;I see there’s an option to select a Download Pattern under Instance Configuration. What does this mean?&quot;,&quot;local&quot;:&quot;i-see-theres-an-option-to-select-a-download-pattern-under-instance-configuration-what-does-this-mean&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;I’m sometimes running into a 503 error on a running endpoint in production. What can I do?&quot;,&quot;local&quot;:&quot;im-sometimes-running-into-a-503-error-on-a-running-endpoint-in-production-what-can-i-do&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/inference-endpoints/pr_151/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/entry/start.56631b46.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/scheduler.eb244325.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/singletons.54c25bcd.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/index.3c23fb4b.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/paths.12ce0a18.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/entry/app.08bc0e6a.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/preload-helper.0ac538a1.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/index.661680a1.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/nodes/0.69485259.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/nodes/20.ff0ec91d.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.c047d438.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/CodeBlock.0d14d0aa.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;FAQs&quot;,&quot;local&quot;:&quot;faqs&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;General questions&quot;,&quot;local&quot;:&quot;general-questions&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;In which regions can I deploy an Inference Endpoints?&quot;,&quot;local&quot;:&quot;in-which-regions-can-i-deploy-an-inference-endpoints&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Can I access the instance my Endpoint is running on?&quot;,&quot;local&quot;:&quot;can-i-access-the-instance-my-endpoint-is-running-on&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;What’s the difference between Inference Providers and Inference Endpoints?&quot;,&quot;local&quot;:&quot;whats-the-difference-between-inference-providers-and-inference-endpoints&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;How much does it cost to run my Endpoint?&quot;,&quot;local&quot;:&quot;how-much-does-it-cost-to-run-my-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;How do I monitor my deployed Endpoint?&quot;,&quot;local&quot;:&quot;how-do-i-monitor-my-deployed-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Security&quot;,&quot;local&quot;:&quot;security&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Is the data transiting to the Endpoint encrypted?&quot;,&quot;local&quot;:&quot;is-the-data-transiting-to-the-endpoint-encrypted&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;I accidentally leaked my token. Do I need to delete my endpoint?&quot;,&quot;local&quot;:&quot;i-accidentally-leaked-my-token-do-i-need-to-delete-my-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Can I see my Private Endpoint running on my VPC account?&quot;,&quot;local&quot;:&quot;can-i-see-my-private-endpoint-running-on-my-vpc-account&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Configuration&quot;,&quot;local&quot;:&quot;configuration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;How can I scale my deployment?&quot;,&quot;local&quot;:&quot;how-can-i-scale-my-deployment&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Will my endpoint still be running if no more requests are processed?&quot;,&quot;local&quot;:&quot;will-my-endpoint-still-be-running-if-no-more-requests-are-processed&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;I would like to deploy a model which is not in the supported tasks, is this possible?&quot;,&quot;local&quot;:&quot;i-would-like-to-deploy-a-model-which-is-not-in-the-supported-tasks-is-this-possible&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;What if I would like to deploy to a different instance type that is not listed?&quot;,&quot;local&quot;:&quot;what-if-i-would-like-to-deploy-to-a-different-instance-type-that-is-not-listed&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;I need to add a custom environment variable (default or secrets) to my endpoint. How can I do this?&quot;,&quot;local&quot;:&quot;i-need-to-add-a-custom-environment-variable-default-or-secrets-to-my-endpoint-how-can-i-do-this&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inference Engines&quot;,&quot;local&quot;:&quot;inference-engines&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Can I run inference in batches?&quot;,&quot;local&quot;:&quot;can-i-run-inference-in-batches&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;I’m using a specific Inference Engine type for my Endpoint. Is there more information about how to use it?&quot;,&quot;local&quot;:&quot;im-using-a-specific-inference-engine-type-for-my-endpoint-is-there-more-information-about-how-to-use-it&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Debugging&quot;,&quot;local&quot;:&quot;debugging&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;I can see from the logs that my endpoint is running but the status is stuck at “initializing”&quot;,&quot;local&quot;:&quot;i-can-see-from-the-logs-that-my-endpoint-is-running-but-the-status-is-stuck-at-initializing&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;I’m getting a 500 response in the beginning of my endpoint deployment or when scaling is happening&quot;,&quot;local&quot;:&quot;im-getting-a-500-response-in-the-beginning-of-my-endpoint-deployment-or-when-scaling-is-happening&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;I see there’s an option to select a Download Pattern under Instance Configuration. What does this mean?&quot;,&quot;local&quot;:&quot;i-see-theres-an-option-to-select-a-download-pattern-under-instance-configuration-what-does-this-mean&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;I’m sometimes running into a 503 error on a running endpoint in production. What can I do?&quot;,&quot;local&quot;:&quot;im-sometimes-running-into-a-503-error-on-a-running-endpoint-in-production-what-can-i-do&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 max-sm:gap-0.5 h-6 max-sm:h-5 px-2 max-sm:px-1.5 text-[11px] max-sm:text-[9px] font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0"><svg class="w-3 h-3 max-sm:w-2.5 max-sm:h-2.5" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-6 max-sm:h-5 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible w-3 h-3 max-sm:w-2.5 max-sm:h-2.5 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="faqs" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#faqs"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>FAQs</span></h1> <h2 class="relative group"><a id="general-questions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#general-questions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>General questions</span></h2> <h3 class="relative group"><a id="in-which-regions-can-i-deploy-an-inference-endpoints" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#in-which-regions-can-i-deploy-an-inference-endpoints"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>In which regions can I deploy an Inference Endpoints?</span></h3> <p data-svelte-h="svelte-1xmun2n">Inference Endpoints are currently available on AWS in us-east-1 (N. Virginia) &amp; eu-west-1 (Ireland), on Azure in eastus (Virginia), and on
GCP in us-east4 (Virginia). If you need to deploy in a different region, please let us know.</p> <h3 class="relative group"><a id="can-i-access-the-instance-my-endpoint-is-running-on" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#can-i-access-the-instance-my-endpoint-is-running-on"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Can I access the instance my Endpoint is running on?</span></h3> <p data-svelte-h="svelte-x69cj2">No, you cannot access the instance hosting your Endpoint. But if you are missing information or need more insights on the machine where
the Endpoint is running, please contact us.</p> <h3 class="relative group"><a id="whats-the-difference-between-inference-providers-and-inference-endpoints" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#whats-the-difference-between-inference-providers-and-inference-endpoints"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What’s the difference between Inference Providers and Inference Endpoints?</span></h3> <p data-svelte-h="svelte-te77b4">The <a href="https://huggingface.co/docs/inference-providers/index" rel="nofollow">Inference Providers</a> is a solution to easily explore and evaluate models. Its a
single consistent API Inference giving access to Hugging Face partners, that host a wide selection of AI models. Inference Endpoints is a
service for you to deploy your models on managed infrastructure.</p> <h3 class="relative group"><a id="how-much-does-it-cost-to-run-my-endpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-much-does-it-cost-to-run-my-endpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How much does it cost to run my Endpoint?</span></h3> <p data-svelte-h="svelte-1cll41y">Dedicated Endpoints are billed based on the compute hours of your Running Endpoints, and the associated instance types. We may add usage
costs for load balancers and Private Links in the future.</p> <h3 class="relative group"><a id="how-do-i-monitor-my-deployed-endpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-do-i-monitor-my-deployed-endpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How do I monitor my deployed Endpoint?</span></h3> <p data-svelte-h="svelte-dr5xc3">You can currently monitor your Endpoint through the <a href="https://endpoints.huggingface.co/endpoints" rel="nofollow">Inference Endpoints web application</a>,
where you have access to the <a href="/docs/inference-endpoints/guides/logs">Logs of your Endpoints</a> as well as a
<a href="/docs/inference-endpoints/guides/analytics">metrics dashboard</a>.</p> <h2 class="relative group"><a id="security" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#security"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Security</span></h2> <h3 class="relative group"><a id="is-the-data-transiting-to-the-endpoint-encrypted" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#is-the-data-transiting-to-the-endpoint-encrypted"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Is the data transiting to the Endpoint encrypted?</span></h3> <p data-svelte-h="svelte-1h7j8nb">Yes, data is encrypted during transit with TLS/SSL.</p> <h3 class="relative group"><a id="i-accidentally-leaked-my-token-do-i-need-to-delete-my-endpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#i-accidentally-leaked-my-token-do-i-need-to-delete-my-endpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I accidentally leaked my token. Do I need to delete my endpoint?</span></h3> <p data-svelte-h="svelte-11wxz3">You can invalidate existing personal tokens and create new ones in your settings here: <a href="https://huggingface.co/settings/tokens" rel="nofollow">https://huggingface.co/settings/tokens</a>.
Please use fine-grained tokens when possible!</p> <h3 class="relative group"><a id="can-i-see-my-private-endpoint-running-on-my-vpc-account" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#can-i-see-my-private-endpoint-running-on-my-vpc-account"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Can I see my Private Endpoint running on my VPC account?</span></h3> <p data-svelte-h="svelte-1gq60n7">No, when creating a Private Endpoint (a Hugging Face Inference Endpoint linked to your VPC via AWS PrivateLink), you can only see the
ENI in your VPC where the Endpoint is available.</p> <h2 class="relative group"><a id="configuration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#configuration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Configuration</span></h2> <h3 class="relative group"><a id="how-can-i-scale-my-deployment" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-can-i-scale-my-deployment"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How can I scale my deployment?</span></h3> <p data-svelte-h="svelte-1whrba1">The Endpoints are scaled automatically for you. You can set a minimum and maximum amount of replicas, and the system will scale them up and down
depending on the scaling strategy you configured. We recommend reading the <a href="./guides/autoscaling">autoscaling section</a> for more information</p> <h3 class="relative group"><a id="will-my-endpoint-still-be-running-if-no-more-requests-are-processed" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#will-my-endpoint-still-be-running-if-no-more-requests-are-processed"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Will my endpoint still be running if no more requests are processed?</span></h3> <p data-svelte-h="svelte-1cbmzoj">Unless you allowed scale-to-zero your Inference Endpoint will always stay available/up with the number of min replicas defined in the Autoscaling
configuration</p> <h3 class="relative group"><a id="i-would-like-to-deploy-a-model-which-is-not-in-the-supported-tasks-is-this-possible" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#i-would-like-to-deploy-a-model-which-is-not-in-the-supported-tasks-is-this-possible"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I would like to deploy a model which is not in the supported tasks, is this possible?</span></h3> <p data-svelte-h="svelte-1g656bw">Yes, you can deploy any repository from the <a href="https://huggingface.co/models" rel="nofollow">Hugging Face Hub</a> and if your task/model/framework is not
supported out of the box. For this we recommended setting up a <a href="./engines/custom_container">custom container</a></p> <h3 class="relative group"><a id="what-if-i-would-like-to-deploy-to-a-different-instance-type-that-is-not-listed" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-if-i-would-like-to-deploy-to-a-different-instance-type-that-is-not-listed"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What if I would like to deploy to a different instance type that is not listed?</span></h3> <p data-svelte-h="svelte-1542t8a">Please contact us if you feel your model would do better on a different instance type than what is listed.</p> <h3 class="relative group"><a id="i-need-to-add-a-custom-environment-variable-default-or-secrets-to-my-endpoint-how-can-i-do-this" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#i-need-to-add-a-custom-environment-variable-default-or-secrets-to-my-endpoint-how-can-i-do-this"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I need to add a custom environment variable (default or secrets) to my endpoint. How can I do this?</span></h3> <p data-svelte-h="svelte-1ljdukv">This is now possible in the UI, or via the API:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;model&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;image&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;huggingface&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;env&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> <span class="hljs-attr">&quot;var1&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;value&quot;</span> <span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span>
<span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="inference-engines" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference-engines"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference Engines</span></h2> <h3 class="relative group"><a id="can-i-run-inference-in-batches" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#can-i-run-inference-in-batches"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Can I run inference in batches?</span></h3> <p data-svelte-h="svelte-1r1z2fa">In most cases yes but it depends on the Inference Engine. In practice all high performance Inference Engines like vLLM, TGI, llama.cpp, SGLang
and TEI support batching, whereas the Inference Toolkit might not. Each Inference Enginge also has configuration to adjust batch sizes, we recommend
reading up on the documentation to understand best how to tune the configuration to meet your needs.</p> <h3 class="relative group"><a id="im-using-a-specific-inference-engine-type-for-my-endpoint-is-there-more-information-about-how-to-use-it" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#im-using-a-specific-inference-engine-type-for-my-endpoint-is-there-more-information-about-how-to-use-it"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I’m using a specific Inference Engine type for my Endpoint. Is there more information about how to use it?</span></h3> <p data-svelte-h="svelte-131ger4">Yes! Please check the Inference Engines section and also check out the Engines own documentation.</p> <h2 class="relative group"><a id="debugging" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#debugging"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Debugging</span></h2> <h3 class="relative group"><a id="i-can-see-from-the-logs-that-my-endpoint-is-running-but-the-status-is-stuck-at-initializing" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#i-can-see-from-the-logs-that-my-endpoint-is-running-but-the-status-is-stuck-at-initializing"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I can see from the logs that my endpoint is running but the status is stuck at “initializing”</span></h3> <p data-svelte-h="svelte-10aht64">This usually means that the port mapping is incorrect. Ensure your app is listening on port 80 and that the Docker container is exposing
port 80 externally. If you’re deploying a custom container you can change these values, but make sure to keep them aligned.</p> <h3 class="relative group"><a id="im-getting-a-500-response-in-the-beginning-of-my-endpoint-deployment-or-when-scaling-is-happening" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#im-getting-a-500-response-in-the-beginning-of-my-endpoint-deployment-or-when-scaling-is-happening"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I’m getting a 500 response in the beginning of my endpoint deployment or when scaling is happening</span></h3> <p data-svelte-h="svelte-1iyzmay">Confirm that you have a health route implemented in your app that returns a status code 200 when your application is ready to serve
requests. Otherwise your app is considered ready as soon as the container has started, potentially resulting in 500s. You can configure
the health route in the Container Configuration of your Endpoint.</p> <p data-svelte-h="svelte-1nz2hrl">You can also add the ‘X-Scale-Up-Timeout’ header to your requests. This means that when the endpoint is scaling the proxy will hold
requests until a replica is ready, or timeout after the specified amount of seconds. For example ‘X-Scale-Up-Timeout: 600’</p> <h3 class="relative group"><a id="i-see-theres-an-option-to-select-a-download-pattern-under-instance-configuration-what-does-this-mean" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#i-see-theres-an-option-to-select-a-download-pattern-under-instance-configuration-what-does-this-mean"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I see there’s an option to select a Download Pattern under Instance Configuration. What does this mean?</span></h3> <p data-svelte-h="svelte-z486y1">You have an option to choose the download pattern of the model’s files when deploying an Endpoint, to help with limiting the volume of
downloaded files. If a selected download pattern is not possible or compatible with the model, the system will not allow a change to the
pattern.</p> <h3 class="relative group"><a id="im-sometimes-running-into-a-503-error-on-a-running-endpoint-in-production-what-can-i-do" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#im-sometimes-running-into-a-503-error-on-a-running-endpoint-in-production-what-can-i-do"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I’m sometimes running into a 503 error on a running endpoint in production. What can I do?</span></h3> <p data-svelte-h="svelte-7z17l1">To help mitigate service interruptions on an Inference Endpoint that needs to be highly available, please make sure to use at least 2 replicas,
ie min replicas set to 2.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hf-endpoints-documentation/blob/main/docs/source/support/faq.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_10wt9iy = {
assets: "/docs/inference-endpoints/pr_151/en",
base: "/docs/inference-endpoints/pr_151/en",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/inference-endpoints/pr_151/en/_app/immutable/entry/start.56631b46.js"),
import("/docs/inference-endpoints/pr_151/en/_app/immutable/entry/app.08bc0e6a.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 20],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
52.6 kB
·
Xet hash:
b22034bcc6f26e7b23b06a6b6f69d4e8c46c63ae97267ae912ecec539d41d165

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.