Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"FAQs","local":"faqs","sections":[{"title":"General questions","local":"general-questions","sections":[{"title":"In which regions can I deploy an Inference Endpoints?","local":"in-which-regions-can-i-deploy-an-inference-endpoints","sections":[],"depth":3},{"title":"Can I access the instance my Endpoint is running on?","local":"can-i-access-the-instance-my-endpoint-is-running-on","sections":[],"depth":3},{"title":"What’s the difference between Inference Providers and Inference Endpoints?","local":"whats-the-difference-between-inference-providers-and-inference-endpoints","sections":[],"depth":3},{"title":"How much does it cost to run my Endpoint?","local":"how-much-does-it-cost-to-run-my-endpoint","sections":[],"depth":3},{"title":"How do I monitor my deployed Endpoint?","local":"how-do-i-monitor-my-deployed-endpoint","sections":[],"depth":3}],"depth":2},{"title":"Security","local":"security","sections":[{"title":"Is the data transiting to the Endpoint encrypted?","local":"is-the-data-transiting-to-the-endpoint-encrypted","sections":[],"depth":3},{"title":"I accidentally leaked my token. 
Do I need to delete my endpoint?","local":"i-accidentally-leaked-my-token-do-i-need-to-delete-my-endpoint","sections":[],"depth":3},{"title":"Can I see my Private Endpoint running on my VPC account?","local":"can-i-see-my-private-endpoint-running-on-my-vpc-account","sections":[],"depth":3}],"depth":2},{"title":"Configuration","local":"configuration","sections":[{"title":"How can I scale my deployment?","local":"how-can-i-scale-my-deployment","sections":[],"depth":3},{"title":"Will my endpoint still be running if no more requests are processed?","local":"will-my-endpoint-still-be-running-if-no-more-requests-are-processed","sections":[],"depth":3},{"title":"I would like to deploy a model which is not in the supported tasks, is this possible?","local":"i-would-like-to-deploy-a-model-which-is-not-in-the-supported-tasks-is-this-possible","sections":[],"depth":3},{"title":"What if I would like to deploy to a different instance type that is not listed?","local":"what-if-i-would-like-to-deploy-to-a-different-instance-type-that-is-not-listed","sections":[],"depth":3},{"title":"I need to add a custom environment variable (default or secrets) to my endpoint. How can I do this?","local":"i-need-to-add-a-custom-environment-variable-default-or-secrets-to-my-endpoint-how-can-i-do-this","sections":[],"depth":3}],"depth":2},{"title":"Inference Engines","local":"inference-engines","sections":[{"title":"Can I run inference in batches?","local":"can-i-run-inference-in-batches","sections":[],"depth":3},{"title":"I’m using a specific Inference Engine type for my Endpoint. 
Is there more information about how to use it?","local":"im-using-a-specific-inference-engine-type-for-my-endpoint-is-there-more-information-about-how-to-use-it","sections":[],"depth":3}],"depth":2},{"title":"Debugging","local":"debugging","sections":[{"title":"I can see from the logs that my endpoint is running but the status is stuck at “initializing”","local":"i-can-see-from-the-logs-that-my-endpoint-is-running-but-the-status-is-stuck-at-initializing","sections":[],"depth":3},{"title":"I’m getting a 500 response in the beginning of my endpoint deployment or when scaling is happening","local":"im-getting-a-500-response-in-the-beginning-of-my-endpoint-deployment-or-when-scaling-is-happening","sections":[],"depth":3},{"title":"I see there’s an option to select a Download Pattern under Instance Configuration. What does this mean?","local":"i-see-theres-an-option-to-select-a-download-pattern-under-instance-configuration-what-does-this-mean","sections":[],"depth":3},{"title":"I’m sometimes running into a 503 error on a running endpoint in production. What can I do?","local":"im-sometimes-running-into-a-503-error-on-a-running-endpoint-in-production-what-can-i-do","sections":[],"depth":3}],"depth":2}],"depth":1}"> | |
| <link href="/docs/inference-endpoints/pr_151/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/entry/start.56631b46.js"> | |
| <link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/scheduler.eb244325.js"> | |
| <link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/singletons.54c25bcd.js"> | |
| <link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/index.3c23fb4b.js"> | |
| <link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/paths.12ce0a18.js"> | |
| <link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/entry/app.08bc0e6a.js"> | |
| <link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/preload-helper.0ac538a1.js"> | |
| <link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/index.661680a1.js"> | |
| <link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/nodes/0.69485259.js"> | |
| <link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/nodes/20.ff0ec91d.js"> | |
| <link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.c047d438.js"> | |
| <link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/CodeBlock.0d14d0aa.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"FAQs","local":"faqs","sections":[{"title":"General questions","local":"general-questions","sections":[{"title":"In which regions can I deploy an Inference Endpoints?","local":"in-which-regions-can-i-deploy-an-inference-endpoints","sections":[],"depth":3},{"title":"Can I access the instance my Endpoint is running on?","local":"can-i-access-the-instance-my-endpoint-is-running-on","sections":[],"depth":3},{"title":"What’s the difference between Inference Providers and Inference Endpoints?","local":"whats-the-difference-between-inference-providers-and-inference-endpoints","sections":[],"depth":3},{"title":"How much does it cost to run my Endpoint?","local":"how-much-does-it-cost-to-run-my-endpoint","sections":[],"depth":3},{"title":"How do I monitor my deployed Endpoint?","local":"how-do-i-monitor-my-deployed-endpoint","sections":[],"depth":3}],"depth":2},{"title":"Security","local":"security","sections":[{"title":"Is the data transiting to the Endpoint encrypted?","local":"is-the-data-transiting-to-the-endpoint-encrypted","sections":[],"depth":3},{"title":"I accidentally leaked my token. 
Do I need to delete my endpoint?","local":"i-accidentally-leaked-my-token-do-i-need-to-delete-my-endpoint","sections":[],"depth":3},{"title":"Can I see my Private Endpoint running on my VPC account?","local":"can-i-see-my-private-endpoint-running-on-my-vpc-account","sections":[],"depth":3}],"depth":2},{"title":"Configuration","local":"configuration","sections":[{"title":"How can I scale my deployment?","local":"how-can-i-scale-my-deployment","sections":[],"depth":3},{"title":"Will my endpoint still be running if no more requests are processed?","local":"will-my-endpoint-still-be-running-if-no-more-requests-are-processed","sections":[],"depth":3},{"title":"I would like to deploy a model which is not in the supported tasks, is this possible?","local":"i-would-like-to-deploy-a-model-which-is-not-in-the-supported-tasks-is-this-possible","sections":[],"depth":3},{"title":"What if I would like to deploy to a different instance type that is not listed?","local":"what-if-i-would-like-to-deploy-to-a-different-instance-type-that-is-not-listed","sections":[],"depth":3},{"title":"I need to add a custom environment variable (default or secrets) to my endpoint. How can I do this?","local":"i-need-to-add-a-custom-environment-variable-default-or-secrets-to-my-endpoint-how-can-i-do-this","sections":[],"depth":3}],"depth":2},{"title":"Inference Engines","local":"inference-engines","sections":[{"title":"Can I run inference in batches?","local":"can-i-run-inference-in-batches","sections":[],"depth":3},{"title":"I’m using a specific Inference Engine type for my Endpoint. 
Is there more information about how to use it?","local":"im-using-a-specific-inference-engine-type-for-my-endpoint-is-there-more-information-about-how-to-use-it","sections":[],"depth":3}],"depth":2},{"title":"Debugging","local":"debugging","sections":[{"title":"I can see from the logs that my endpoint is running but the status is stuck at “initializing”","local":"i-can-see-from-the-logs-that-my-endpoint-is-running-but-the-status-is-stuck-at-initializing","sections":[],"depth":3},{"title":"I’m getting a 500 response in the beginning of my endpoint deployment or when scaling is happening","local":"im-getting-a-500-response-in-the-beginning-of-my-endpoint-deployment-or-when-scaling-is-happening","sections":[],"depth":3},{"title":"I see there’s an option to select a Download Pattern under Instance Configuration. What does this mean?","local":"i-see-theres-an-option-to-select-a-download-pattern-under-instance-configuration-what-does-this-mean","sections":[],"depth":3},{"title":"I’m sometimes running into a 503 error on a running endpoint in production. 
What can I do?","local":"im-sometimes-running-into-a-503-error-on-a-running-endpoint-in-production-what-can-i-do","sections":[],"depth":3}],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 max-sm:gap-0.5 h-6 max-sm:h-5 px-2 max-sm:px-1.5 text-[11px] max-sm:text-[9px] font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0"><svg class="w-3 h-3 max-sm:w-2.5 max-sm:h-2.5" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-6 max-sm:h-5 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible w-3 h-3 max-sm:w-2.5 max-sm:h-2.5 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" 
fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="faqs" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#faqs"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>FAQs</span></h1> <h2 class="relative group"><a id="general-questions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#general-questions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 
11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>General questions</span></h2> <h3 class="relative group"><a id="in-which-regions-can-i-deploy-an-inference-endpoints" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#in-which-regions-can-i-deploy-an-inference-endpoints"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>In which regions can I deploy an Inference Endpoints?</span></h3> <p data-svelte-h="svelte-1xmun2n">Inference Endpoints are currently available on AWS in us-east-1 (N. Virginia) & eu-west-1 (Ireland), on Azure in eastus (Virginia), and on | |
| GCP in us-east4 (Virginia). If you need to deploy in a different region, please let us know.</p> <h3 class="relative group"><a id="can-i-access-the-instance-my-endpoint-is-running-on" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#can-i-access-the-instance-my-endpoint-is-running-on"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Can I access the instance my Endpoint is running on?</span></h3> <p data-svelte-h="svelte-x69cj2">No, you cannot access the instance hosting your Endpoint. But if you are missing information or need more insights on the machine where | |
| the Endpoint is running, please contact us.</p> <h3 class="relative group"><a id="whats-the-difference-between-inference-providers-and-inference-endpoints" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#whats-the-difference-between-inference-providers-and-inference-endpoints"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What’s the difference between Inference Providers and Inference Endpoints?</span></h3> <p data-svelte-h="svelte-te77b4">The <a href="https://huggingface.co/docs/inference-providers/index" rel="nofollow">Inference Providers</a> is a solution to easily explore and evaluate models. It's a | |
| single, consistent inference API giving access to Hugging Face partners that host a wide selection of AI models. Inference Endpoints is a | |
| service for you to deploy your models on managed infrastructure.</p> <h3 class="relative group"><a id="how-much-does-it-cost-to-run-my-endpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-much-does-it-cost-to-run-my-endpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How much does it cost to run my Endpoint?</span></h3> <p data-svelte-h="svelte-1cll41y">Dedicated Endpoints are billed based on the compute hours of your Running Endpoints, and the associated instance types. We may add usage | |
| costs for load balancers and Private Links in the future.</p> <h3 class="relative group"><a id="how-do-i-monitor-my-deployed-endpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-do-i-monitor-my-deployed-endpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How do I monitor my deployed Endpoint?</span></h3> <p data-svelte-h="svelte-dr5xc3">You can currently monitor your Endpoint through the <a href="https://endpoints.huggingface.co/endpoints" rel="nofollow">Inference Endpoints web application</a>, | |
| where you have access to the <a href="/docs/inference-endpoints/guides/logs">Logs of your Endpoints</a> as well as a | |
| <a href="/docs/inference-endpoints/guides/analytics">metrics dashboard</a>.</p> <h2 class="relative group"><a id="security" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#security"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Security</span></h2> <h3 class="relative group"><a id="is-the-data-transiting-to-the-endpoint-encrypted" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#is-the-data-transiting-to-the-endpoint-encrypted"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 
8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Is the data transiting to the Endpoint encrypted?</span></h3> <p data-svelte-h="svelte-1h7j8nb">Yes, data is encrypted during transit with TLS/SSL.</p> <h3 class="relative group"><a id="i-accidentally-leaked-my-token-do-i-need-to-delete-my-endpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#i-accidentally-leaked-my-token-do-i-need-to-delete-my-endpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I accidentally leaked my token. Do I need to delete my endpoint?</span></h3> <p data-svelte-h="svelte-11wxz3">You can invalidate existing personal tokens and create new ones in your settings here: <a href="https://huggingface.co/settings/tokens" rel="nofollow">https://huggingface.co/settings/tokens</a>. | |
| Please use fine-grained tokens when possible!</p> <h3 class="relative group"><a id="can-i-see-my-private-endpoint-running-on-my-vpc-account" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#can-i-see-my-private-endpoint-running-on-my-vpc-account"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Can I see my Private Endpoint running on my VPC account?</span></h3> <p data-svelte-h="svelte-1gq60n7">No, when creating a Private Endpoint (a Hugging Face Inference Endpoint linked to your VPC via AWS PrivateLink), you can only see the | |
| ENI in your VPC where the Endpoint is available.</p> <h2 class="relative group"><a id="configuration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#configuration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Configuration</span></h2> <h3 class="relative group"><a id="how-can-i-scale-my-deployment" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-can-i-scale-my-deployment"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 
56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How can I scale my deployment?</span></h3> <p data-svelte-h="svelte-1whrba1">The Endpoints are scaled automatically for you. You can set a minimum and maximum number of replicas, and the system will scale them up and down | |
| depending on the scaling strategy you configured. We recommend reading the <a href="./guides/autoscaling">autoscaling section</a> for more information.</p> <h3 class="relative group"><a id="will-my-endpoint-still-be-running-if-no-more-requests-are-processed" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#will-my-endpoint-still-be-running-if-no-more-requests-are-processed"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Will my endpoint still be running if no more requests are processed?</span></h3> <p data-svelte-h="svelte-1cbmzoj">Unless you have enabled scale-to-zero, your Inference Endpoint will always stay available/up with the number of min replicas defined in the Autoscaling | |
| configuration.</p> <h3 class="relative group"><a id="i-would-like-to-deploy-a-model-which-is-not-in-the-supported-tasks-is-this-possible" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#i-would-like-to-deploy-a-model-which-is-not-in-the-supported-tasks-is-this-possible"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I would like to deploy a model which is not in the supported tasks, is this possible?</span></h3> <p data-svelte-h="svelte-1g656bw">Yes, you can deploy any repository from the <a href="https://huggingface.co/models" rel="nofollow">Hugging Face Hub</a>, even if your task/model/framework is not | |
| supported out of the box. For this, we recommend setting up a <a href="./engines/custom_container">custom container</a>.</p> <h3 class="relative group"><a id="what-if-i-would-like-to-deploy-to-a-different-instance-type-that-is-not-listed" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-if-i-would-like-to-deploy-to-a-different-instance-type-that-is-not-listed"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What if I would like to deploy to a different instance type that is not listed?</span></h3> <p data-svelte-h="svelte-1542t8a">Please contact us if you feel your model would do better on a different instance type than what is listed.</p> <h3 class="relative group"><a id="i-need-to-add-a-custom-environment-variable-default-or-secrets-to-my-endpoint-how-can-i-do-this" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#i-need-to-add-a-custom-environment-variable-default-or-secrets-to-my-endpoint-how-can-i-do-this"><span><svg class="" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I need to add a custom environment variable (default or secrets) to my endpoint. How can I do this?</span></h3> <p data-svelte-h="svelte-1ljdukv">This is now possible in the UI, or via the API:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" 
style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"model"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"image"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"huggingface"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"env"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> <span class="hljs-attr">"var1"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"value"</span> <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="inference-engines" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference-engines"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference Engines</span></h2> <h3 class="relative group"><a id="can-i-run-inference-in-batches" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#can-i-run-inference-in-batches"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 
11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Can I run inference in batches?</span></h3> <p data-svelte-h="svelte-1r1z2fa">In most cases yes but it depends on the Inference Engine. In practice all high performance Inference Engines like vLLM, TGI, llama.cpp, SGLang | |
| and TEI support batching, whereas the Inference Toolkit might not. Each Inference Engine also has configuration to adjust batch sizes; we recommend | |
| reading up on the documentation to understand best how to tune the configuration to meet your needs.</p> <h3 class="relative group"><a id="im-using-a-specific-inference-engine-type-for-my-endpoint-is-there-more-information-about-how-to-use-it" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#im-using-a-specific-inference-engine-type-for-my-endpoint-is-there-more-information-about-how-to-use-it"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I’m using a specific Inference Engine type for my Endpoint. Is there more information about how to use it?</span></h3> <p data-svelte-h="svelte-131ger4">Yes! 
Please check the Inference Engines section and also check out the Engine’s own documentation.</p> <h2 class="relative group"><a id="debugging" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#debugging"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Debugging</span></h2> <h3 class="relative group"><a id="i-can-see-from-the-logs-that-my-endpoint-is-running-but-the-status-is-stuck-at-initializing" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#i-can-see-from-the-logs-that-my-endpoint-is-running-but-the-status-is-stuck-at-initializing"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 
79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I can see from the logs that my endpoint is running but the status is stuck at “initializing”</span></h3> <p data-svelte-h="svelte-10aht64">This usually means that the port mapping is incorrect. Ensure your app is listening on port 80 and that the Docker container is exposing | |
| port 80 externally. If you’re deploying a custom container you can change these values, but make sure to keep them aligned.</p> <h3 class="relative group"><a id="im-getting-a-500-response-in-the-beginning-of-my-endpoint-deployment-or-when-scaling-is-happening" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#im-getting-a-500-response-in-the-beginning-of-my-endpoint-deployment-or-when-scaling-is-happening"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I’m getting a 500 response in the beginning of my endpoint deployment or when scaling is happening</span></h3> <p data-svelte-h="svelte-1iyzmay">Confirm that you have a health route implemented in your app that returns a status code 200 when your application is ready to serve | |
| requests. Otherwise your app is considered ready as soon as the container has started, potentially resulting in 500s. You can configure | |
| the health route in the Container Configuration of your Endpoint.</p> <p data-svelte-h="svelte-1nz2hrl">You can also add the ‘X-Scale-Up-Timeout’ header to your requests. This means that when the endpoint is scaling the proxy will hold | |
| requests until a replica is ready, or time out after the specified number of seconds. For example: ‘X-Scale-Up-Timeout: 600’.</p> <h3 class="relative group"><a id="i-see-theres-an-option-to-select-a-download-pattern-under-instance-configuration-what-does-this-mean" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#i-see-theres-an-option-to-select-a-download-pattern-under-instance-configuration-what-does-this-mean"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I see there’s an option to select a Download Pattern under Instance Configuration. What does this mean?</span></h3> <p data-svelte-h="svelte-z486y1">You have an option to choose the download pattern of the model’s files when deploying an Endpoint, to help with limiting the volume of | |
| downloaded files. If a selected download pattern is not possible or compatible with the model, the system will not allow a change to the | |
| pattern.</p> <h3 class="relative group"><a id="im-sometimes-running-into-a-503-error-on-a-running-endpoint-in-production-what-can-i-do" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#im-sometimes-running-into-a-503-error-on-a-running-endpoint-in-production-what-can-i-do"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I’m sometimes running into a 503 error on a running endpoint in production. What can I do?</span></h3> <p data-svelte-h="svelte-7z17l1">To help mitigate service interruptions on an Inference Endpoint that needs to be highly available, please make sure to use at least 2 replicas, | |
| i.e. min replicas set to 2.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hf-endpoints-documentation/blob/main/docs/source/support/faq.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
// Client bootstrap for this page: records the deployment paths in a global,
// then loads the two entry modules and starts the app on this script's parent element.
| { | |
// Global config object; `assets` and `base` are set to the same docs path and `env` is empty.
// NOTE(review): presumably read by the SvelteKit client runtime — confirm against the kit version in use.
| __sveltekit_10wt9iy = { | |
| assets: "/docs/inference-endpoints/pr_151/en", | |
| base: "/docs/inference-endpoints/pr_151/en", | |
| env: {} | |
| }; | |
// The element that contains this <script> tag; passed to kit.start() below.
| const element = document.currentScript.parentElement; | |
// Two null entries are passed through to kit.start() as `data`.
| const data = [null,null]; | |
// Load both entry modules in parallel; start only once both have resolved.
| Promise.all([ | |
| import("/docs/inference-endpoints/pr_151/en/_app/immutable/entry/start.56631b46.js"), | |
| import("/docs/inference-endpoints/pr_151/en/_app/immutable/entry/app.08bc0e6a.js") | |
| ]).then(([kit, app]) => { | |
// Hand the app module, the target element and the initial state to the start module.
// NOTE(review): node_ids [0, 20] presumably identify the layout/page nodes to hydrate — confirm.
| kit.start(app, element, { | |
| node_ids: [0, 20], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 52.6 kB
- Xet hash:
- b22034bcc6f26e7b23b06a6b6f69d4e8c46c63ae97267ae912ecec539d41d165
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.