Buckets:

rtrm's picture
download
raw
38.9 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;FAQs&quot;,&quot;local&quot;:&quot;faqs&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Q: In which regions are Inference Endpoints available?&quot;,&quot;local&quot;:&quot;q-in-which-regions-are-inference-endpoints-available&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: Can I access the instance my Endpoint is running on?&quot;,&quot;local&quot;:&quot;q-can-i-access-the-instance-my-endpoint-is-running-on&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: Can I see my Private Endpoint running on my VPC account?&quot;,&quot;local&quot;:&quot;q-can-i-see-my-private-endpoint-running-on-my-vpc-account&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: Can I run inference in batches?&quot;,&quot;local&quot;:&quot;q-can-i-run-inference-in-batches&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: How can I scale my deployment?&quot;,&quot;local&quot;:&quot;q-how-can-i-scale-my-deployment&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: Will my endpoint still be running if no more requests are processed?&quot;,&quot;local&quot;:&quot;q-will-my-endpoint-still-be-running-if-no-more-requests-are-processed&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: I would like to deploy a model which is not in the supported tasks, is this possible?&quot;,&quot;local&quot;:&quot;q-i-would-like-to-deploy-a-model-which-is-not-in-the-supported-tasks-is-this-possible&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: How much does it cost to run my Endpoint?&quot;,&quot;local&quot;:&quot;q-how-much-does-it-cost-to-run-my-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: Is the data transiting to the Endpoint 
encrypted?&quot;,&quot;local&quot;:&quot;q-is-the-data-transiting-to-the-endpoint-encrypted&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: How can I reduce the latency of my Endpoint?&quot;,&quot;local&quot;:&quot;q-how-can-i-reduce-the-latency-of-my-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: How do I monitor my deployed Endpoint?&quot;,&quot;local&quot;:&quot;q-how-do-i-monitor-my-deployed-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: What if I would like to deploy to a different instance type that is not listed?&quot;,&quot;local&quot;:&quot;q-what-if-i-would-like-to-deploy-to-a-different-instance-type-that-is-not-listed&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: I accidentally leaked my token. Do I need to delete my endpoint?&quot;,&quot;local&quot;:&quot;q-i-accidentally-leaked-my-token-do-i-need-to-delete-my-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: I need to add a custom environment variable (default or secrets) to my endpoint. How can I do this?&quot;,&quot;local&quot;:&quot;q-i-need-to-add-a-custom-environment-variable-default-or-secrets-to-my-endpoint-how-can-i-do-this&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: I’m using the text-generation-inference container type for my Endpoint. Is there more information about using TGI?&quot;,&quot;local&quot;:&quot;q-im-using-the-text-generation-inference-container-type-for-my-endpoint-is-there-more-information-about-using-tgi&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: I’m sometimes running into a 503 error on a running endpoint in production. 
What can I do?&quot;,&quot;local&quot;:&quot;q-im-sometimes-running-into-a-503-error-on-a-running-endpoint-in-production-what-can-i-do&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: What’s the difference between Dedicated and Serverless Endpoints?&quot;,&quot;local&quot;:&quot;q-whats-the-difference-between-dedicated-and-serverless-endpoints&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:1}">
<link href="/docs/inference-endpoints/pr_97/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_97/en/_app/immutable/entry/start.c8bc70a7.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_97/en/_app/immutable/chunks/scheduler.389d799c.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_97/en/_app/immutable/chunks/singletons.daaf663c.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_97/en/_app/immutable/chunks/paths.7c9a8928.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_97/en/_app/immutable/entry/app.5e62e873.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_97/en/_app/immutable/chunks/index.8f81d18f.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_97/en/_app/immutable/nodes/0.721b3eaa.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_97/en/_app/immutable/nodes/4.9ecc117f.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_97/en/_app/immutable/chunks/CodeBlock.3845caa1.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_97/en/_app/immutable/chunks/EditOnGithub.33306dfe.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;FAQs&quot;,&quot;local&quot;:&quot;faqs&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Q: In which regions are Inference Endpoints available?&quot;,&quot;local&quot;:&quot;q-in-which-regions-are-inference-endpoints-available&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: Can I access the instance my Endpoint is running on?&quot;,&quot;local&quot;:&quot;q-can-i-access-the-instance-my-endpoint-is-running-on&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: Can I see my Private Endpoint running on my VPC account?&quot;,&quot;local&quot;:&quot;q-can-i-see-my-private-endpoint-running-on-my-vpc-account&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: Can I run inference in batches?&quot;,&quot;local&quot;:&quot;q-can-i-run-inference-in-batches&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: How can I scale my deployment?&quot;,&quot;local&quot;:&quot;q-how-can-i-scale-my-deployment&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: Will my endpoint still be running if no more requests are processed?&quot;,&quot;local&quot;:&quot;q-will-my-endpoint-still-be-running-if-no-more-requests-are-processed&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: I would like to deploy a model which is not in the supported tasks, is this possible?&quot;,&quot;local&quot;:&quot;q-i-would-like-to-deploy-a-model-which-is-not-in-the-supported-tasks-is-this-possible&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: How much does it cost to run my 
Endpoint?&quot;,&quot;local&quot;:&quot;q-how-much-does-it-cost-to-run-my-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: Is the data transiting to the Endpoint encrypted?&quot;,&quot;local&quot;:&quot;q-is-the-data-transiting-to-the-endpoint-encrypted&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: How can I reduce the latency of my Endpoint?&quot;,&quot;local&quot;:&quot;q-how-can-i-reduce-the-latency-of-my-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: How do I monitor my deployed Endpoint?&quot;,&quot;local&quot;:&quot;q-how-do-i-monitor-my-deployed-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: What if I would like to deploy to a different instance type that is not listed?&quot;,&quot;local&quot;:&quot;q-what-if-i-would-like-to-deploy-to-a-different-instance-type-that-is-not-listed&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: I accidentally leaked my token. Do I need to delete my endpoint?&quot;,&quot;local&quot;:&quot;q-i-accidentally-leaked-my-token-do-i-need-to-delete-my-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: I need to add a custom environment variable (default or secrets) to my endpoint. How can I do this?&quot;,&quot;local&quot;:&quot;q-i-need-to-add-a-custom-environment-variable-default-or-secrets-to-my-endpoint-how-can-i-do-this&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: I’m using the text-generation-inference container type for my Endpoint. Is there more information about using TGI?&quot;,&quot;local&quot;:&quot;q-im-using-the-text-generation-inference-container-type-for-my-endpoint-is-there-more-information-about-using-tgi&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: I’m sometimes running into a 503 error on a running endpoint in production. 
What can I do?&quot;,&quot;local&quot;:&quot;q-im-sometimes-running-into-a-503-error-on-a-running-endpoint-in-production-what-can-i-do&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Q: What’s the difference between Dedicated and Serverless Endpoints?&quot;,&quot;local&quot;:&quot;q-whats-the-difference-between-dedicated-and-serverless-endpoints&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="faqs" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#faqs"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>FAQs</span></h1> <h3 class="relative group"><a id="q-in-which-regions-are-inference-endpoints-available" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-in-which-regions-are-inference-endpoints-available"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: In which regions are Inference Endpoints available?</span></h3> <p data-svelte-h="svelte-1j304g2">A: Inference Endpoints are currently available on AWS in us-east-1 (N. Virginia) &amp; eu-west-1 (Ireland), on Azure in eastus (Virginia), and on GCP in us-east4 (Virginia). If you need to deploy in a different region, please let us know.</p> <h3 class="relative group"><a id="q-can-i-access-the-instance-my-endpoint-is-running-on" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-can-i-access-the-instance-my-endpoint-is-running-on"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 
0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: Can I access the instance my Endpoint is running on?</span></h3> <p data-svelte-h="svelte-70k5u1">A: No, you cannot access the instance hosting your Endpoint. But if you are missing information or need more insights on the machine where the Endpoint is running, please contact us.</p> <h3 class="relative group"><a id="q-can-i-see-my-private-endpoint-running-on-my-vpc-account" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-can-i-see-my-private-endpoint-running-on-my-vpc-account"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: Can I see my Private Endpoint running on my VPC account?</span></h3> <p data-svelte-h="svelte-1yippg2">A: No, when creating a Private Endpoint (a Hugging Face Inference Endpoint linked to your VPC via AWS/Azure PrivateLink), you can only see the ENI in your VPC where the Endpoint is available.</p> <h3 class="relative group"><a id="q-can-i-run-inference-in-batches" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" 
href="#q-can-i-run-inference-in-batches"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: Can I run inference in batches?</span></h3> <p data-svelte-h="svelte-19pq2oe">A: It depends on the Task. The <a href="/docs/inference-endpoints/supported_tasks">supported Tasks</a> use the transformers, sentence-transformers, or diffusers pipelines under the hood. If your Task pipeline supports batching (e.g. Zero-Shot Classification), then batch inference is supported. 
In any case, you can always create your own <a href="/docs/inference-endpoints/guides/custom_handler">inference handler</a> and implement batching.</p> <h3 class="relative group"><a id="q-how-can-i-scale-my-deployment" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-how-can-i-scale-my-deployment"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: How can I scale my deployment?</span></h3> <p data-svelte-h="svelte-8t8fdi">A: The Endpoints are scaled automatically for you; the only information you need to provide is a min replica target and a max replica target. The system will then scale your Endpoint based on the load. 
Scaling to zero is supported with a variety of timing options.</p> <h3 class="relative group"><a id="q-will-my-endpoint-still-be-running-if-no-more-requests-are-processed" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-will-my-endpoint-still-be-running-if-no-more-requests-are-processed"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: Will my endpoint still be running if no more requests are processed?</span></h3> <p data-svelte-h="svelte-1o8fodi">A: Yes, your Endpoint will always stay available/up with the number of min replicas defined in the Advanced configuration.</p> <h3 class="relative group"><a id="q-i-would-like-to-deploy-a-model-which-is-not-in-the-supported-tasks-is-this-possible" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-i-would-like-to-deploy-a-model-which-is-not-in-the-supported-tasks-is-this-possible"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: I would like to deploy a model which is not in the supported tasks, is this possible?</span></h3> <p data-svelte-h="svelte-1ikn0zc">A: Yes, you can deploy any repository from the <a href="https://huggingface.co/models" rel="nofollow">Hugging Face Hub</a> and if your task/model/framework is not supported out of the box, you can <a href="/docs/inference-endpoints/guides/custom_handler">create your own inference handler</a> and then deploy your model to an Endpoint.</p> <h3 class="relative group"><a id="q-how-much-does-it-cost-to-run-my-endpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-how-much-does-it-cost-to-run-my-endpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 
11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: How much does it cost to run my Endpoint?</span></h3> <p data-svelte-h="svelte-tl2ufl">A: Dedicated Endpoints are billed based on the compute hours of your Running Endpoints, and the associated instance types. We may add usage costs for load balancers and Private Links in the future.</p> <h3 class="relative group"><a id="q-is-the-data-transiting-to-the-endpoint-encrypted" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-is-the-data-transiting-to-the-endpoint-encrypted"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: Is the data transiting to the Endpoint encrypted?</span></h3> <p data-svelte-h="svelte-10encps">A: Yes, data is encrypted during transit with TLS/SSL.</p> <h3 class="relative group"><a id="q-how-can-i-reduce-the-latency-of-my-endpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" 
href="#q-how-can-i-reduce-the-latency-of-my-endpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: How can I reduce the latency of my Endpoint?</span></h3> <p data-svelte-h="svelte-tgkqd1">A: There are several ways to reduce the latency of your Endpoint. One is to deploy your Endpoint in a region close to your application to reduce the network overhead. Another is to optimize your model using <a href="https://huggingface.co/docs/optimum/index" rel="nofollow">Hugging Face Optimum</a> before creating your Endpoint. 
If you need help or have more questions about reducing latency, please contact us.</p> <h3 class="relative group"><a id="q-how-do-i-monitor-my-deployed-endpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-how-do-i-monitor-my-deployed-endpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: How do I monitor my deployed Endpoint?</span></h3> <p data-svelte-h="svelte-1ppk556">A: You can currently monitor your Endpoint through the <a href="https://ui.endpoints.huggingface.co/endpoints" rel="nofollow">🤗 Inference Endpoints web application</a>, where you have access to the <a href="/docs/inference-endpoints/guides/logs">Logs of your Endpoints</a> as well as a <a href="/docs/inference-endpoints/guides/metrics">metrics dashboard</a>. 
If you need programmatic access or more information, please contact us.</p> <h3 class="relative group"><a id="q-what-if-i-would-like-to-deploy-to-a-different-instance-type-that-is-not-listed" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-what-if-i-would-like-to-deploy-to-a-different-instance-type-that-is-not-listed"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: What if I would like to deploy to a different instance type that is not listed?</span></h3> <p data-svelte-h="svelte-bl2jeb">A: Please contact us if you feel your model would do better on a different instance type than what is listed.</p> <h3 class="relative group"><a id="q-i-accidentally-leaked-my-token-do-i-need-to-delete-my-endpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-i-accidentally-leaked-my-token-do-i-need-to-delete-my-endpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid 
meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: I accidentally leaked my token. Do I need to delete my endpoint?</span></h3> <p data-svelte-h="svelte-sn4lhc">A: You can invalidate existing personal tokens and create new ones in your settings here: <a href="https://huggingface.co/settings/tokens" rel="nofollow">https://huggingface.co/settings/tokens</a>. Note that fine-grained tokens <em>are</em> supported in Inference Endpoints - please consider using them!</p> <h3 class="relative group"><a id="q-i-need-to-add-a-custom-environment-variable-default-or-secrets-to-my-endpoint-how-can-i-do-this" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-i-need-to-add-a-custom-environment-variable-default-or-secrets-to-my-endpoint-how-can-i-do-this"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 
8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: I need to add a custom environment variable (default or secrets) to my endpoint. How can I do this?</span></h3> <p data-svelte-h="svelte-qwm1nw">A: This is now possible in the UI, or via the API:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;model&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;image&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;huggingface&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;env&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> <span class="hljs-attr">&quot;var1&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;value&quot;</span> <span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="q-im-using-the-text-generation-inference-container-type-for-my-endpoint-is-there-more-information-about-using-tgi" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-im-using-the-text-generation-inference-container-type-for-my-endpoint-is-there-more-information-about-using-tgi"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: I’m using the text-generation-inference container type for my Endpoint. Is there more information about using TGI?</span></h3> <p data-svelte-h="svelte-enpakd">A: Yes! 
Please check out our <a href="https://huggingface.co/docs/text-generation-inference/index" rel="nofollow">TGI documentation</a> and this <a href="https://www.youtube.com/watch?v=jlMAX2Oaht0" rel="nofollow">video</a> on TGI deploys.</p> <h3 class="relative group"><a id="q-im-sometimes-running-into-a-503-error-on-a-running-endpoint-in-production-what-can-i-do" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-im-sometimes-running-into-a-503-error-on-a-running-endpoint-in-production-what-can-i-do"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: I’m sometimes running into a 503 error on a running endpoint in production. 
What can I do?</span></h3> <p data-svelte-h="svelte-1iwtr3j">A: To help mitigate service interruptions on an Endpoint that needs to be highly available, please make sure to use at least 2 replicas, i.e., with min replicas set to 2.</p> <h3 class="relative group"><a id="q-whats-the-difference-between-dedicated-and-serverless-endpoints" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#q-whats-the-difference-between-dedicated-and-serverless-endpoints"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Q: What’s the difference between Dedicated and Serverless Endpoints?</span></h3> <p data-svelte-h="svelte-wbdt7p">A: The Inference API (Serverless) is a solution to easily explore and evaluate models. 
For larger volumes of requests, or if you need guaranteed latency/performance, use <a href="https://ui.endpoints.huggingface.co/new" rel="nofollow">Inference Endpoints (Dedicated)</a> to easily deploy your models on dedicated, fully-managed infrastructure.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hf-endpoints-documentation/blob/main/docs/source/faq.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
// Client bootstrap for this page: publishes path configuration on a global,
// then loads the two entry modules and starts the app on this script's
// parent element. Appears to be SvelteKit-generated output; edit with care.
{
// Assigned without a declaration, so in this classic (non-module) script it
// becomes a property of the global object.
// NOTE(review): presumably read by the imported entry modules — confirm.
__sveltekit_1adeasz = {
assets: "/docs/inference-endpoints/pr_97/en",
base: "/docs/inference-endpoints/pr_97/en",
env: {}
};
// Must be read synchronously here: document.currentScript no longer refers
// to this <script> by the time the promise callback below runs.
const element = document.currentScript.parentElement;
// Two null entries, passed through unchanged to kit.start() as `data`.
// NOTE(review): presumably one slot per entry in node_ids — confirm.
const data = [null,null];
// Load both entry modules in parallel; start only after both have resolved.
Promise.all([
import("/docs/inference-endpoints/pr_97/en/_app/immutable/entry/start.c8bc70a7.js"),
import("/docs/inference-endpoints/pr_97/en/_app/immutable/entry/app.5e62e873.js")
]).then(([kit, app]) => {
// Hand the app module, the mount element, and the initial state to the
// start module's start() function.
kit.start(app, element, {
// NOTE(review): presumably the route nodes (layout, page) to hydrate — confirm.
node_ids: [0, 4],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
38.9 kB
·
Xet hash:
542d3894ae9bbec3fd64deb7179c403878b0678b5d19cb8593627791168d3ad8

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.