Buckets:

rtrm's picture
download
raw
27.8 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Configuration&quot;,&quot;local&quot;:&quot;configuration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Endpoint name, model and organization&quot;,&quot;local&quot;:&quot;endpoint-name-model-and-organization&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Hardware Configuration&quot;,&quot;local&quot;:&quot;hardware-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Authentication&quot;,&quot;local&quot;:&quot;authentication&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Autoscaling&quot;,&quot;local&quot;:&quot;autoscaling&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inference Engine Configuration&quot;,&quot;local&quot;:&quot;inference-engine-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Container Configuration&quot;,&quot;local&quot;:&quot;container-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Environment Variables&quot;,&quot;local&quot;:&quot;environment-variables&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Endpoint Tags&quot;,&quot;local&quot;:&quot;endpoint-tags&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Network&quot;,&quot;local&quot;:&quot;network&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Advanced Settings&quot;,&quot;local&quot;:&quot;advanced-settings&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/inference-endpoints/pr_151/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/entry/start.56631b46.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/scheduler.eb244325.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/singletons.54c25bcd.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/index.3c23fb4b.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/paths.12ce0a18.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/entry/app.08bc0e6a.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/preload-helper.0ac538a1.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/index.661680a1.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/nodes/0.69485259.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/nodes/13.44882577.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_151/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.c047d438.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Configuration&quot;,&quot;local&quot;:&quot;configuration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Endpoint name, model and organization&quot;,&quot;local&quot;:&quot;endpoint-name-model-and-organization&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Hardware Configuration&quot;,&quot;local&quot;:&quot;hardware-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Authentication&quot;,&quot;local&quot;:&quot;authentication&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Autoscaling&quot;,&quot;local&quot;:&quot;autoscaling&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inference Engine Configuration&quot;,&quot;local&quot;:&quot;inference-engine-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Container Configuration&quot;,&quot;local&quot;:&quot;container-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Environment Variables&quot;,&quot;local&quot;:&quot;environment-variables&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Endpoint Tags&quot;,&quot;local&quot;:&quot;endpoint-tags&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Network&quot;,&quot;local&quot;:&quot;network&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Advanced Settings&quot;,&quot;local&quot;:&quot;advanced-settings&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; 
display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 max-sm:gap-0.5 h-6 max-sm:h-5 px-2 max-sm:px-1.5 text-[11px] max-sm:text-[9px] font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0"><svg class="w-3 h-3 max-sm:w-2.5 max-sm:h-2.5" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-6 max-sm:h-5 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible w-3 h-3 max-sm:w-2.5 max-sm:h-2.5 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="configuration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 
with-hover:right-full" href="#configuration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Configuration</span></h1> <p data-svelte-h="svelte-1iggp98">This section describes the configuration options available when creating a new inference endpoint. Each section of
the interface allows fine-grained control over how the model is deployed, accessed, and scaled.</p> <h2 class="relative group"><a id="endpoint-name-model-and-organization" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#endpoint-name-model-and-organization"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Endpoint name, model and organization</span></h2> <p data-svelte-h="svelte-1sd4j0z">In the top left you can:</p> <ul data-svelte-h="svelte-1qy89hk"><li>change the name of the inference endpoint</li> <li>verify to which organization you’re deploying this model</li> <li>verify which model you are deploying</li> <li>and which Hugging Face Hub repo you are deploying this model from</li></ul> <p data-svelte-h="svelte-8t663c"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/configuration/1-name-org-model.png" alt="name-org-model"></p> <h2 class="relative group"><a id="hardware-configuration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" 
href="#hardware-configuration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Hardware Configuration</span></h2> <p data-svelte-h="svelte-1o9wtbi">The Hardware Configuration section allows you to choose the compute backend used to host the model.
You can select from three major cloud providers:</p> <ul data-svelte-h="svelte-1nit8px"><li>Amazon Web Services (AWS)</li> <li>Microsoft Azure</li> <li>Google Cloud Platform</li></ul> <p data-svelte-h="svelte-171jvsn"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/configuration/2-hardware.png" alt="hardware"></p> <p data-svelte-h="svelte-a5xv7a">You must also choose an accelerator type:</p> <ul data-svelte-h="svelte-1c8pe9h"><li>CPU</li> <li>GPU</li> <li>INF2 (AWS Inferentia)</li></ul> <p data-svelte-h="svelte-14px1ty">Additionally, you can select the deployment region (e.g., East US) using the dropdown menu. Once the
provider, accelerator, and region are chosen, a list of available instance types is displayed. Each instance tile includes:</p> <ul data-svelte-h="svelte-8x219j"><li>GPU Type and Count</li> <li>Memory (e.g., 48 GB)</li> <li>vCPUs and RAM</li> <li>Hourly Pricing (e.g., $1.80 / h)</li></ul> <p data-svelte-h="svelte-tqi8q5">You can select a tile to choose that instance type for your deployment. Instances that are incompatible or unavailable in the
selected region are grayed out and unclickable.</p> <h2 class="relative group"><a id="authentication" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#authentication"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Authentication</span></h2> <p data-svelte-h="svelte-1j8fq6y">This section determines who can access your deployed endpoint. 
Available options are:</p> <ul data-svelte-h="svelte-zgmog1"><li><strong>Private (default)</strong>: Accessible only to you, or members of your Hugging Face organization, using a personal HF access token.</li> <li><strong>Public</strong>: Anyone can access your endpoint, without authentication.</li> <li><strong>Authenticated</strong>: Anyone with a Hugging Face account can access it, using their personal HF access tokens.</li></ul> <p data-svelte-h="svelte-16pg7ay">Additionally, if you deploy your Inference Endpoint in AWS, you can use <strong>AWS PrivateLink</strong> for an intra-region secured connection to your AWS VPC.</p> <p data-svelte-h="svelte-vf6o2r"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/configuration/11-auth.png" alt="auth"></p> <h2 class="relative group"><a id="autoscaling" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#autoscaling"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Autoscaling</span></h2> <p data-svelte-h="svelte-t69vok">The Autoscaling section configures how many replicas of your model run and whether the system scales down to zero during 
periods of inactivity. For more
information we recommend reading the <a href="./autoscaling">in-depth guide on autoscaling</a>.</p> <p data-svelte-h="svelte-11k9llv"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/configuration/4-autoscaling.png" alt="autoscaling"></p> <ul data-svelte-h="svelte-1mmank1"><li><strong>Automatic Scale-to-Zero</strong>: A dropdown lets you choose how long the system should wait after the last request before
scaling down to zero. Default is after 1 hour with no activity.</li> <li><strong>Number of Replicas</strong>:
<ul><li>Min: Minimum number of replicas to keep running. Note that enabling automatic scale-to-zero requires setting this to 0.</li> <li>Max: Maximum number of replicas allowed (e.g., 1)</li></ul></li> <li><strong>Autoscaling strategy</strong>:
<ul><li>Based on hardware usage: For example, a scale up will be triggered if the average hardware utilisation (%) exceeds this threshold for more than 20 seconds.</li> <li>Pending requests: A scale up event will be triggered if the average number of pending requests exceeds this threshold for more than 20 seconds.</li></ul></li></ul> <h2 class="relative group"><a id="inference-engine-configuration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference-engine-configuration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference Engine Configuration</span></h2> <p data-svelte-h="svelte-1iyhirj">This section allows you to specify how the container hosting your model behaves. This setting depends on the selected inference engine.
For configuration details, please read the Inference Engine section.
<img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/configuration/9-inference-engine.png" alt="inference-engine"></p> <h2 class="relative group"><a id="container-configuration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#container-configuration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Container Configuration</span></h2> <p data-svelte-h="svelte-qhhi7y">Here you can edit the container arguments and container command.
<img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/configuration/8-container-config.png" alt="container-configs"></p> <h2 class="relative group"><a id="environment-variables" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#environment-variables"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Environment Variables</span></h2> <p data-svelte-h="svelte-1pyc3um">Environment variables can be provided to customize container behavior or pass secrets.</p> <ul data-svelte-h="svelte-1352j3g"><li><strong>Default Env</strong>: Key-value pairs passed as plain environment variables.</li> <li><strong>Secret Env</strong>: Key-value pairs stored securely and injected at runtime.</li></ul> <p data-svelte-h="svelte-bqywcv">Each section allows you to add multiple entries using the Add button.</p> <p data-svelte-h="svelte-1ikgxv8"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/configuration/5-env-vars.png" alt="env-vars"></p> <h2 class="relative group"><a id="endpoint-tags" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute 
with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#endpoint-tags"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Endpoint Tags</span></h2> <p data-svelte-h="svelte-x7p47v">You can label endpoints with tags (e.g., for-testing) to help organize and manage deployments across environments or teams. In the dashboard
you will be able to filter and sort endpoints based on these tags.
Tags are plain text labels added via the Add button.</p> <p data-svelte-h="svelte-1gtrisl"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/configuration/6-tags.png" alt="tags"></p> <h2 class="relative group"><a id="network" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#network"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Network</span></h2> <p data-svelte-h="svelte-wt445i">This section determines from where your deployed endpoint can be accessed.</p> <p data-svelte-h="svelte-wljkcn">By default, your endpoint is accessible from the Internet, and secured with TLS/SSL. 
Endpoints deployed on an AWS instance can use AWS PrivateLink to restrict access to a specific VPC.</p> <p data-svelte-h="svelte-110l7d6">The available options are:</p> <ul data-svelte-h="svelte-tpl1ov"><li>Use AWS PrivateLink: check to activate AWS PrivateLink for your endpoint.</li> <li>AWS Account ID: You need to provide the AWS ID of the account that owns the VPC you want to restrict access to.</li> <li>PrivateLink Sharing: check to enable sharing of the same PrivateLink between different endpoints.</li></ul> <p data-svelte-h="svelte-16te9sq"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/configuration/10-network.png" alt="network"></p> <h2 class="relative group"><a id="advanced-settings" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-settings"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced Settings</span></h2> <p data-svelte-h="svelte-1u5ppiv">Advanced Settings offer more fine-grained control over deployment.</p> <p data-svelte-h="svelte-1khxjze"><img 
src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/configuration/7-advanced.png" alt="advanced"></p> <ul data-svelte-h="svelte-1fwagwe"><li><strong>Commit Revision</strong>: Optionally specify a commit hash to select which revision of the model repository on the Hugging Face Hub
you want to download the model artifacts from</li> <li><strong>Task</strong>: Defines the type of model task. This is usually inferred from the model repository.</li> <li><strong>Container Arguments</strong>: Pass CLI-style arguments to the container entrypoint.</li> <li><strong>Container Command</strong>: Override the container entrypoint entirely.</li> <li><strong>Download Pattern</strong>: Defines which model files are downloaded.</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hf-endpoints-documentation/blob/main/docs/source/guides/configuration.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
// Client-side bootstrap for this page: publishes path settings on a global,
// then loads the two entry modules and starts the app on this script's parent.
{
// Assigned without a declaration, so the object is not scoped to this block
// and is reachable by code outside it. Both `assets` and `base` point at the
// same versioned docs root; `env` is an empty object here.
__sveltekit_10wt9iy = {
assets: "/docs/inference-endpoints/pr_151/en",
base: "/docs/inference-endpoints/pr_151/en",
env: {}
};
// Captured here, before any asynchronous work: document.currentScript only
// refers to this script tag while it is executing synchronously, so it could
// not be read reliably inside the .then() callback below.
const element = document.currentScript.parentElement;
// Two null entries, the same length as `node_ids` below.
// NOTE(review): presumably one server-data slot per node id — confirm.
const data = [null,null];
// Load both entry modules in parallel; nothing starts until both resolve.
// NOTE(review): there is no .catch(), so a failed import surfaces only as an
// unhandled promise rejection.
Promise.all([
import("/docs/inference-endpoints/pr_151/en/_app/immutable/entry/start.56631b46.js"),
import("/docs/inference-endpoints/pr_151/en/_app/immutable/entry/app.08bc0e6a.js")
]).then(([kit, app]) => {
// Hand the app module, the mount element and the initial page state to the
// `start` function exported by the first entry module.
kit.start(app, element, {
// NOTE(review): 0 and 13 match the nodes/0.*.js and nodes/13.*.js chunks
// preloaded earlier in this document — presumably layout and page; confirm.
node_ids: [0, 13],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
27.8 kB
·
Xet hash:
f75f8ceb695b7a074d1e41cb7db3079e855ff7b162c80c940c97b503f6767cf9

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.