Buckets:
// Compiled page module for the "About Inference Endpoints" documentation page.
// The single-letter helpers are imported from sibling build chunks; their
// implementations are not visible here (presumably the Svelte runtime — the
// `data-svelte-h` attributes and chunk names suggest so, verify against the chunks).
import { s as Ee, n as Pe, o as He } from "../chunks/scheduler.eb244325.js";
import {
  S as Me,
  i as Ae,
  e as a,
  s as o,
  c as M,
  h as ke,
  a as s,
  d as n,
  b as l,
  f as Le,
  g as A,
  j as r,
  k as Ie,
  l as Se,
  m as i,
  n as k,
  t as S,
  o as z,
  p as G,
} from "../chunks/index.661680a1.js";
import { C as ze, H as pe, E as Ge } from "../chunks/MermaidChart.svelte_svelte_type_style_lang.93434bbb.js";

/**
 * Builds the fragment object for the page: a set of lifecycle methods
 * (`c`, `l`, `h`, `m`, `p`, `i`, `o`, `d`) that create, claim, mount,
 * transition and destroy the page's nodes and child components.
 *
 * @param {*} ue - Unused by this function.
 * @returns {object} The fragment object described above.
 */
function je(ue) {
  // Node handles, assigned in c() or l().
  let p, U, j, F, O, W, c, D, m, R, d, Y, h, B, g, J, K, y, N, x, Q, $, V, b, X, C, Z;
  let ee, w, te, _, ne, ie, I, oe, E, le, P, ae, se, q;
  // Guards i() so child intro calls are not repeated while already "in".
  let re;

  // Static page content. Template literals keep the source line breaks.
  const fe = `Inference Endpoints is a managed service to deploy your AI model to production. The infrastructure is managed and configured such that
you can focus on building your AI application.`;
  const ce = "To get an AI model into production, you need three key components:";
  const me = `<li><p><strong>Model Weights and Artifacts</strong>: These are the trained parameters and files that define your AI model, stored and versioned on the
Hugging Face Hub.</p></li> <li><p><strong>Inference Engine</strong>: This is the software that loads and runs your model to generate predictions. Popular engines include vLLM, TGI, and
others, each optimized for different use cases and performance needs.</p></li> <li><p><strong>Production Infrastructure</strong>: This is what Inference Endpoints is. A scalable, secure, and reliable environment where your model runs—handling
requests, scaling with demand, and ensuring uptime.</p></li>`;
  const de = `Inference Endpoints brings all these pieces together into a single managed service. You choose your model from the Hub, select the
inference engine, and Inference Endpoints takes care of the rest—provisioning infrastructure, deploying your model, and making it
accessible via a simple API. This lets you focus on building your application, while we handle the complexity of production AI deployment.`;
  const he = '<img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/about.png" alt="about"/>';
  const ge = "To achieve that we’ve made Inference Endpoints the central place to deploy high performance and open-source Inference Engines.";
  const ve = "Currently we have native support for:";
  const ye = "<li>vLLM</li> <li>Text-generation-inference (TGI)</li> <li>SGLang</li> <li>llama.cpp</li> <li>and Text-embeddings-inference (TEI)</li>";
  const xe = `For the natively supported engines we try to set sensible defaults, expose the most relevant configuration settings and collaborate closely
with the teams maintaining the Inference Engines to make sure they are optimized for production performance.`;
  // The href needs the `mailto:` scheme; a bare address is resolved as a relative URL.
  // NOTE(review): l() only assigns this markup when the `data-svelte-h` check fails, so
  // pre-rendered HTML presumably needs the same fix at its source — confirm.
  const $e = 'If you don’t find your favourite engine here, please reach out to us at <a href="mailto:api-enterprise@huggingface.co">api-enterprise@huggingface.co</a>.';
  const be = `When you deploy an Inference Endpoint, under the hood your selected inference engine (like vLLM, TGI, SGLang, etc.) is packaged
and launched as a prebuilt Docker container. This container includes the inference engine software, your chosen model
weights and artifacts (downloaded directly from the Hugging Face Hub), and any configuration or environment variables you specify.`;
  const Ce = `We manage the full lifecycle of these containers: starting, stopping, scaling (including autoscaling and scale-to-zero),
and monitoring them for health and performance. This orchestration is completely managed for you, so you don’t have to worry about
the complexities of containerization, networking, or cloud resource management.`;
  const Te = 'For more features consider subscribing to <a href="https://huggingface.co/enterprise" rel="nofollow">Team or Enterprise</a>.';
  const we = "It gives your organization more control over access controls, dedicated support and more. Features include:";
  const _e = "<li>Higher quotas for the most performant GPUs</li> <li>Single Sign-on (SSO)</li> <li>Access to Audit Logs</li> <li>Manage teams and projects access controls with Resource Groups</li> <li>Private storage for your repositories</li> <li>Disable the ability to create public repositories (or make repositories private by default)</li> <li>You can request a quote for a contract-based-invoice which allows for more payment options + prepaid credits</li> <li>and more!</li>";

  // Child components, in page order.
  const u = new ze({
    props: {
      containerStyle: "float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;",
    },
  });
  const f = new pe({
    props: { title: "About Inference Endpoints", local: "about-inference-endpoints", headingTag: "h1" },
  });
  const v = new pe({
    props: { title: "Inference Engines", local: "inference-engines", headingTag: "h2" },
  });
  const T = new pe({
    props: { title: "Under the Hood", local: "under-the-hood", headingTag: "h2" },
  });
  const L = new pe({
    props: {
      title: "Enterprise or Team Subscription",
      local: "enterprise-or-team-subscription",
      headingTag: "h2",
    },
  });
  const H = new Ge({
    props: { source: "https://github.com/huggingface/hf-endpoints-documentation/blob/main/docs/source/about.md" },
  });

  const childComponents = [u, f, v, T, L, H];

  return {
    // Create every node and child fragment from scratch, then apply attributes via h().
    c() {
      p = a("meta");
      U = o();
      j = a("p");
      F = o();
      M(u.$$.fragment);
      O = o();
      M(f.$$.fragment);
      W = o();
      c = a("p");
      c.textContent = fe;
      D = o();
      m = a("p");
      m.textContent = ce;
      R = o();
      d = a("ol");
      d.innerHTML = me;
      Y = o();
      h = a("p");
      h.textContent = de;
      B = o();
      g = a("p");
      g.innerHTML = he;
      J = o();
      M(v.$$.fragment);
      K = o();
      y = a("p");
      y.textContent = ge;
      N = o();
      x = a("p");
      x.textContent = ve;
      Q = o();
      $ = a("ul");
      $.innerHTML = ye;
      V = o();
      b = a("p");
      b.textContent = xe;
      X = o();
      C = a("p");
      C.innerHTML = $e;
      Z = o();
      M(T.$$.fragment);
      ee = o();
      w = a("p");
      w.textContent = be;
      te = o();
      _ = a("p");
      _.textContent = Ce;
      ne = o();
      M(L.$$.fragment);
      ie = o();
      I = a("p");
      I.innerHTML = Te;
      oe = o();
      E = a("p");
      E.textContent = we;
      le = o();
      P = a("ul");
      P.innerHTML = _e;
      ae = o();
      M(H.$$.fragment);
      se = o();
      q = a("p");
      this.h();
    },

    // Claim nodes from `e`; static content is only (re)assigned when the value
    // returned by r(node) differs from the expected hash string.
    l(e) {
      const t = ke("svelte-u9bgzb", document.head);
      p = s(t, "META", { name: true, content: true });
      t.forEach(n);
      U = l(e);
      j = s(e, "P", {});
      Le(j).forEach(n);
      F = l(e);
      A(u.$$.fragment, e);
      O = l(e);
      A(f.$$.fragment, e);
      W = l(e);
      c = s(e, "P", { "data-svelte-h": true });
      if (r(c) !== "svelte-12egzkp") {
        c.textContent = fe;
      }
      D = l(e);
      m = s(e, "P", { "data-svelte-h": true });
      if (r(m) !== "svelte-pjhxe6") {
        m.textContent = ce;
      }
      R = l(e);
      d = s(e, "OL", { "data-svelte-h": true });
      if (r(d) !== "svelte-cejzw3") {
        d.innerHTML = me;
      }
      Y = l(e);
      h = s(e, "P", { "data-svelte-h": true });
      if (r(h) !== "svelte-1y1l12u") {
        h.textContent = de;
      }
      B = l(e);
      g = s(e, "P", { "data-svelte-h": true });
      if (r(g) !== "svelte-1s5ukr3") {
        g.innerHTML = he;
      }
      J = l(e);
      A(v.$$.fragment, e);
      K = l(e);
      y = s(e, "P", { "data-svelte-h": true });
      if (r(y) !== "svelte-59uh5h") {
        y.textContent = ge;
      }
      N = l(e);
      x = s(e, "P", { "data-svelte-h": true });
      if (r(x) !== "svelte-1ygbjmf") {
        x.textContent = ve;
      }
      Q = l(e);
      $ = s(e, "UL", { "data-svelte-h": true });
      if (r($) !== "svelte-1i10e71") {
        $.innerHTML = ye;
      }
      V = l(e);
      b = s(e, "P", { "data-svelte-h": true });
      if (r(b) !== "svelte-y0p6de") {
        b.textContent = xe;
      }
      X = l(e);
      C = s(e, "P", { "data-svelte-h": true });
      if (r(C) !== "svelte-l79z0z") {
        C.innerHTML = $e;
      }
      Z = l(e);
      A(T.$$.fragment, e);
      ee = l(e);
      w = s(e, "P", { "data-svelte-h": true });
      if (r(w) !== "svelte-po5x90") {
        w.textContent = be;
      }
      te = l(e);
      _ = s(e, "P", { "data-svelte-h": true });
      if (r(_) !== "svelte-9jxghf") {
        _.textContent = Ce;
      }
      ne = l(e);
      A(L.$$.fragment, e);
      ie = l(e);
      I = s(e, "P", { "data-svelte-h": true });
      if (r(I) !== "svelte-1ne9hjb") {
        I.innerHTML = Te;
      }
      oe = l(e);
      E = s(e, "P", { "data-svelte-h": true });
      if (r(E) !== "svelte-1uam4qw") {
        E.textContent = we;
      }
      le = l(e);
      P = s(e, "UL", { "data-svelte-h": true });
      if (r(P) !== "svelte-1vxs7qh") {
        P.innerHTML = _e;
      }
      ae = l(e);
      A(H.$$.fragment, e);
      se = l(e);
      q = s(e, "P", {});
      Le(q).forEach(n);
      this.h();
    },

    // Set the <meta> attributes carrying the page metadata JSON (qe).
    h() {
      Ie(p, "name", "hf:doc:metadata");
      Ie(p, "content", qe);
    },

    // Mount: the <meta> goes into document.head, everything else into `e` before anchor `t`.
    m(e, t) {
      Se(document.head, p);
      i(e, U, t);
      i(e, j, t);
      i(e, F, t);
      k(u, e, t);
      i(e, O, t);
      k(f, e, t);
      i(e, W, t);
      i(e, c, t);
      i(e, D, t);
      i(e, m, t);
      i(e, R, t);
      i(e, d, t);
      i(e, Y, t);
      i(e, h, t);
      i(e, B, t);
      i(e, g, t);
      i(e, J, t);
      k(v, e, t);
      i(e, K, t);
      i(e, y, t);
      i(e, N, t);
      i(e, x, t);
      i(e, Q, t);
      i(e, $, t);
      i(e, V, t);
      i(e, b, t);
      i(e, X, t);
      i(e, C, t);
      i(e, Z, t);
      k(T, e, t);
      i(e, ee, t);
      i(e, w, t);
      i(e, te, t);
      i(e, _, t);
      i(e, ne, t);
      k(L, e, t);
      i(e, ie, t);
      i(e, I, t);
      i(e, oe, t);
      i(e, E, t);
      i(e, le, t);
      i(e, P, t);
      i(e, ae, t);
      k(H, e, t);
      i(e, se, t);
      i(e, q, t);
      re = true;
    },

    // Update hook: the imported Pe is used as-is (no per-page update logic here).
    p: Pe,

    // Intro: forward to each child fragment once.
    i(e) {
      if (re) {
        return;
      }
      for (const child of childComponents) {
        S(child.$$.fragment, e);
      }
      re = true;
    },

    // Outro: forward to each child fragment.
    o(e) {
      for (const child of childComponents) {
        z(child.$$.fragment, e);
      }
      re = false;
    },

    // Destroy: when `e` is truthy pass every body node to n(); the head <meta>
    // is always passed to n(), and every child component to G().
    d(e) {
      if (e) {
        const bodyNodes = [
          U, j, F, O, W, c, D, m, R, d, Y, h, B, g, J, K, y, N, x, Q,
          $, V, b, X, C, Z, ee, w, te, _, ne, ie, I, oe, E, le, P, ae, se, q,
        ];
        for (const node of bodyNodes) {
          n(node);
        }
      }
      n(p);
      for (const child of childComponents) {
        G(child, e);
      }
    },
  };
}

// Page metadata (title and section outline) as a JSON string; written to the
// `hf:doc:metadata` <meta> tag by the fragment's h() method.
const qe = '{"title":"About Inference Endpoints","local":"about-inference-endpoints","sections":[{"title":"Inference Engines","local":"inference-engines","sections":[],"depth":2},{"title":"Under the Hood","local":"under-the-hood","sections":[],"depth":2},{"title":"Enterprise or Team Subscription","local":"enterprise-or-team-subscription","sections":[],"depth":2}],"depth":1}';

/**
 * Instance initializer passed to Ae by the component constructor.
 *
 * Registers a callback through He that reads the `fw` query parameter from the
 * current URL; the value read is discarded.
 *
 * @param {*} ue - Unused by this function.
 * @returns {Array} Always an empty array.
 */
function Ue(ue) {
  He(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component: wires Ue (instance) and je (fragment) together through Ae. */
class De extends Me {
  constructor(p) {
    super();
    Ae(this, p, Ue, je, Ee, {});
  }
}

export { De as component };
Xet Storage Details
- Size:
- 8.56 kB
- Xet hash:
- 5618665a50f08694f1c63b494bb6941fa3aed9d6711e90355845e502f8a7be01
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.