Buckets:
// NOTE(review): this looks like minified, compiler-generated output (the
// svelte-* hashes and data-svelte-h attributes suggest Svelte). The page copy
// is presumably authored in the about.mdx file named by the source prop that
// is passed to the ke component below, so content fixes likely belong there
// rather than in this file - confirm before editing by hand.
// NOTE(review): the leading pipe and trailing pipes on each line look like
// table markup from the page this text was captured from, not module code.
//
// Layout of this block:
//   imports  Single-letter aliases from ../chunks/scheduler, ../chunks/index
//            and ../chunks/getInferenceSnippets. Their implementations are not
//            visible here, so the roles described below are read off the call
//            shapes only.
//   Se(re)   Builds the About Inference Endpoints page. The parameter re is
//            not used. The page copy is held in local string constants
//            (ue, pe, ce, fe, ...). Four ae components are created (one with
//            headingTag h1 and three with h2) plus one ke component, and an
//            object with these methods is returned:
//              c()     creates the meta, p, ol and ul nodes through s(...),
//                      assigns the copy through textContent or innerHTML,
//                      calls A on each component fragment, then calls h().
//              l(e)    obtains the same nodes through a(...) and l(e); for
//                      nodes requested with data-svelte-h the copy is only
//                      assigned when r(node) differs from the expected
//                      svelte-* hash (presumably reuse of server-rendered
//                      markup - confirm). Ends by calling h().
//              h()     sets name to hf:doc:metadata and content to ze on the
//                      meta node through _e.
//              m(e,t)  passes document.head and the meta node to Ae, passes
//                      every other node to i(e, node, t) and every component
//                      to S(component, e, t), then sets se to true.
//              p       is Le, imported from the scheduler chunk.
//              i(e)    calls z on the five component fragments unless se is
//                      already set, then sets se to true.
//              o(e)    calls q on the five component fragments and sets se to
//                      false.
//              d(e)    calls n on every non-meta node when e is truthy, always
//                      calls n on the meta node, and calls G on each component.
//   ze       JSON text holding the page title, its local anchor and the three
//            depth-2 sections; it is written to the meta node by h().
//   qe(re)   Registers a callback through Ee that reads the fw query parameter
//            from window.location.search and discards the value, then returns
//            an empty array. The parameter re is not used.
//   Fe       Class extending Pe; its constructor calls super() and then
//            He(this, u, qe, Se, Ie, {}). Exported under the name component.
//
// NOTE(review): the anchor inside xe has href api-enterprise@huggingface.co
// with no mailto: scheme, so it would presumably resolve as a relative URL.
// NOTE(review): the copy contains what look like typos: Enginges (heading
// title, repeated in ze and in the inference-enginges anchor), maintaing, and
// Text-generations-inference (likely Text-generation-inference - confirm).
| import{s as Ie,n as Le,o as Ee}from"../chunks/scheduler.f6b352c8.js";import{S as Pe,i as He,g as s,s as o,r as A,A as Me,h as a,f as n,c as l,j as we,u as k,x as r,k as _e,y as Ae,a as i,v as S,d as z,t as q,w as G}from"../chunks/index.b90df637.js";import{H as ae,E as ke}from"../chunks/getInferenceSnippets.c92b3fa8.js";function Se(re){let u,j,H,U,p,F,c,ue=`Inference Endpoints is a managed service to deploy your AI model to production. The infrastructure is managed and configured such that | |
| you can focus on building your AI application.`,O,f,pe="To get an AI model into production, you need three key components:",W,d,ce=`<li><p><strong>Model Weights and Artifacts</strong>: These are the trained parameters and files that define your AI model, stored and versioned on the | |
| Hugging Face Hub.</p></li> <li><p><strong>Inference Engine</strong>: This is the software that loads and runs your model to generate predictions. Popular engines include vLLM, TGI, and | |
| others, each optimized for different use cases and performance needs.</p></li> <li><p><strong>Production Infrastructure</strong>: This is what Inference Endpoints is. A scalable, secure, and reliable environment where your model runs—handling | |
| requests, scaling with demand, and ensuring uptime.</p></li>`,D,m,fe=`Inference Endpoints brings all these pieces together into a single managed service. You choose your model from the Hub, select the | |
| inference engine, and Inference Endpoints takes care of the rest—provisioning infrastructure, deploying your model, and making it | |
| accessible via a simple API. This lets you focus on building your application, while we handle the complexity of production AI deployment.`,R,h,de='<img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/about.png" alt="about"/>',Y,g,B,v,me="To achieve that we’ve made Inference Endpoints the central place to deploy high performance and open-source Inference Engines.",J,x,he="Currently we have native support for:",K,y,ge="<li>vLLM</li> <li>Text-generations-inference (TGI)</li> <li>SGLang</li> <li>llama.cpp</li> <li>and Text-embeddings-inference (TEI)</li>",N,b,ve=`For the natively supported engines we try to set sensible defaults, expose the most relevant configuration settings and collaborate closely | |
| with the teams maintaing the Inference Enginges to make sure they are optimized for production performance.`,Q,$,xe='If you don’t find your favourite engine here, please reach out to us at <a href="api-enterprise@huggingface.co">api-enterprise@huggingface.co</a>.',V,C,X,T,ye=`When you deploy an Inference Endpoint, under the hood your selected inference engine (like vLLM, TGI, SGLang, etc.) is packaged | |
| and launched as a prebuilt Docker container. This container includes the inference engine software, your chosen model | |
| weights and artifacts (downloaded directly from the Hugging Face Hub), and any configuration or environment variables you specify.`,Z,w,be=`We manage the full lifecycle of these containers: starting, stopping, scaling (including autoscaling and scale-to-zero), | |
| and monitoring them for health and performance. This orchestration is completely managed for you, so you don’t have to worry about | |
| the complexities of containerization, networking, or cloud resource management.`,ee,_,te,I,$e='For more features consider subscribing to <a href="https://huggingface.co/enterprise" rel="nofollow">Team or Enterprise</a>.',ne,L,Ce="It gives your organization more control over access controls, dedicated support and more. Features include:",ie,E,Te="<li>Higher quotas for the most performant GPUs</li> <li>Single Sign-on (SSO)</li> <li>Access to Audit Logs</li> <li>Manage teams and projects access controls with Resource Groups</li> <li>Private storage for your repositories</li> <li>Disable the ability to create public repositories (or make repositories private by default)</li> <li>You can request a quote for a contract-based-invoice which allows for more payment options + prepaid credits</li> <li>and more!</li>",oe,P,le,M,se;return p=new ae({props:{title:"About Inference Endpoints",local:"about-inference-endpoints",headingTag:"h1"}}),g=new ae({props:{title:"Inference Enginges",local:"inference-enginges",headingTag:"h2"}}),C=new ae({props:{title:"Under the Hood",local:"under-the-hood",headingTag:"h2"}}),_=new ae({props:{title:"Enterprise or Team Subscription",local:"enterprise-or-team-subscription",headingTag:"h2"}}),P=new 
ke({props:{source:"https://github.com/huggingface/hf-endpoints-documentation/blob/main/docs/source/about.mdx"}}),{c(){u=s("meta"),j=o(),H=s("p"),U=o(),A(p.$$.fragment),F=o(),c=s("p"),c.textContent=ue,O=o(),f=s("p"),f.textContent=pe,W=o(),d=s("ol"),d.innerHTML=ce,D=o(),m=s("p"),m.textContent=fe,R=o(),h=s("p"),h.innerHTML=de,Y=o(),A(g.$$.fragment),B=o(),v=s("p"),v.textContent=me,J=o(),x=s("p"),x.textContent=he,K=o(),y=s("ul"),y.innerHTML=ge,N=o(),b=s("p"),b.textContent=ve,Q=o(),$=s("p"),$.innerHTML=xe,V=o(),A(C.$$.fragment),X=o(),T=s("p"),T.textContent=ye,Z=o(),w=s("p"),w.textContent=be,ee=o(),A(_.$$.fragment),te=o(),I=s("p"),I.innerHTML=$e,ne=o(),L=s("p"),L.textContent=Ce,ie=o(),E=s("ul"),E.innerHTML=Te,oe=o(),A(P.$$.fragment),le=o(),M=s("p"),this.h()},l(e){const t=Me("svelte-u9bgzb",document.head);u=a(t,"META",{name:!0,content:!0}),t.forEach(n),j=l(e),H=a(e,"P",{}),we(H).forEach(n),U=l(e),k(p.$$.fragment,e),F=l(e),c=a(e,"P",{"data-svelte-h":!0}),r(c)!=="svelte-12egzkp"&&(c.textContent=ue),O=l(e),f=a(e,"P",{"data-svelte-h":!0}),r(f)!=="svelte-pjhxe6"&&(f.textContent=pe),W=l(e),d=a(e,"OL",{"data-svelte-h":!0}),r(d)!=="svelte-cejzw3"&&(d.innerHTML=ce),D=l(e),m=a(e,"P",{"data-svelte-h":!0}),r(m)!=="svelte-1y1l12u"&&(m.textContent=fe),R=l(e),h=a(e,"P",{"data-svelte-h":!0}),r(h)!=="svelte-1s5ukr3"&&(h.innerHTML=de),Y=l(e),k(g.$$.fragment,e),B=l(e),v=a(e,"P",{"data-svelte-h":!0}),r(v)!=="svelte-59uh5h"&&(v.textContent=me),J=l(e),x=a(e,"P",{"data-svelte-h":!0}),r(x)!=="svelte-1ygbjmf"&&(x.textContent=he),K=l(e),y=a(e,"UL",{"data-svelte-h":!0}),r(y)!=="svelte-1xskrnc"&&(y.innerHTML=ge),N=l(e),b=a(e,"P",{"data-svelte-h":!0}),r(b)!=="svelte-qasxl6"&&(b.textContent=ve),Q=l(e),$=a(e,"P",{"data-svelte-h":!0}),r($)!=="svelte-l79z0z"&&($.innerHTML=xe),V=l(e),k(C.$$.fragment,e),X=l(e),T=a(e,"P",{"data-svelte-h":!0}),r(T)!=="svelte-po5x90"&&(T.textContent=ye),Z=l(e),w=a(e,"P",{"data-svelte-h":!0}),r(w)!=="svelte-9jxghf"&&(w.textContent=be),ee=l(e),k(_.$$.fragment,e),te=l(e),I=a(e,"P"
,{"data-svelte-h":!0}),r(I)!=="svelte-1ne9hjb"&&(I.innerHTML=$e),ne=l(e),L=a(e,"P",{"data-svelte-h":!0}),r(L)!=="svelte-1uam4qw"&&(L.textContent=Ce),ie=l(e),E=a(e,"UL",{"data-svelte-h":!0}),r(E)!=="svelte-1vxs7qh"&&(E.innerHTML=Te),oe=l(e),k(P.$$.fragment,e),le=l(e),M=a(e,"P",{}),we(M).forEach(n),this.h()},h(){_e(u,"name","hf:doc:metadata"),_e(u,"content",ze)},m(e,t){Ae(document.head,u),i(e,j,t),i(e,H,t),i(e,U,t),S(p,e,t),i(e,F,t),i(e,c,t),i(e,O,t),i(e,f,t),i(e,W,t),i(e,d,t),i(e,D,t),i(e,m,t),i(e,R,t),i(e,h,t),i(e,Y,t),S(g,e,t),i(e,B,t),i(e,v,t),i(e,J,t),i(e,x,t),i(e,K,t),i(e,y,t),i(e,N,t),i(e,b,t),i(e,Q,t),i(e,$,t),i(e,V,t),S(C,e,t),i(e,X,t),i(e,T,t),i(e,Z,t),i(e,w,t),i(e,ee,t),S(_,e,t),i(e,te,t),i(e,I,t),i(e,ne,t),i(e,L,t),i(e,ie,t),i(e,E,t),i(e,oe,t),S(P,e,t),i(e,le,t),i(e,M,t),se=!0},p:Le,i(e){se||(z(p.$$.fragment,e),z(g.$$.fragment,e),z(C.$$.fragment,e),z(_.$$.fragment,e),z(P.$$.fragment,e),se=!0)},o(e){q(p.$$.fragment,e),q(g.$$.fragment,e),q(C.$$.fragment,e),q(_.$$.fragment,e),q(P.$$.fragment,e),se=!1},d(e){e&&(n(j),n(H),n(U),n(F),n(c),n(O),n(f),n(W),n(d),n(D),n(m),n(R),n(h),n(Y),n(B),n(v),n(J),n(x),n(K),n(y),n(N),n(b),n(Q),n($),n(V),n(X),n(T),n(Z),n(w),n(ee),n(te),n(I),n(ne),n(L),n(ie),n(E),n(oe),n(le),n(M)),n(u),G(p,e),G(g,e),G(C,e),G(_,e),G(P,e)}}}const ze='{"title":"About Inference Endpoints","local":"about-inference-endpoints","sections":[{"title":"Inference Enginges","local":"inference-enginges","sections":[],"depth":2},{"title":"Under the Hood","local":"under-the-hood","sections":[],"depth":2},{"title":"Enterprise or Team Subscription","local":"enterprise-or-team-subscription","sections":[],"depth":2}],"depth":1}';function qe(re){return Ee(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Fe extends Pe{constructor(u){super(),He(this,u,qe,Se,Ie,{})}}export{Fe as component}; | |
Xet Storage Details
- Size:
- 8.28 kB
- Xet hash:
- 09d6498ec6aa115f7684bd1f28e8d4b64df60ab7a5f20b0aba9d0ba7aa769d36
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.