Buckets:
| import{s as xt,n as Zt,o as Wt}from"../chunks/scheduler.3a17fb72.js";import{S as Et,i as Ht,e as d,s as i,c as a,h as Bt,a as u,d as n,b as s,f as vt,g as r,j as M,k as Ct,l as _t,m as l,n as o,t as p,o as m,p as g}from"../chunks/index.093f8863.js";import{C as kt,H as f,E as At}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.e28c70f3.js";import{C as ge}from"../chunks/CodeBlock.3509844b.js";function Ft(gt){let c,ue,fe,Me,$,ce,y,$e,h,ft=`An alternative to launching the evaluation locally is to serve the model on a | |
| TGI-compatible server/container and then run the evaluation by sending requests | |
| to the server. The command is the same as before, except you specify a path to | |
| a YAML configuration file (detailed below):`,ye,T,he,w,dt=`There are two types of configuration files that can be provided for running on | |
| the server:`,Te,j,we,U,ut=`To launch a model using Hugging Face’s Inference Endpoints, you need to provide | |
| the following file: <code>endpoint_model.yaml</code>. Lighteval will automatically deploy | |
| the endpoint, run the evaluation, and finally delete the endpoint (unless you | |
| specify an endpoint that was already launched, in which case the endpoint won’t | |
| be deleted afterwards).`,je,J,Ue,b,Je,I,be,G,Mt=`To use a model already deployed on a TGI server, for example on Hugging Face’s | |
| serverless inference.`,Ie,v,Ge,C,ve,x,Ce,Z,xe,W,Ze,E,ct="<li><code>model_name</code>: The Hugging Face model ID to deploy</li> <li><code>revision</code>: Model revision (defaults to “main”)</li> <li><code>dtype</code>: Data type for model weights (“float16”, “bfloat16”, “4bit”, “8bit”, etc.)</li> <li><code>framework</code>: Framework to use (“pytorch”, “tensorflow”)</li>",We,H,Ee,B,$t="<li><code>accelerator</code>: Hardware accelerator (“gpu”, “cpu”)</li> <li><code>region</code>: AWS region for deployment</li> <li><code>vendor</code>: Cloud vendor (“aws”, “azure”, “gcp”)</li> <li><code>instance_type</code>: Instance type (e.g., “nvidia-a10g”, “nvidia-t4”)</li> <li><code>instance_size</code>: Instance size (“x1”, “x2”, etc.)</li>",He,_,Be,k,yt="<li><code>endpoint_type</code>: Endpoint access level (“public”, “protected”, “private”)</li> <li><code>namespace</code>: Organization namespace for deployment</li> <li><code>reuse_existing</code>: Whether to reuse an existing endpoint</li> <li><code>endpoint_name</code>: Custom endpoint name (lowercase, no special characters)</li>",_e,A,ke,F,ht="<li><code>image_url</code>: Custom Docker image URL</li> <li><code>env_vars</code>: Environment variables for the endpoint</li>",Ae,S,Fe,q,Se,V,Tt="<li><code>inference_server_address</code>: URL of the TGI server</li> <li><code>inference_server_auth</code>: Authentication credentials</li> <li><code>model_id</code>: Model identifier (if using local model directory)</li>",qe,L,Ve,z,Le,R,ze,X,Re,Y,Xe,N,Ye,Q,Ne,D,Qe,P,De,O,wt='<li>Endpoints are automatically deleted after evaluation (unless <code>reuse_existing: true</code>)</li> <li>Costs are based on instance type and runtime</li> <li>Monitor usage in the <a href="https://huggingface.co/settings/billing" rel="nofollow">Hugging Face billing dashboard</a></li>',Pe,K,Oe,ee,jt="<li>No additional costs beyond your existing server infrastructure</li> <li>Useful for cost-effective evaluation of already-deployed models</li>",Ke,te,et,ne,tt,le,Ut="<li><strong>Endpoint Deployment Failures</strong>: Check instance availability in your region</li> <li><strong>Authentication Errors</strong>: Ensure proper Hugging Face token permissions</li> <li><strong>Model Loading Errors</strong>: Verify model name and revision are correct</li> <li><strong>Resource Constraints</strong>: Choose appropriate instance type for your model size</li>",nt,ie,lt,se,Jt="<li>Use appropriate instance types for your model size</li> <li>Consider using quantized models (4bit, 8bit) for cost savings</li> <li>Reuse existing endpoints for multiple evaluations</li> <li>Use serverless TGI for cost-effective evaluation</li>",it,ae,st,re,bt="Common error messages and solutions:",at,oe,It="<li><strong>“Instance not available”</strong>: Try a different region or instance type</li> <li><strong>“Model not found”</strong>: Check the model name and revision</li> <li><strong>“Insufficient permissions”</strong>: Verify your Hugging Face token has endpoint deployment permissions</li> <li><strong>“Endpoint already exists”</strong>: Use <code>reuse_existing: true</code> or choose a different endpoint name</li>",rt,pe,Gt='For more detailed information about Hugging Face Inference Endpoints, see the <a href="https://huggingface.co/docs/inference-endpoints/" rel="nofollow">official documentation</a>.',ot,me,pt,de,mt;return $=new kt({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),y=new f({props:{title:"Using Hugging Face Inference Endpoints or TGI as Backend",local:"using-hugging-face-inference-endpoints-or-tgi-as-backend",headingTag:"h1"}}),T=new ge({props:{code:"bGlnaHRldmFsJTIwZW5kcG9pbnQlMjAlN0J0Z2klMkNpbmZlcmVuY2UtZW5kcG9pbnQlN0QlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjIlMkZwYXRoJTJGdG8lMkZjb25maWclMkZmaWxlJTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTNDdGFza19wYXJhbWV0ZXJzJTNF",highlighted:`lighteval endpoint {tgi,inference-endpoint} \\ | |
| <span class="hljs-string">"/path/to/config/file"</span> \\ | |
| <task_parameters>`,wrap:!1}}),j=new f({props:{title:"Hugging Face Inference Endpoints",local:"hugging-face-inference-endpoints",headingTag:"h2"}}),J=new f({props:{title:"Configuration File Example",local:"configuration-file-example",headingTag:"h3"}}),b=new ge({props:{code:"bW9kZWxfcGFyYW1ldGVycyUzQSUwQSUyMCUyMCUyMCUyMHJldXNlX2V4aXN0aW5nJTNBJTIwZmFsc2UlMjAlMjMlMjBJZiUyMHRydWUlMkMlMjBpZ25vcmUlMjBhbGwlMjBwYXJhbXMlMjBpbiUyMGluc3RhbmNlJTJDJTIwYW5kJTIwZG9uJ3QlMjBkZWxldGUlMjB0aGUlMjBlbmRwb2ludCUyMGFmdGVyJTIwZXZhbHVhdGlvbiUwQSUyMCUyMCUyMCUyMCUyMyUyMGVuZHBvaW50X25hbWUlM0ElMjAlMjJsbGFtYS0yLTdCLWxpZ2h0ZXZhbCUyMiUyMCUyMyUyME5lZWRzJTIwdG8lMjBiZSUyMGxvd2VyY2FzZSUyMHdpdGhvdXQlMjBzcGVjaWFsJTIwY2hhcmFjdGVycyUwQSUyMCUyMCUyMCUyMG1vZGVsX25hbWUlM0ElMjAlMjJtZXRhLWxsYW1hJTJGTGxhbWEtMi03Yi1oZiUyMiUwQSUyMCUyMCUyMCUyMHJldmlzaW9uJTNBJTIwJTIybWFpbiUyMiUyMCUyMCUyMyUyMERlZmF1bHRzJTIwdG8lMjAlMjJtYWluJTIyJTBBJTIwJTIwJTIwJTIwZHR5cGUlM0ElMjAlMjJmbG9hdDE2JTIyJTIwJTIzJTIwQ2FuJTIwYmUlMjBhbnklMjBvZiUyMCUyMmF3cSUyMiUyQyUyMCUyMmVldHElMjIlMkMlMjAlMjJncHRxJTIyJTJDJTIwJTIyNGJpdCUyMiUyMG9yJTIwJTIyOGJpdCUyMiUyMCh3aWxsJTIwdXNlJTIwYml0c2FuZGJ5dGVzKSUyQyUyMCUyMmJmbG9hdDE2JTIyJTIwb3IlMjAlMjJmbG9hdDE2JTIyJTBBJTIwJTIwJTIwJTIwYWNjZWxlcmF0b3IlM0ElMjAlMjJncHUlMjIlMEElMjAlMjAlMjAlMjByZWdpb24lM0ElMjAlMjJldS13ZXN0LTElMjIlMEElMjAlMjAlMjAlMjB2ZW5kb3IlM0ElMjAlMjJhd3MlMjIlMEElMjAlMjAlMjAlMjBpbnN0YW5jZV90eXBlJTNBJTIwJTIybnZpZGlhLWExMGclMjIlMEElMjAlMjAlMjAlMjBpbnN0YW5jZV9zaXplJTNBJTIwJTIyeDElMjIlMEElMjAlMjAlMjAlMjBmcmFtZXdvcmslM0ElMjAlMjJweXRvcmNoJTIyJTBBJTIwJTIwJTIwJTIwZW5kcG9pbnRfdHlwZSUzQSUyMCUyMnByb3RlY3RlZCUyMiUwQSUyMCUyMCUyMCUyMG5hbWVzcGFjZSUzQSUyMG51bGwlMjAlMjMlMjBUaGUlMjBuYW1lc3BhY2UlMjB1bmRlciUyMHdoaWNoJTIwdG8lMjBsYXVuY2glMjB0aGUlMjBlbmRwb2ludC4lMjBEZWZhdWx0cyUyMHRvJTIwdGhlJTIwY3VycmVudCUyMHVzZXIncyUyMG5hbWVzcGFjZSUwQSUyMCUyMCUyMCUyMGltYWdlX3VybCUzQSUyMG51bGwlMjAlMjMlMjBPcHRpb25hbGx5JTIwc3BlY2lmeSUyMHRoZSUyMGRvY2tlciUyMGltYWdlJTIwdG8lMjB1c2UlMjB3aGVuJTIwbGF1bmNoaW5nJTIwdGhlJTIwZW5kcG9pbnQlMjBtb2RlbC4lMjBFLmcuJTJDJTIwbGF1bmNoaW5nJTIwbW9kZWxzJTIwd2l0aCUyMGxhdGVyJTIwcmVsZWFzZXMlMjBvZiUyMHRoZSUyMFRHSSUyMGNvbnRhaW5lciUyMHdpdGglMjBzdXBwb3J0JTIwZm9yJTIwbmV3ZXIlMjBtb2RlbHMuJTBBJTIwJTIwJTIwJTIwZW52X3ZhcnMlM0ElMjBudWxsJTIwJTIzJTIwT3B0aW9uYWwlMjBlbnZpcm9ubWVudCUyMHZhcmlhYmxlcyUyMHRvJTIwaW5jbHVkZSUyMHdoZW4lMjBsYXVuY2hpbmclMjB0aGUlMjBlbmRwb2ludC4lMjBlLmcuJTJDJTIwJTYwTUFYX0lOUFVUX0xFTkdUSCUzQSUyMDIwNDglNjA=",highlighted:`<span class="hljs-attr">model_parameters:</span> | |
| <span class="hljs-attr">reuse_existing:</span> <span class="hljs-literal">false</span> <span class="hljs-comment"># If true, ignore all params in instance, and don't delete the endpoint after evaluation</span> | |
| <span class="hljs-comment"># endpoint_name: "llama-2-7B-lighteval" # Needs to be lowercase without special characters</span> | |
| <span class="hljs-attr">model_name:</span> <span class="hljs-string">"meta-llama/Llama-2-7b-hf"</span> | |
| <span class="hljs-attr">revision:</span> <span class="hljs-string">"main"</span> <span class="hljs-comment"># Defaults to "main"</span> | |
| <span class="hljs-attr">dtype:</span> <span class="hljs-string">"float16"</span> <span class="hljs-comment"># Can be any of "awq", "eetq", "gptq", "4bit" or "8bit" (will use bitsandbytes), "bfloat16" or "float16"</span> | |
| <span class="hljs-attr">accelerator:</span> <span class="hljs-string">"gpu"</span> | |
| <span class="hljs-attr">region:</span> <span class="hljs-string">"eu-west-1"</span> | |
| <span class="hljs-attr">vendor:</span> <span class="hljs-string">"aws"</span> | |
| <span class="hljs-attr">instance_type:</span> <span class="hljs-string">"nvidia-a10g"</span> | |
| <span class="hljs-attr">instance_size:</span> <span class="hljs-string">"x1"</span> | |
| <span class="hljs-attr">framework:</span> <span class="hljs-string">"pytorch"</span> | |
| <span class="hljs-attr">endpoint_type:</span> <span class="hljs-string">"protected"</span> | |
| <span class="hljs-attr">namespace:</span> <span class="hljs-literal">null</span> <span class="hljs-comment"># The namespace under which to launch the endpoint. Defaults to the current user's namespace</span> | |
| <span class="hljs-attr">image_url:</span> <span class="hljs-literal">null</span> <span class="hljs-comment"># Optionally specify the docker image to use when launching the endpoint model. E.g., launching models with later releases of the TGI container with support for newer models.</span> | |
| <span class="hljs-attr">env_vars:</span> <span class="hljs-literal">null</span> <span class="hljs-comment"># Optional environment variables to include when launching the endpoint. e.g., \`MAX_INPUT_LENGTH: 2048\`</span>`,wrap:!1}}),I=new f({props:{title:"Text Generation Inference (TGI)",local:"text-generation-inference-tgi",headingTag:"h2"}}),v=new f({props:{title:"Configuration File Example",local:"configuration-file-example",headingTag:"h3"}}),C=new ge({props:{code:"bW9kZWxfcGFyYW1ldGVycyUzQSUwQSUyMCUyMCUyMCUyMGluZmVyZW5jZV9zZXJ2ZXJfYWRkcmVzcyUzQSUyMCUyMiUyMiUwQSUyMCUyMCUyMCUyMGluZmVyZW5jZV9zZXJ2ZXJfYXV0aCUzQSUyMG51bGwlMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUzQSUyMG51bGwlMjAlMjMlMjBPcHRpb25hbCUyQyUyMG9ubHklMjByZXF1aXJlZCUyMGlmJTIwdGhlJTIwVEdJJTIwY29udGFpbmVyJTIwd2FzJTIwbGF1bmNoZWQlMjB3aXRoJTIwbW9kZWxfaWQlMjBwb2ludGluZyUyMHRvJTIwYSUyMGxvY2FsJTIwZGlyZWN0b3J5",highlighted:`<span class="hljs-attr">model_parameters:</span> | |
| <span class="hljs-attr">inference_server_address:</span> <span class="hljs-string">""</span> | |
| <span class="hljs-attr">inference_server_auth:</span> <span class="hljs-literal">null</span> | |
| <span class="hljs-attr">model_id:</span> <span class="hljs-literal">null</span> <span class="hljs-comment"># Optional, only required if the TGI container was launched with model_id pointing to a local directory</span>`,wrap:!1}}),x=new f({props:{title:"Key Parameters",local:"key-parameters",headingTag:"h2"}}),Z=new f({props:{title:"Hugging Face Inference Endpoints",local:"hugging-face-inference-endpoints",headingTag:"h3"}}),W=new f({props:{title:"Model Configuration",local:"model-configuration",headingTag:"h4"}}),H=new f({props:{title:"Infrastructure Settings",local:"infrastructure-settings",headingTag:"h4"}}),_=new f({props:{title:"Endpoint Configuration",local:"endpoint-configuration",headingTag:"h4"}}),A=new f({props:{title:"Advanced Settings",local:"advanced-settings",headingTag:"h4"}}),S=new f({props:{title:"Text Generation Inference (TGI)",local:"text-generation-inference-tgi",headingTag:"h3"}}),q=new f({props:{title:"Server Configuration",local:"server-configuration",headingTag:"h4"}}),L=new f({props:{title:"Usage Examples",local:"usage-examples",headingTag:"h2"}}),z=new f({props:{title:"Deploying a New Inference Endpoint",local:"deploying-a-new-inference-endpoint",headingTag:"h3"}}),R=new ge({props:{code:"bGlnaHRldmFsJTIwZW5kcG9pbnQlMjBpbmZlcmVuY2UtZW5kcG9pbnQlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJjb25maWdzJTJGZW5kcG9pbnRfbW9kZWwueWFtbCUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMGdzbThr",highlighted:`lighteval endpoint inference-endpoint \\ | |
| <span class="hljs-string">"configs/endpoint_model.yaml"</span> \\ | |
| gsm8k`,wrap:!1}}),X=new f({props:{title:"Using an Existing TGI Server",local:"using-an-existing-tgi-server",headingTag:"h3"}}),Y=new ge({props:{code:"bGlnaHRldmFsJTIwZW5kcG9pbnQlMjB0Z2klMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJjb25maWdzJTJGdGdpX3NlcnZlci55YW1sJTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwZ3NtOGs=",highlighted:`lighteval endpoint tgi \\ | |
| <span class="hljs-string">"configs/tgi_server.yaml"</span> \\ | |
| gsm8k`,wrap:!1}}),N=new f({props:{title:"Reusing an Existing Endpoint",local:"reusing-an-existing-endpoint",headingTag:"h3"}}),Q=new ge({props:{code:"bW9kZWxfcGFyYW1ldGVycyUzQSUwQSUyMCUyMCUyMCUyMHJldXNlX2V4aXN0aW5nJTNBJTIwdHJ1ZSUwQSUyMCUyMCUyMCUyMGVuZHBvaW50X25hbWUlM0ElMjAlMjJteS1leGlzdGluZy1lbmRwb2ludCUyMiUwQSUyMCUyMCUyMCUyMCUyMyUyME90aGVyJTIwcGFyYW1ldGVycyUyMHdpbGwlMjBiZSUyMGlnbm9yZWQlMjB3aGVuJTIwcmV1c2VfZXhpc3RpbmclMjBpcyUyMHRydWU=",highlighted:`<span class="hljs-attr">model_parameters:</span> | |
| <span class="hljs-attr">reuse_existing:</span> <span class="hljs-literal">true</span> | |
| <span class="hljs-attr">endpoint_name:</span> <span class="hljs-string">"my-existing-endpoint"</span> | |
| <span class="hljs-comment"># Other parameters will be ignored when reuse_existing is true</span>`,wrap:!1}}),D=new f({props:{title:"Cost Management",local:"cost-management",headingTag:"h2"}}),P=new f({props:{title:"Inference Endpoints",local:"inference-endpoints",headingTag:"h3"}}),K=new f({props:{title:"TGI Servers",local:"tgi-servers",headingTag:"h3"}}),te=new f({props:{title:"Troubleshooting",local:"troubleshooting",headingTag:"h2"}}),ne=new f({props:{title:"Common Issues",local:"common-issues",headingTag:"h3"}}),ie=new f({props:{title:"Performance Tips",local:"performance-tips",headingTag:"h3"}}),ae=new f({props:{title:"Error Handling",local:"error-handling",headingTag:"h3"}}),me=new At({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/use-huggingface-inference-endpoints-or-tgi-as-backend.mdx"}}),{c(){c=d("meta"),ue=i(),fe=d("p"),Me=i(),a($.$$.fragment),ce=i(),a(y.$$.fragment),$e=i(),h=d("p"),h.textContent=ft,ye=i(),a(T.$$.fragment),he=i(),w=d("p"),w.textContent=dt,Te=i(),a(j.$$.fragment),we=i(),U=d("p"),U.innerHTML=ut,je=i(),a(J.$$.fragment),Ue=i(),a(b.$$.fragment),Je=i(),a(I.$$.fragment),be=i(),G=d("p"),G.textContent=Mt,Ie=i(),a(v.$$.fragment),Ge=i(),a(C.$$.fragment),ve=i(),a(x.$$.fragment),Ce=i(),a(Z.$$.fragment),xe=i(),a(W.$$.fragment),Ze=i(),E=d("ul"),E.innerHTML=ct,We=i(),a(H.$$.fragment),Ee=i(),B=d("ul"),B.innerHTML=$t,He=i(),a(_.$$.fragment),Be=i(),k=d("ul"),k.innerHTML=yt,_e=i(),a(A.$$.fragment),ke=i(),F=d("ul"),F.innerHTML=ht,Ae=i(),a(S.$$.fragment),Fe=i(),a(q.$$.fragment),Se=i(),V=d("ul"),V.innerHTML=Tt,qe=i(),a(L.$$.fragment),Ve=i(),a(z.$$.fragment),Le=i(),a(R.$$.fragment),ze=i(),a(X.$$.fragment),Re=i(),a(Y.$$.fragment),Xe=i(),a(N.$$.fragment),Ye=i(),a(Q.$$.fragment),Ne=i(),a(D.$$.fragment),Qe=i(),a(P.$$.fragment),De=i(),O=d("ul"),O.innerHTML=wt,Pe=i(),a(K.$$.fragment),Oe=i(),ee=d("ul"),ee.innerHTML=jt,Ke=i(),a(te.$$.fragment),et=i(),a(ne.$$.fragment),tt=i(),le=d("ol"),le.innerHTML=Ut,nt=i(),a(ie.$$.fragment),lt=i(),se=d("ul"),se.innerHTML=Jt,it=i(),a(ae.$$.fragment),st=i(),re=d("p"),re.textContent=bt,at=i(),oe=d("ul"),oe.innerHTML=It,rt=i(),pe=d("p"),pe.innerHTML=Gt,ot=i(),a(me.$$.fragment),pt=i(),de=d("p"),this.h()},l(e){const t=Bt("svelte-u9bgzb",document.head);c=u(t,"META",{name:!0,content:!0}),t.forEach(n),ue=s(e),fe=u(e,"P",{}),vt(fe).forEach(n),Me=s(e),r($.$$.fragment,e),ce=s(e),r(y.$$.fragment,e),$e=s(e),h=u(e,"P",{"data-svelte-h":!0}),M(h)!=="svelte-ik3alt"&&(h.textContent=ft),ye=s(e),r(T.$$.fragment,e),he=s(e),w=u(e,"P",{"data-svelte-h":!0}),M(w)!=="svelte-198jur5"&&(w.textContent=dt),Te=s(e),r(j.$$.fragment,e),we=s(e),U=u(e,"P",{"data-svelte-h":!0}),M(U)!=="svelte-13g2grh"&&(U.innerHTML=ut),je=s(e),r(J.$$.fragment,e),Ue=s(e),r(b.$$.fragment,e),Je=s(e),r(I.$$.fragment,e),be=s(e),G=u(e,"P",{"data-svelte-h":!0}),M(G)!=="svelte-s83khs"&&(G.textContent=Mt),Ie=s(e),r(v.$$.fragment,e),Ge=s(e),r(C.$$.fragment,e),ve=s(e),r(x.$$.fragment,e),Ce=s(e),r(Z.$$.fragment,e),xe=s(e),r(W.$$.fragment,e),Ze=s(e),E=u(e,"UL",{"data-svelte-h":!0}),M(E)!=="svelte-catr4m"&&(E.innerHTML=ct),We=s(e),r(H.$$.fragment,e),Ee=s(e),B=u(e,"UL",{"data-svelte-h":!0}),M(B)!=="svelte-wyhyn3"&&(B.innerHTML=$t),He=s(e),r(_.$$.fragment,e),Be=s(e),k=u(e,"UL",{"data-svelte-h":!0}),M(k)!=="svelte-zesftx"&&(k.innerHTML=yt),_e=s(e),r(A.$$.fragment,e),ke=s(e),F=u(e,"UL",{"data-svelte-h":!0}),M(F)!=="svelte-chc4s7"&&(F.innerHTML=ht),Ae=s(e),r(S.$$.fragment,e),Fe=s(e),r(q.$$.fragment,e),Se=s(e),V=u(e,"UL",{"data-svelte-h":!0}),M(V)!=="svelte-x57ldu"&&(V.innerHTML=Tt),qe=s(e),r(L.$$.fragment,e),Ve=s(e),r(z.$$.fragment,e),Le=s(e),r(R.$$.fragment,e),ze=s(e),r(X.$$.fragment,e),Re=s(e),r(Y.$$.fragment,e),Xe=s(e),r(N.$$.fragment,e),Ye=s(e),r(Q.$$.fragment,e),Ne=s(e),r(D.$$.fragment,e),Qe=s(e),r(P.$$.fragment,e),De=s(e),O=u(e,"UL",{"data-svelte-h":!0}),M(O)!=="svelte-1hz2o9r"&&(O.innerHTML=wt),Pe=s(e),r(K.$$.fragment,e),Oe=s(e),ee=u(e,"UL",{"data-svelte-h":!0}),M(ee)!=="svelte-rh1fr3"&&(ee.innerHTML=jt),Ke=s(e),r(te.$$.fragment,e),et=s(e),r(ne.$$.fragment,e),tt=s(e),le=u(e,"OL",{"data-svelte-h":!0}),M(le)!=="svelte-1p1agi9"&&(le.innerHTML=Ut),nt=s(e),r(ie.$$.fragment,e),lt=s(e),se=u(e,"UL",{"data-svelte-h":!0}),M(se)!=="svelte-p8wan0"&&(se.innerHTML=Jt),it=s(e),r(ae.$$.fragment,e),st=s(e),re=u(e,"P",{"data-svelte-h":!0}),M(re)!=="svelte-19pa54u"&&(re.textContent=bt),at=s(e),oe=u(e,"UL",{"data-svelte-h":!0}),M(oe)!=="svelte-1rwk4qm"&&(oe.innerHTML=It),rt=s(e),pe=u(e,"P",{"data-svelte-h":!0}),M(pe)!=="svelte-1o55mx1"&&(pe.innerHTML=Gt),ot=s(e),r(me.$$.fragment,e),pt=s(e),de=u(e,"P",{}),vt(de).forEach(n),this.h()},h(){Ct(c,"name","hf:doc:metadata"),Ct(c,"content",St)},m(e,t){_t(document.head,c),l(e,ue,t),l(e,fe,t),l(e,Me,t),o($,e,t),l(e,ce,t),o(y,e,t),l(e,$e,t),l(e,h,t),l(e,ye,t),o(T,e,t),l(e,he,t),l(e,w,t),l(e,Te,t),o(j,e,t),l(e,we,t),l(e,U,t),l(e,je,t),o(J,e,t),l(e,Ue,t),o(b,e,t),l(e,Je,t),o(I,e,t),l(e,be,t),l(e,G,t),l(e,Ie,t),o(v,e,t),l(e,Ge,t),o(C,e,t),l(e,ve,t),o(x,e,t),l(e,Ce,t),o(Z,e,t),l(e,xe,t),o(W,e,t),l(e,Ze,t),l(e,E,t),l(e,We,t),o(H,e,t),l(e,Ee,t),l(e,B,t),l(e,He,t),o(_,e,t),l(e,Be,t),l(e,k,t),l(e,_e,t),o(A,e,t),l(e,ke,t),l(e,F,t),l(e,Ae,t),o(S,e,t),l(e,Fe,t),o(q,e,t),l(e,Se,t),l(e,V,t),l(e,qe,t),o(L,e,t),l(e,Ve,t),o(z,e,t),l(e,Le,t),o(R,e,t),l(e,ze,t),o(X,e,t),l(e,Re,t),o(Y,e,t),l(e,Xe,t),o(N,e,t),l(e,Ye,t),o(Q,e,t),l(e,Ne,t),o(D,e,t),l(e,Qe,t),o(P,e,t),l(e,De,t),l(e,O,t),l(e,Pe,t),o(K,e,t),l(e,Oe,t),l(e,ee,t),l(e,Ke,t),o(te,e,t),l(e,et,t),o(ne,e,t),l(e,tt,t),l(e,le,t),l(e,nt,t),o(ie,e,t),l(e,lt,t),l(e,se,t),l(e,it,t),o(ae,e,t),l(e,st,t),l(e,re,t),l(e,at,t),l(e,oe,t),l(e,rt,t),l(e,pe,t),l(e,ot,t),o(me,e,t),l(e,pt,t),l(e,de,t),mt=!0},p:Zt,i(e){mt||(p($.$$.fragment,e),p(y.$$.fragment,e),p(T.$$.fragment,e),p(j.$$.fragment,e),p(J.$$.fragment,e),p(b.$$.fragment,e),p(I.$$.fragment,e),p(v.$$.fragment,e),p(C.$$.fragment,e),p(x.$$.fragment,e),p(Z.$$.fragment,e),p(W.$$.fragment,e),p(H.$$.fragment,e),p(_.$$.fragment,e),p(A.$$.fragment,e),p(S.$$.fragment,e),p(q.$$.fragment,e),p(L.$$.fragment,e),p(z.$$.fragment,e),p(R.$$.fragment,e),p(X.$$.fragment,e),p(Y.$$.fragment,e),p(N.$$.fragment,e),p(Q.$$.fragment,e),p(D.$$.fragment,e),p(P.$$.fragment,e),p(K.$$.fragment,e),p(te.$$.fragment,e),p(ne.$$.fragment,e),p(ie.$$.fragment,e),p(ae.$$.fragment,e),p(me.$$.fragment,e),mt=!0)},o(e){m($.$$.fragment,e),m(y.$$.fragment,e),m(T.$$.fragment,e),m(j.$$.fragment,e),m(J.$$.fragment,e),m(b.$$.fragment,e),m(I.$$.fragment,e),m(v.$$.fragment,e),m(C.$$.fragment,e),m(x.$$.fragment,e),m(Z.$$.fragment,e),m(W.$$.fragment,e),m(H.$$.fragment,e),m(_.$$.fragment,e),m(A.$$.fragment,e),m(S.$$.fragment,e),m(q.$$.fragment,e),m(L.$$.fragment,e),m(z.$$.fragment,e),m(R.$$.fragment,e),m(X.$$.fragment,e),m(Y.$$.fragment,e),m(N.$$.fragment,e),m(Q.$$.fragment,e),m(D.$$.fragment,e),m(P.$$.fragment,e),m(K.$$.fragment,e),m(te.$$.fragment,e),m(ne.$$.fragment,e),m(ie.$$.fragment,e),m(ae.$$.fragment,e),m(me.$$.fragment,e),mt=!1},d(e){e&&(n(ue),n(fe),n(Me),n(ce),n($e),n(h),n(ye),n(he),n(w),n(Te),n(we),n(U),n(je),n(Ue),n(Je),n(be),n(G),n(Ie),n(Ge),n(ve),n(Ce),n(xe),n(Ze),n(E),n(We),n(Ee),n(B),n(He),n(Be),n(k),n(_e),n(ke),n(F),n(Ae),n(Fe),n(Se),n(V),n(qe),n(Ve),n(Le),n(ze),n(Re),n(Xe),n(Ye),n(Ne),n(Qe),n(De),n(O),n(Pe),n(Oe),n(ee),n(Ke),n(et),n(tt),n(le),n(nt),n(lt),n(se),n(it),n(st),n(re),n(at),n(oe),n(rt),n(pe),n(ot),n(pt),n(de)),n(c),g($,e),g(y,e),g(T,e),g(j,e),g(J,e),g(b,e),g(I,e),g(v,e),g(C,e),g(x,e),g(Z,e),g(W,e),g(H,e),g(_,e),g(A,e),g(S,e),g(q,e),g(L,e),g(z,e),g(R,e),g(X,e),g(Y,e),g(N,e),g(Q,e),g(D,e),g(P,e),g(K,e),g(te,e),g(ne,e),g(ie,e),g(ae,e),g(me,e)}}}const St='{"title":"Using Hugging Face Inference Endpoints or TGI as Backend","local":"using-hugging-face-inference-endpoints-or-tgi-as-backend","sections":[{"title":"Hugging Face Inference Endpoints","local":"hugging-face-inference-endpoints","sections":[{"title":"Configuration File Example","local":"configuration-file-example","sections":[],"depth":3}],"depth":2},{"title":"Text Generation Inference (TGI)","local":"text-generation-inference-tgi","sections":[{"title":"Configuration File Example","local":"configuration-file-example","sections":[],"depth":3}],"depth":2},{"title":"Key Parameters","local":"key-parameters","sections":[{"title":"Hugging Face Inference Endpoints","local":"hugging-face-inference-endpoints","sections":[{"title":"Model Configuration","local":"model-configuration","sections":[],"depth":4},{"title":"Infrastructure Settings","local":"infrastructure-settings","sections":[],"depth":4},{"title":"Endpoint Configuration","local":"endpoint-configuration","sections":[],"depth":4},{"title":"Advanced Settings","local":"advanced-settings","sections":[],"depth":4}],"depth":3},{"title":"Text Generation Inference (TGI)","local":"text-generation-inference-tgi","sections":[{"title":"Server Configuration","local":"server-configuration","sections":[],"depth":4}],"depth":3}],"depth":2},{"title":"Usage Examples","local":"usage-examples","sections":[{"title":"Deploying a New Inference Endpoint","local":"deploying-a-new-inference-endpoint","sections":[],"depth":3},{"title":"Using an Existing TGI Server","local":"using-an-existing-tgi-server","sections":[],"depth":3},{"title":"Reusing an Existing Endpoint","local":"reusing-an-existing-endpoint","sections":[],"depth":3}],"depth":2},{"title":"Cost Management","local":"cost-management","sections":[{"title":"Inference Endpoints","local":"inference-endpoints","sections":[],"depth":3},{"title":"TGI Servers","local":"tgi-servers","sections":[],"depth":3}],"depth":2},{"title":"Troubleshooting","local":"troubleshooting","sections":[{"title":"Common Issues","local":"common-issues","sections":[],"depth":3},{"title":"Performance Tips","local":"performance-tips","sections":[],"depth":3},{"title":"Error Handling","local":"error-handling","sections":[],"depth":3}],"depth":2}],"depth":1}';function qt(gt){return Wt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Xt extends Et{constructor(c){super(),Ht(this,c,qt,Ft,xt,{})}}export{Xt as component}; | |
Xet Storage Details
- Size:
- 23.8 kB
- Xet hash:
- e97d256fb73ed7b7d908ccd87d5bcc0ac78a2469ccd11452056826c626dc63bd
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.