Buckets:

rtrm's picture
download
raw
23.8 kB
import{s as xt,n as Zt,o as Wt}from"../chunks/scheduler.3a17fb72.js";
import{S as Et,i as Ht,e as d,s as i,c as a,h as Bt,a as u,d as n,b as s,f as vt,g as r,j as M,k as Ct,l as _t,m as l,n as o,t as p,o as m,p as g}from"../chunks/index.093f8863.js";
import{C as kt,H as f,E as At}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.e28c70f3.js";
import{C as ge}from"../chunks/CodeBlock.3509844b.js";

/**
 * Builds the fragment object for the documentation page
 * "Using Hugging Face Inference Endpoints or TGI as Backend".
 *
 * This is minified, compiler-generated output (presumably a Svelte
 * `create_fragment`; the one-letter helpers are imported from
 * ../chunks/index and ../chunks/scheduler and their exact semantics are not
 * visible in this file -- confirm against those chunks before relying on the
 * descriptions below).
 *
 * Static page content lives in the string locals declared first (`ft`, `dt`,
 * `ut`, ...). Headings are `f` components, code samples are `ge` components,
 * and `me` (an `At` component) carries the link to the source .mdx file.
 *
 * For every `ge` component, `code` is a base64 string whose decoded value is
 * the percent-encoded snippet text, and `highlighted` is the same snippet as
 * pre-rendered HTML. The whitespace inside `highlighted` (four-space
 * indentation of continuation lines) has to stay in sync with `code`,
 * because YAML indentation is significant.
 *
 * NOTE(review): several headings reuse the same `local` id
 * ("hugging-face-inference-endpoints", "text-generation-inference-tgi",
 * "configuration-file-example" each appear twice). If `local` becomes the
 * DOM anchor id, the later duplicates are not addressable -- confirm with
 * the heading component. The ids are left unchanged here because `St`
 * mirrors them and external links may depend on them.
 *
 * @param {*} gt Unused by this function.
 * @returns {{c: Function, l: Function, h: Function, m: Function, p: Function,
 *            i: Function, o: Function, d: Function}} The fragment methods;
 *          each one is described next to its definition.
 */
function Ft(gt){let c,ue,fe,Me,$,ce,y,$e,h,
  ft=`An alternative to launching the evaluation locally is to serve the model on a
TGI-compatible server/container and then run the evaluation by sending requests
to the server. The command is the same as before, except you specify a path to
a YAML configuration file (detailed below):`,
  ye,T,he,w,
  dt=`There are two types of configuration files that can be provided for running on
the server:`,
  Te,j,we,U,
  ut=`To launch a model using Hugging Face’s Inference Endpoints, you need to provide
the following file: <code>endpoint_model.yaml</code>. Lighteval will automatically deploy
the endpoint, run the evaluation, and finally delete the endpoint (unless you
specify an endpoint that was already launched, in which case the endpoint won’t
be deleted afterwards).`,
  je,J,Ue,b,Je,I,be,G,
  Mt=`To use a model already deployed on a TGI server, for example on Hugging Face’s
serverless inference.`,
  Ie,v,Ge,C,ve,x,Ce,Z,xe,W,Ze,E,
  ct="<li><code>model_name</code>: The Hugging Face model ID to deploy</li> <li><code>revision</code>: Model revision (defaults to “main”)</li> <li><code>dtype</code>: Data type for model weights (“float16”, “bfloat16”, “4bit”, “8bit”, etc.)</li> <li><code>framework</code>: Framework to use (“pytorch”, “tensorflow”)</li>",
  We,H,Ee,B,
  $t="<li><code>accelerator</code>: Hardware accelerator (“gpu”, “cpu”)</li> <li><code>region</code>: AWS region for deployment</li> <li><code>vendor</code>: Cloud vendor (“aws”, “azure”, “gcp”)</li> <li><code>instance_type</code>: Instance type (e.g., “nvidia-a10g”, “nvidia-t4”)</li> <li><code>instance_size</code>: Instance size (“x1”, “x2”, etc.)</li>",
  He,_,Be,k,
  yt="<li><code>endpoint_type</code>: Endpoint access level (“public”, “protected”, “private”)</li> <li><code>namespace</code>: Organization namespace for deployment</li> <li><code>reuse_existing</code>: Whether to reuse an existing endpoint</li> <li><code>endpoint_name</code>: Custom endpoint name (lowercase, no special characters)</li>",
  _e,A,ke,F,
  ht="<li><code>image_url</code>: Custom Docker image URL</li> <li><code>env_vars</code>: Environment variables for the endpoint</li>",
  Ae,S,Fe,q,Se,V,
  Tt="<li><code>inference_server_address</code>: URL of the TGI server</li> <li><code>inference_server_auth</code>: Authentication credentials</li> <li><code>model_id</code>: Model identifier (if using local model directory)</li>",
  qe,L,Ve,z,Le,R,ze,X,Re,Y,Xe,N,Ye,Q,Ne,D,Qe,P,De,O,
  wt='<li>Endpoints are automatically deleted after evaluation (unless <code>reuse_existing: true</code>)</li> <li>Costs are based on instance type and runtime</li> <li>Monitor usage in the <a href="https://huggingface.co/settings/billing" rel="nofollow">Hugging Face billing dashboard</a></li>',
  Pe,K,Oe,ee,
  jt="<li>No additional costs beyond your existing server infrastructure</li> <li>Useful for cost-effective evaluation of already-deployed models</li>",
  Ke,te,et,ne,tt,le,
  Ut="<li><strong>Endpoint Deployment Failures</strong>: Check instance availability in your region</li> <li><strong>Authentication Errors</strong>: Ensure proper Hugging Face token permissions</li> <li><strong>Model Loading Errors</strong>: Verify model name and revision are correct</li> <li><strong>Resource Constraints</strong>: Choose appropriate instance type for your model size</li>",
  nt,ie,lt,se,
  Jt="<li>Use appropriate instance types for your model size</li> <li>Consider using quantized models (4bit, 8bit) for cost savings</li> <li>Reuse existing endpoints for multiple evaluations</li> <li>Use serverless TGI for cost-effective evaluation</li>",
  it,ae,st,re,
  bt="Common error messages and solutions:",
  at,oe,
  It="<li><strong>“Instance not available”</strong>: Try a different region or instance type</li> <li><strong>“Model not found”</strong>: Check the model name and revision</li> <li><strong>“Insufficient permissions”</strong>: Verify your Hugging Face token has endpoint deployment permissions</li> <li><strong>“Endpoint already exists”</strong>: Use <code>reuse_existing: true</code> or choose a different endpoint name</li>",
  rt,pe,
  Gt='For more detailed information about Hugging Face Inference Endpoints, see the <a href="https://huggingface.co/docs/inference-endpoints/" rel="nofollow">official documentation</a>.',
  ot,me,pt,de,mt;
  // The component instances are created in page order; the trailing object
  // literal of this comma expression is the value actually returned.
  return $=new kt({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),
  y=new f({props:{title:"Using Hugging Face Inference Endpoints or TGI as Backend",local:"using-hugging-face-inference-endpoints-or-tgi-as-backend",headingTag:"h1"}}),
  T=new ge({props:{code:"bGlnaHRldmFsJTIwZW5kcG9pbnQlMjAlN0J0Z2klMkNpbmZlcmVuY2UtZW5kcG9pbnQlN0QlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjIlMkZwYXRoJTJGdG8lMkZjb25maWclMkZmaWxlJTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTNDdGFza19wYXJhbWV0ZXJzJTNF",highlighted:`lighteval endpoint {tgi,inference-endpoint} \\
    <span class="hljs-string">&quot;/path/to/config/file&quot;</span> \\
    &lt;task_parameters&gt;`,wrap:!1}}),
  j=new f({props:{title:"Hugging Face Inference Endpoints",local:"hugging-face-inference-endpoints",headingTag:"h2"}}),
  J=new f({props:{title:"Configuration File Example",local:"configuration-file-example",headingTag:"h3"}}),
  b=new ge({props:{code:"bW9kZWxfcGFyYW1ldGVycyUzQSUwQSUyMCUyMCUyMCUyMHJldXNlX2V4aXN0aW5nJTNBJTIwZmFsc2UlMjAlMjMlMjBJZiUyMHRydWUlMkMlMjBpZ25vcmUlMjBhbGwlMjBwYXJhbXMlMjBpbiUyMGluc3RhbmNlJTJDJTIwYW5kJTIwZG9uJ3QlMjBkZWxldGUlMjB0aGUlMjBlbmRwb2ludCUyMGFmdGVyJTIwZXZhbHVhdGlvbiUwQSUyMCUyMCUyMCUyMCUyMyUyMGVuZHBvaW50X25hbWUlM0ElMjAlMjJsbGFtYS0yLTdCLWxpZ2h0ZXZhbCUyMiUyMCUyMyUyME5lZWRzJTIwdG8lMjBiZSUyMGxvd2VyY2FzZSUyMHdpdGhvdXQlMjBzcGVjaWFsJTIwY2hhcmFjdGVycyUwQSUyMCUyMCUyMCUyMG1vZGVsX25hbWUlM0ElMjAlMjJtZXRhLWxsYW1hJTJGTGxhbWEtMi03Yi1oZiUyMiUwQSUyMCUyMCUyMCUyMHJldmlzaW9uJTNBJTIwJTIybWFpbiUyMiUyMCUyMCUyMyUyMERlZmF1bHRzJTIwdG8lMjAlMjJtYWluJTIyJTBBJTIwJTIwJTIwJTIwZHR5cGUlM0ElMjAlMjJmbG9hdDE2JTIyJTIwJTIzJTIwQ2FuJTIwYmUlMjBhbnklMjBvZiUyMCUyMmF3cSUyMiUyQyUyMCUyMmVldHElMjIlMkMlMjAlMjJncHRxJTIyJTJDJTIwJTIyNGJpdCUyMiUyMG9yJTIwJTIyOGJpdCUyMiUyMCh3aWxsJTIwdXNlJTIwYml0c2FuZGJ5dGVzKSUyQyUyMCUyMmJmbG9hdDE2JTIyJTIwb3IlMjAlMjJmbG9hdDE2JTIyJTBBJTIwJTIwJTIwJTIwYWNjZWxlcmF0b3IlM0ElMjAlMjJncHUlMjIlMEElMjAlMjAlMjAlMjByZWdpb24lM0ElMjAlMjJldS13ZXN0LTElMjIlMEElMjAlMjAlMjAlMjB2ZW5kb3IlM0ElMjAlMjJhd3MlMjIlMEElMjAlMjAlMjAlMjBpbnN0YW5jZV90eXBlJTNBJTIwJTIybnZpZGlhLWExMGclMjIlMEElMjAlMjAlMjAlMjBpbnN0YW5jZV9zaXplJTNBJTIwJTIyeDElMjIlMEElMjAlMjAlMjAlMjBmcmFtZXdvcmslM0ElMjAlMjJweXRvcmNoJTIyJTBBJTIwJTIwJTIwJTIwZW5kcG9pbnRfdHlwZSUzQSUyMCUyMnByb3RlY3RlZCUyMiUwQSUyMCUyMCUyMCUyMG5hbWVzcGFjZSUzQSUyMG51bGwlMjAlMjMlMjBUaGUlMjBuYW1lc3BhY2UlMjB1bmRlciUyMHdoaWNoJTIwdG8lMjBsYXVuY2glMjB0aGUlMjBlbmRwb2ludC4lMjBEZWZhdWx0cyUyMHRvJTIwdGhlJTIwY3VycmVudCUyMHVzZXIncyUyMG5hbWVzcGFjZSUwQSUyMCUyMCUyMCUyMGltYWdlX3VybCUzQSUyMG51bGwlMjAlMjMlMjBPcHRpb25hbGx5JTIwc3BlY2lmeSUyMHRoZSUyMGRvY2tlciUyMGltYWdlJTIwdG8lMjB1c2UlMjB3aGVuJTIwbGF1bmNoaW5nJTIwdGhlJTIwZW5kcG9pbnQlMjBtb2RlbC4lMjBFLmcuJTJDJTIwbGF1bmNoaW5nJTIwbW9kZWxzJTIwd2l0aCUyMGxhdGVyJTIwcmVsZWFzZXMlMjBvZiUyMHRoZSUyMFRHSSUyMGNvbnRhaW5lciUyMHdpdGglMjBzdXBwb3J0JTIwZm9yJTIwbmV3ZXIlMjBtb2RlbHMuJTBBJTIwJTIwJTIwJTIwZW52X3ZhcnMlM0ElMjBudWxsJTIwJTIzJTIwT3B0aW9uYWwlMjBlbnZpcm9ubWVudCUyMHZhcmlhYmxlcyUyMHRvJTIwaW5jbHVkZSUyMHdoZW4lMjBsYXVuY2hpbmclMjB0aGUlMjBlbmRwb2ludC4lMjBlLmcuJTJDJTIwJTYwTUFYX0lOUFVUX0xFTkdUSCUzQSUyMDIwNDglNjA=",highlighted:`<span class="hljs-attr">model_parameters:</span>
    <span class="hljs-attr">reuse_existing:</span> <span class="hljs-literal">false</span> <span class="hljs-comment"># If true, ignore all params in instance, and don&#x27;t delete the endpoint after evaluation</span>
    <span class="hljs-comment"># endpoint_name: &quot;llama-2-7B-lighteval&quot; # Needs to be lowercase without special characters</span>
    <span class="hljs-attr">model_name:</span> <span class="hljs-string">&quot;meta-llama/Llama-2-7b-hf&quot;</span>
    <span class="hljs-attr">revision:</span> <span class="hljs-string">&quot;main&quot;</span>  <span class="hljs-comment"># Defaults to &quot;main&quot;</span>
    <span class="hljs-attr">dtype:</span> <span class="hljs-string">&quot;float16&quot;</span> <span class="hljs-comment"># Can be any of &quot;awq&quot;, &quot;eetq&quot;, &quot;gptq&quot;, &quot;4bit&quot; or &quot;8bit&quot; (will use bitsandbytes), &quot;bfloat16&quot; or &quot;float16&quot;</span>
    <span class="hljs-attr">accelerator:</span> <span class="hljs-string">&quot;gpu&quot;</span>
    <span class="hljs-attr">region:</span> <span class="hljs-string">&quot;eu-west-1&quot;</span>
    <span class="hljs-attr">vendor:</span> <span class="hljs-string">&quot;aws&quot;</span>
    <span class="hljs-attr">instance_type:</span> <span class="hljs-string">&quot;nvidia-a10g&quot;</span>
    <span class="hljs-attr">instance_size:</span> <span class="hljs-string">&quot;x1&quot;</span>
    <span class="hljs-attr">framework:</span> <span class="hljs-string">&quot;pytorch&quot;</span>
    <span class="hljs-attr">endpoint_type:</span> <span class="hljs-string">&quot;protected&quot;</span>
    <span class="hljs-attr">namespace:</span> <span class="hljs-literal">null</span> <span class="hljs-comment"># The namespace under which to launch the endpoint. Defaults to the current user&#x27;s namespace</span>
    <span class="hljs-attr">image_url:</span> <span class="hljs-literal">null</span> <span class="hljs-comment"># Optionally specify the docker image to use when launching the endpoint model. E.g., launching models with later releases of the TGI container with support for newer models.</span>
    <span class="hljs-attr">env_vars:</span> <span class="hljs-literal">null</span> <span class="hljs-comment"># Optional environment variables to include when launching the endpoint. e.g., \`MAX_INPUT_LENGTH: 2048\`</span>`,wrap:!1}}),
  I=new f({props:{title:"Text Generation Inference (TGI)",local:"text-generation-inference-tgi",headingTag:"h2"}}),
  v=new f({props:{title:"Configuration File Example",local:"configuration-file-example",headingTag:"h3"}}),
  C=new ge({props:{code:"bW9kZWxfcGFyYW1ldGVycyUzQSUwQSUyMCUyMCUyMCUyMGluZmVyZW5jZV9zZXJ2ZXJfYWRkcmVzcyUzQSUyMCUyMiUyMiUwQSUyMCUyMCUyMCUyMGluZmVyZW5jZV9zZXJ2ZXJfYXV0aCUzQSUyMG51bGwlMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUzQSUyMG51bGwlMjAlMjMlMjBPcHRpb25hbCUyQyUyMG9ubHklMjByZXF1aXJlZCUyMGlmJTIwdGhlJTIwVEdJJTIwY29udGFpbmVyJTIwd2FzJTIwbGF1bmNoZWQlMjB3aXRoJTIwbW9kZWxfaWQlMjBwb2ludGluZyUyMHRvJTIwYSUyMGxvY2FsJTIwZGlyZWN0b3J5",highlighted:`<span class="hljs-attr">model_parameters:</span>
    <span class="hljs-attr">inference_server_address:</span> <span class="hljs-string">&quot;&quot;</span>
    <span class="hljs-attr">inference_server_auth:</span> <span class="hljs-literal">null</span>
    <span class="hljs-attr">model_id:</span> <span class="hljs-literal">null</span> <span class="hljs-comment"># Optional, only required if the TGI container was launched with model_id pointing to a local directory</span>`,wrap:!1}}),
  x=new f({props:{title:"Key Parameters",local:"key-parameters",headingTag:"h2"}}),
  Z=new f({props:{title:"Hugging Face Inference Endpoints",local:"hugging-face-inference-endpoints",headingTag:"h3"}}),
  W=new f({props:{title:"Model Configuration",local:"model-configuration",headingTag:"h4"}}),
  H=new f({props:{title:"Infrastructure Settings",local:"infrastructure-settings",headingTag:"h4"}}),
  _=new f({props:{title:"Endpoint Configuration",local:"endpoint-configuration",headingTag:"h4"}}),
  A=new f({props:{title:"Advanced Settings",local:"advanced-settings",headingTag:"h4"}}),
  S=new f({props:{title:"Text Generation Inference (TGI)",local:"text-generation-inference-tgi",headingTag:"h3"}}),
  q=new f({props:{title:"Server Configuration",local:"server-configuration",headingTag:"h4"}}),
  L=new f({props:{title:"Usage Examples",local:"usage-examples",headingTag:"h2"}}),
  z=new f({props:{title:"Deploying a New Inference Endpoint",local:"deploying-a-new-inference-endpoint",headingTag:"h3"}}),
  R=new ge({props:{code:"bGlnaHRldmFsJTIwZW5kcG9pbnQlMjBpbmZlcmVuY2UtZW5kcG9pbnQlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJjb25maWdzJTJGZW5kcG9pbnRfbW9kZWwueWFtbCUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMGdzbThr",highlighted:`lighteval endpoint inference-endpoint \\
    <span class="hljs-string">&quot;configs/endpoint_model.yaml&quot;</span> \\
    gsm8k`,wrap:!1}}),
  X=new f({props:{title:"Using an Existing TGI Server",local:"using-an-existing-tgi-server",headingTag:"h3"}}),
  Y=new ge({props:{code:"bGlnaHRldmFsJTIwZW5kcG9pbnQlMjB0Z2klMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJjb25maWdzJTJGdGdpX3NlcnZlci55YW1sJTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwZ3NtOGs=",highlighted:`lighteval endpoint tgi \\
    <span class="hljs-string">&quot;configs/tgi_server.yaml&quot;</span> \\
    gsm8k`,wrap:!1}}),
  N=new f({props:{title:"Reusing an Existing Endpoint",local:"reusing-an-existing-endpoint",headingTag:"h3"}}),
  Q=new ge({props:{code:"bW9kZWxfcGFyYW1ldGVycyUzQSUwQSUyMCUyMCUyMCUyMHJldXNlX2V4aXN0aW5nJTNBJTIwdHJ1ZSUwQSUyMCUyMCUyMCUyMGVuZHBvaW50X25hbWUlM0ElMjAlMjJteS1leGlzdGluZy1lbmRwb2ludCUyMiUwQSUyMCUyMCUyMCUyMCUyMyUyME90aGVyJTIwcGFyYW1ldGVycyUyMHdpbGwlMjBiZSUyMGlnbm9yZWQlMjB3aGVuJTIwcmV1c2VfZXhpc3RpbmclMjBpcyUyMHRydWU=",highlighted:`<span class="hljs-attr">model_parameters:</span>
    <span class="hljs-attr">reuse_existing:</span> <span class="hljs-literal">true</span>
    <span class="hljs-attr">endpoint_name:</span> <span class="hljs-string">&quot;my-existing-endpoint&quot;</span>
    <span class="hljs-comment"># Other parameters will be ignored when reuse_existing is true</span>`,wrap:!1}}),
  D=new f({props:{title:"Cost Management",local:"cost-management",headingTag:"h2"}}),
  P=new f({props:{title:"Inference Endpoints",local:"inference-endpoints",headingTag:"h3"}}),
  K=new f({props:{title:"TGI Servers",local:"tgi-servers",headingTag:"h3"}}),
  te=new f({props:{title:"Troubleshooting",local:"troubleshooting",headingTag:"h2"}}),
  ne=new f({props:{title:"Common Issues",local:"common-issues",headingTag:"h3"}}),
  ie=new f({props:{title:"Performance Tips",local:"performance-tips",headingTag:"h3"}}),
  ae=new f({props:{title:"Error Handling",local:"error-handling",headingTag:"h3"}}),
  me=new At({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/use-huggingface-inference-endpoints-or-tgi-as-backend.mdx"}}),
  {
  // c: builds every node from scratch (`d(tag)` for elements, `i()` for the
  // separators between them, `a(fragment)` for child components), assigns the
  // static text/HTML, then runs h().
  c(){c=d("meta"),ue=i(),fe=d("p"),Me=i(),a($.$$.fragment),ce=i(),a(y.$$.fragment),$e=i(),h=d("p"),h.textContent=ft,ye=i(),a(T.$$.fragment),he=i(),w=d("p"),w.textContent=dt,Te=i(),a(j.$$.fragment),we=i(),U=d("p"),U.innerHTML=ut,je=i(),a(J.$$.fragment),Ue=i(),a(b.$$.fragment),Je=i(),a(I.$$.fragment),be=i(),G=d("p"),G.textContent=Mt,Ie=i(),a(v.$$.fragment),Ge=i(),a(C.$$.fragment),ve=i(),a(x.$$.fragment),Ce=i(),a(Z.$$.fragment),xe=i(),a(W.$$.fragment),Ze=i(),E=d("ul"),E.innerHTML=ct,We=i(),a(H.$$.fragment),Ee=i(),B=d("ul"),B.innerHTML=$t,He=i(),a(_.$$.fragment),Be=i(),k=d("ul"),k.innerHTML=yt,_e=i(),a(A.$$.fragment),ke=i(),F=d("ul"),F.innerHTML=ht,Ae=i(),a(S.$$.fragment),Fe=i(),a(q.$$.fragment),Se=i(),V=d("ul"),V.innerHTML=Tt,qe=i(),a(L.$$.fragment),Ve=i(),a(z.$$.fragment),Le=i(),a(R.$$.fragment),ze=i(),a(X.$$.fragment),Re=i(),a(Y.$$.fragment),Xe=i(),a(N.$$.fragment),Ye=i(),a(Q.$$.fragment),Ne=i(),a(D.$$.fragment),Qe=i(),a(P.$$.fragment),De=i(),O=d("ul"),O.innerHTML=wt,Pe=i(),a(K.$$.fragment),Oe=i(),ee=d("ul"),ee.innerHTML=jt,Ke=i(),a(te.$$.fragment),et=i(),a(ne.$$.fragment),tt=i(),le=d("ol"),le.innerHTML=Ut,nt=i(),a(ie.$$.fragment),lt=i(),se=d("ul"),se.innerHTML=Jt,it=i(),a(ae.$$.fragment),st=i(),re=d("p"),re.textContent=bt,at=i(),oe=d("ul"),oe.innerHTML=It,rt=i(),pe=d("p"),pe.innerHTML=Gt,ot=i(),a(me.$$.fragment),pt=i(),de=d("p"),this.h()},
  // l: same node sequence as c(), but obtained from the existing nodes in `e`
  // through the claim-style helpers (u/s/r). Static text/HTML is rewritten
  // only when M(node) differs from the expected "svelte-..." marker
  // (presumably a content hash written at render time -- confirm).
  l(e){const t=Bt("svelte-u9bgzb",document.head);c=u(t,"META",{name:!0,content:!0}),t.forEach(n),ue=s(e),fe=u(e,"P",{}),vt(fe).forEach(n),Me=s(e),r($.$$.fragment,e),ce=s(e),r(y.$$.fragment,e),$e=s(e),h=u(e,"P",{"data-svelte-h":!0}),M(h)!=="svelte-ik3alt"&&(h.textContent=ft),ye=s(e),r(T.$$.fragment,e),he=s(e),w=u(e,"P",{"data-svelte-h":!0}),M(w)!=="svelte-198jur5"&&(w.textContent=dt),Te=s(e),r(j.$$.fragment,e),we=s(e),U=u(e,"P",{"data-svelte-h":!0}),M(U)!=="svelte-13g2grh"&&(U.innerHTML=ut),je=s(e),r(J.$$.fragment,e),Ue=s(e),r(b.$$.fragment,e),Je=s(e),r(I.$$.fragment,e),be=s(e),G=u(e,"P",{"data-svelte-h":!0}),M(G)!=="svelte-s83khs"&&(G.textContent=Mt),Ie=s(e),r(v.$$.fragment,e),Ge=s(e),r(C.$$.fragment,e),ve=s(e),r(x.$$.fragment,e),Ce=s(e),r(Z.$$.fragment,e),xe=s(e),r(W.$$.fragment,e),Ze=s(e),E=u(e,"UL",{"data-svelte-h":!0}),M(E)!=="svelte-catr4m"&&(E.innerHTML=ct),We=s(e),r(H.$$.fragment,e),Ee=s(e),B=u(e,"UL",{"data-svelte-h":!0}),M(B)!=="svelte-wyhyn3"&&(B.innerHTML=$t),He=s(e),r(_.$$.fragment,e),Be=s(e),k=u(e,"UL",{"data-svelte-h":!0}),M(k)!=="svelte-zesftx"&&(k.innerHTML=yt),_e=s(e),r(A.$$.fragment,e),ke=s(e),F=u(e,"UL",{"data-svelte-h":!0}),M(F)!=="svelte-chc4s7"&&(F.innerHTML=ht),Ae=s(e),r(S.$$.fragment,e),Fe=s(e),r(q.$$.fragment,e),Se=s(e),V=u(e,"UL",{"data-svelte-h":!0}),M(V)!=="svelte-x57ldu"&&(V.innerHTML=Tt),qe=s(e),r(L.$$.fragment,e),Ve=s(e),r(z.$$.fragment,e),Le=s(e),r(R.$$.fragment,e),ze=s(e),r(X.$$.fragment,e),Re=s(e),r(Y.$$.fragment,e),Xe=s(e),r(N.$$.fragment,e),Ye=s(e),r(Q.$$.fragment,e),Ne=s(e),r(D.$$.fragment,e),Qe=s(e),r(P.$$.fragment,e),De=s(e),O=u(e,"UL",{"data-svelte-h":!0}),M(O)!=="svelte-1hz2o9r"&&(O.innerHTML=wt),Pe=s(e),r(K.$$.fragment,e),Oe=s(e),ee=u(e,"UL",{"data-svelte-h":!0}),M(ee)!=="svelte-rh1fr3"&&(ee.innerHTML=jt),Ke=s(e),r(te.$$.fragment,e),et=s(e),r(ne.$$.fragment,e),tt=s(e),le=u(e,"OL",{"data-svelte-h":!0}),M(le)!=="svelte-1p1agi9"&&(le.innerHTML=Ut),nt=s(e),r(ie.$$.fragment,e),lt=s(e),se=u(e,"UL",{"data-svelte-h":!0}),M(se)!=="svelte-p8wan0"&&(se.innerHTML=Jt),it=s(e),r(ae.$$.fragment,e),st=s(e),re=u(e,"P",{"data-svelte-h":!0}),M(re)!=="svelte-19pa54u"&&(re.textContent=bt),at=s(e),oe=u(e,"UL",{"data-svelte-h":!0}),M(oe)!=="svelte-1rwk4qm"&&(oe.innerHTML=It),rt=s(e),pe=u(e,"P",{"data-svelte-h":!0}),M(pe)!=="svelte-1o55mx1"&&(pe.innerHTML=Gt),ot=s(e),r(me.$$.fragment,e),pt=s(e),de=u(e,"P",{}),vt(de).forEach(n),this.h()},
  // h: sets name="hf:doc:metadata" and content=St on the <meta> node `c`.
  h(){Ct(c,"name","hf:doc:metadata"),Ct(c,"content",St)},
  // m: attaches the <meta> node to document.head and every other node or
  // component to `e` (with `t` passed through as the second positional
  // argument), then flags the fragment as shown (mt = true).
  m(e,t){_t(document.head,c),l(e,ue,t),l(e,fe,t),l(e,Me,t),o($,e,t),l(e,ce,t),o(y,e,t),l(e,$e,t),l(e,h,t),l(e,ye,t),o(T,e,t),l(e,he,t),l(e,w,t),l(e,Te,t),o(j,e,t),l(e,we,t),l(e,U,t),l(e,je,t),o(J,e,t),l(e,Ue,t),o(b,e,t),l(e,Je,t),o(I,e,t),l(e,be,t),l(e,G,t),l(e,Ie,t),o(v,e,t),l(e,Ge,t),o(C,e,t),l(e,ve,t),o(x,e,t),l(e,Ce,t),o(Z,e,t),l(e,xe,t),o(W,e,t),l(e,Ze,t),l(e,E,t),l(e,We,t),o(H,e,t),l(e,Ee,t),l(e,B,t),l(e,He,t),o(_,e,t),l(e,Be,t),l(e,k,t),l(e,_e,t),o(A,e,t),l(e,ke,t),l(e,F,t),l(e,Ae,t),o(S,e,t),l(e,Fe,t),o(q,e,t),l(e,Se,t),l(e,V,t),l(e,qe,t),o(L,e,t),l(e,Ve,t),o(z,e,t),l(e,Le,t),o(R,e,t),l(e,ze,t),o(X,e,t),l(e,Re,t),o(Y,e,t),l(e,Xe,t),o(N,e,t),l(e,Ye,t),o(Q,e,t),l(e,Ne,t),o(D,e,t),l(e,Qe,t),o(P,e,t),l(e,De,t),l(e,O,t),l(e,Pe,t),o(K,e,t),l(e,Oe,t),l(e,ee,t),l(e,Ke,t),o(te,e,t),l(e,et,t),o(ne,e,t),l(e,tt,t),l(e,le,t),l(e,nt,t),o(ie,e,t),l(e,lt,t),l(e,se,t),l(e,it,t),o(ae,e,t),l(e,st,t),l(e,re,t),l(e,at,t),l(e,oe,t),l(e,rt,t),l(e,pe,t),l(e,ot,t),o(me,e,t),l(e,pt,t),l(e,de,t),mt=!0},
  // p: nothing on this page is reactive, so the update hook is the shared
  // `Zt` helper from the scheduler chunk (presumably a no-op -- confirm).
  p:Zt,
  // i: runs p(fragment, e) on every child component once, guarded by `mt`.
  i(e){mt||(p($.$$.fragment,e),p(y.$$.fragment,e),p(T.$$.fragment,e),p(j.$$.fragment,e),p(J.$$.fragment,e),p(b.$$.fragment,e),p(I.$$.fragment,e),p(v.$$.fragment,e),p(C.$$.fragment,e),p(x.$$.fragment,e),p(Z.$$.fragment,e),p(W.$$.fragment,e),p(H.$$.fragment,e),p(_.$$.fragment,e),p(A.$$.fragment,e),p(S.$$.fragment,e),p(q.$$.fragment,e),p(L.$$.fragment,e),p(z.$$.fragment,e),p(R.$$.fragment,e),p(X.$$.fragment,e),p(Y.$$.fragment,e),p(N.$$.fragment,e),p(Q.$$.fragment,e),p(D.$$.fragment,e),p(P.$$.fragment,e),p(K.$$.fragment,e),p(te.$$.fragment,e),p(ne.$$.fragment,e),p(ie.$$.fragment,e),p(ae.$$.fragment,e),p(me.$$.fragment,e),mt=!0)},
  // o: runs m(fragment, e) on every child component and clears `mt`.
  o(e){m($.$$.fragment,e),m(y.$$.fragment,e),m(T.$$.fragment,e),m(j.$$.fragment,e),m(J.$$.fragment,e),m(b.$$.fragment,e),m(I.$$.fragment,e),m(v.$$.fragment,e),m(C.$$.fragment,e),m(x.$$.fragment,e),m(Z.$$.fragment,e),m(W.$$.fragment,e),m(H.$$.fragment,e),m(_.$$.fragment,e),m(A.$$.fragment,e),m(S.$$.fragment,e),m(q.$$.fragment,e),m(L.$$.fragment,e),m(z.$$.fragment,e),m(R.$$.fragment,e),m(X.$$.fragment,e),m(Y.$$.fragment,e),m(N.$$.fragment,e),m(Q.$$.fragment,e),m(D.$$.fragment,e),m(P.$$.fragment,e),m(K.$$.fragment,e),m(te.$$.fragment,e),m(ne.$$.fragment,e),m(ie.$$.fragment,e),m(ae.$$.fragment,e),m(me.$$.fragment,e),mt=!1},
  // d: when `e` is truthy, passes every node owned by this fragment to n();
  // the <meta> node and all child components are torn down unconditionally.
  d(e){e&&(n(ue),n(fe),n(Me),n(ce),n($e),n(h),n(ye),n(he),n(w),n(Te),n(we),n(U),n(je),n(Ue),n(Je),n(be),n(G),n(Ie),n(Ge),n(ve),n(Ce),n(xe),n(Ze),n(E),n(We),n(Ee),n(B),n(He),n(Be),n(k),n(_e),n(ke),n(F),n(Ae),n(Fe),n(Se),n(V),n(qe),n(Ve),n(Le),n(ze),n(Re),n(Xe),n(Ye),n(Ne),n(Qe),n(De),n(O),n(Pe),n(Oe),n(ee),n(Ke),n(et),n(tt),n(le),n(nt),n(lt),n(se),n(it),n(st),n(re),n(at),n(oe),n(rt),n(pe),n(ot),n(pt),n(de)),n(c),g($,e),g(y,e),g(T,e),g(j,e),g(J,e),g(b,e),g(I,e),g(v,e),g(C,e),g(x,e),g(Z,e),g(W,e),g(H,e),g(_,e),g(A,e),g(S,e),g(q,e),g(L,e),g(z,e),g(R,e),g(X,e),g(Y,e),g(N,e),g(Q,e),g(D,e),g(P,e),g(K,e),g(te,e),g(ne,e),g(ie,e),g(ae,e),g(me,e)}}}

/**
 * JSON-encoded table of contents for this page (title / local id / nested
 * sections / depth). Ft's h() writes it into the `content` attribute of the
 * <meta name="hf:doc:metadata"> element. It must stay a single-line string
 * literal: a line break inside the quotes is a syntax error.
 */
const St='{"title":"Using Hugging Face Inference Endpoints or TGI as Backend","local":"using-hugging-face-inference-endpoints-or-tgi-as-backend","sections":[{"title":"Hugging Face Inference Endpoints","local":"hugging-face-inference-endpoints","sections":[{"title":"Configuration File Example","local":"configuration-file-example","sections":[],"depth":3}],"depth":2},{"title":"Text Generation Inference (TGI)","local":"text-generation-inference-tgi","sections":[{"title":"Configuration File Example","local":"configuration-file-example","sections":[],"depth":3}],"depth":2},{"title":"Key Parameters","local":"key-parameters","sections":[{"title":"Hugging Face Inference Endpoints","local":"hugging-face-inference-endpoints","sections":[{"title":"Model Configuration","local":"model-configuration","sections":[],"depth":4},{"title":"Infrastructure Settings","local":"infrastructure-settings","sections":[],"depth":4},{"title":"Endpoint Configuration","local":"endpoint-configuration","sections":[],"depth":4},{"title":"Advanced Settings","local":"advanced-settings","sections":[],"depth":4}],"depth":3},{"title":"Text Generation Inference (TGI)","local":"text-generation-inference-tgi","sections":[{"title":"Server Configuration","local":"server-configuration","sections":[],"depth":4}],"depth":3}],"depth":2},{"title":"Usage Examples","local":"usage-examples","sections":[{"title":"Deploying a New Inference Endpoint","local":"deploying-a-new-inference-endpoint","sections":[],"depth":3},{"title":"Using an Existing TGI Server","local":"using-an-existing-tgi-server","sections":[],"depth":3},{"title":"Reusing an Existing Endpoint","local":"reusing-an-existing-endpoint","sections":[],"depth":3}],"depth":2},{"title":"Cost Management","local":"cost-management","sections":[{"title":"Inference Endpoints","local":"inference-endpoints","sections":[],"depth":3},{"title":"TGI Servers","local":"tgi-servers","sections":[],"depth":3}],"depth":2},{"title":"Troubleshooting","local":"troubleshooting","sections":[{"title":"Common Issues","local":"common-issues","sections":[],"depth":3},{"title":"Performance Tips","local":"performance-tips","sections":[],"depth":3},{"title":"Error Handling","local":"error-handling","sections":[],"depth":3}],"depth":2}],"depth":1}';

/**
 * Instance function handed to `Ht` by the component constructor.
 *
 * Registers a callback through `Wt` (imported from the scheduler chunk;
 * presumably a mount hook -- confirm) that reads the `fw` query parameter
 * from the current URL. The value is read and then discarded, so the
 * callback has no effect beyond evaluating the lookup.
 *
 * @param {*} gt Unused.
 * @returns {Array} Always a new empty array.
 */
function qt(gt){
  Wt(()=>{
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/**
 * Page component. The constructor forwards its options to `Ht` together with
 * the instance function (`qt`), the fragment factory (`Ft`), the `xt` helper
 * from the scheduler chunk and an empty object literal.
 */
class Xt extends Et{
  constructor(c){
    super();
    Ht(this,c,qt,Ft,xt,{});
  }
}

export{Xt as component};

Xet Storage Details

Size:
23.8 kB
·
Xet hash:
e97d256fb73ed7b7d908ccd87d5bcc0ac78a2469ccd11452056826c626dc63bd

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.