Buckets:

rtrm's picture
download
raw
45.9 kB
import{s as us,o as ms,n as A}from"../chunks/scheduler.b108d059.js";import{S as hs,i as fs,g as i,s as n,r as u,A as Ms,h as p,f as l,c as a,j as nt,u as m,x as r,k as rs,y as Q,a as s,v as h,d as f,t as M,w as d}from"../chunks/index.008de539.js";import{T as F}from"../chunks/Tip.aeb15ab7.js";import{C as U}from"../chunks/CodeBlock.7b00c886.js";import{H as N,E as ds}from"../chunks/index.2bf9b47d.js";function gs(w){let o,$='Installing the <code>gke-gcloud-auth-plugin</code> does not need to be installed via <code>gcloud</code> specifically, to read more about the alternative installation methods, please visit <a href="https://cloud.google.com/kubernetes-engine/docs/how-to/cluster-access-for-kubectl#install_plugin" rel="nofollow">https://cloud.google.com/kubernetes-engine/docs/how-to/cluster-access-for-kubectl#install_plugin</a>.';return{c(){o=i("p"),o.innerHTML=$},l(c){o=p(c,"P",{"data-svelte-h":!0}),r(o)!=="svelte-sm1xw4"&&(o.innerHTML=$)},m(c,g){s(c,o,g)},p:A,d(c){c&&l(o)}}}function ys(w){let o,$='Important to check before creating the GKE Autopilot Cluster the <a href="https://cloud.google.com/kubernetes-engine/docs/how-to/performance-pods" rel="nofollow">GKE Documentation - Optimize Autopilot Pod performance by choosing a machine series</a>, since not all the versions support GPU accelerators e.g. 
<code>nvidia-l4</code> is not supported in the GKE cluster versions 1.28.3 or lower.';return{c(){o=i("p"),o.innerHTML=$},l(c){o=p(c,"P",{"data-svelte-h":!0}),r(o)!=="svelte-1cdm5nv"&&(o.innerHTML=$)},m(c,g){s(c,o,g)},p:A,d(c){c&&l(o)}}}function bs(w){let o,$="To select the specific version in your location of the GKE Cluster, you can run the following command:",c,g,T,y,j='For more information please visit <a href="https://cloud.google.com/kubernetes-engine/versioning#specifying_cluster_version" rel="nofollow">https://cloud.google.com/kubernetes-engine/versioning#specifying_cluster_version</a>.',G;return g=new U({props:{code:"Z2Nsb3VkJTIwY29udGFpbmVyJTIwZ2V0LXNlcnZlci1jb25maWclMjAlNUMlMEElMjAlMjAlMjAlMjAtLWZsYXR0ZW4lM0QlMjJjaGFubmVscyUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tZmlsdGVyJTNEJTIyY2hhbm5lbHMuY2hhbm5lbCUzRFNUQUJMRSUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tZm9ybWF0JTNEJTIyeWFtbChjaGFubmVscy5jaGFubmVsJTJDY2hhbm5lbHMuZGVmYXVsdFZlcnNpb24pJTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1sb2NhdGlvbiUzRCUyNExPQ0FUSU9O",highlighted:`gcloud container get-server-config \\
--flatten=<span class="hljs-string">&quot;channels&quot;</span> \\
--filter=<span class="hljs-string">&quot;channels.channel=STABLE&quot;</span> \\
--format=<span class="hljs-string">&quot;yaml(channels.channel,channels.defaultVersion)&quot;</span> \\
--location=<span class="hljs-variable">$LOCATION</span>`,wrap:!1}}),{c(){o=i("p"),o.textContent=$,c=n(),u(g.$$.fragment),T=n(),y=i("p"),y.innerHTML=j},l(b){o=p(b,"P",{"data-svelte-h":!0}),r(o)!=="svelte-a3yff9"&&(o.textContent=$),c=a(b),m(g.$$.fragment,b),T=a(b),y=p(b,"P",{"data-svelte-h":!0}),r(y)!=="svelte-1y4gd42"&&(y.innerHTML=j)},m(b,J){s(b,o,J),s(b,c,J),h(g,b,J),s(b,T,J),s(b,y,J),G=!0},p:A,i(b){G||(f(g.$$.fragment,b),G=!0)},o(b){M(g.$$.fragment,b),G=!1},d(b){b&&(l(o),l(c),l(T),l(y)),d(g,b)}}}function $s(w){let o,$="Make sure to set the proper permissions to run the script i.e. <code>chmod +x scripts/upload_model_to_gcs.sh</code>.";return{c(){o=i("p"),o.innerHTML=$},l(c){o=p(c,"P",{"data-svelte-h":!0}),r(o)!=="svelte-1wjj60h"&&(o.innerHTML=$)},m(c,g){s(c,o,g)},p:A,d(c){c&&l(o)}}}function Cs(w){let o,$='To explore all the models that can be served via TGI, you can explore the models tagged with <code>text-generation-inference</code> in the Hub at <a href="https://huggingface.co/models?other=text-generation-inference" rel="nofollow">https://huggingface.co/models?other=text-generation-inference</a>.';return{c(){o=i("p"),o.innerHTML=$},l(c){o=p(c,"P",{"data-svelte-h":!0}),r(o)!=="svelte-1yok7mw"&&(o.innerHTML=$)},m(c,g){s(c,o,g)},p:A,d(c){c&&l(o)}}}function Us(w){let o,$="The Kubernetes deployment may take a few minutes to be ready, so you can check the status of the deployment with the following command:",c,g,T,y,j="Alternatively, you can just wait for the deployment to be ready with the following command:",G,b,J;return g=new U({props:{code:"a3ViZWN0bCUyMGdldCUyMHBvZHMlMjAtLW5hbWVzcGFjZSUyMCUyNE5BTUVTUEFDRQ==",highlighted:'kubectl get pods --namespace <span class="hljs-variable">$NAMESPACE</span>',wrap:!1}}),b=new U({props:{code:"a3ViZWN0bCUyMHdhaXQlMjAtLWZvciUzRGNvbmRpdGlvbiUzREF2YWlsYWJsZSUyMC0tdGltZW91dCUzRDcwMHMlMjAtLW5hbWVzcGFjZSUyMCUyNE5BTUVTUEFDRSUyMGRlcGxveW1lbnQlMkZ0Z2ktZGVwbG95bWVudA==",highlighted:'kubectl <span class="hljs-built_in">wait</span> 
--<span class="hljs-keyword">for</span>=condition=Available --<span class="hljs-built_in">timeout</span>=700s --namespace <span class="hljs-variable">$NAMESPACE</span> deployment/tgi-deployment',wrap:!1}}),{c(){o=i("p"),o.textContent=$,c=n(),u(g.$$.fragment),T=n(),y=i("p"),y.textContent=j,G=n(),u(b.$$.fragment)},l(C){o=p(C,"P",{"data-svelte-h":!0}),r(o)!=="svelte-qgh43p"&&(o.textContent=$),c=a(C),m(g.$$.fragment,C),T=a(C),y=p(C,"P",{"data-svelte-h":!0}),r(y)!=="svelte-16v3quh"&&(y.textContent=j),G=a(C),m(b.$$.fragment,C)},m(C,v){s(C,o,v),s(C,c,v),h(g,C,v),s(C,T,v),s(C,y,v),s(C,G,v),h(b,C,v),J=!0},p:A,i(C){J||(f(g.$$.fragment,C),f(b.$$.fragment,C),J=!0)},o(C){M(g.$$.fragment,C),M(b.$$.fragment,C),J=!1},d(C){C&&(l(o),l(c),l(T),l(y),l(G)),d(g,C),d(b,C)}}}function Ts(w){let o,$="To generate the <code>inputs</code> with the expected chat template formatting, you could use the following snippet:",c,g,T;return g=new U({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJRd2VuJTJGUXdlbjItN0ItSW5zdHJ1Y3QlMjIpJTBBdG9rZW5pemVyLmFwcGx5X2NoYXRfdGVtcGxhdGUoJTBBJTIwJTIwJTIwJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnN5c3RlbSUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJZb3UlMjBhcmUlMjBhJTIwaGVscGZ1bCUyMGFzc2lzdGFudC4lMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJXaGF0JTIwaXMlMjAyJTJCMiUzRiUyMiU3RCUyQyUwQSUyMCUyMCUyMCUyMCU1RCUyQyUwQSUyMCUyMCUyMCUyMHRva2VuaXplJTNERmFsc2UlMkMlMEElMjAlMjAlMjAlMjBhZGRfZ2VuZXJhdGlvbl9wcm9tcHQlM0RUcnVlJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;Qwen/Qwen2-7B-Instruct&quot;</span>)
tokenizer.apply_chat_template(
[
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;system&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;You are a helpful assistant.&quot;</span>},
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What is 2+2?&quot;</span>},
],
tokenize=<span class="hljs-literal">False</span>,
add_generation_prompt=<span class="hljs-literal">True</span>,
)`,wrap:!1}}),{c(){o=i("p"),o.innerHTML=$,c=n(),u(g.$$.fragment)},l(y){o=p(y,"P",{"data-svelte-h":!0}),r(o)!=="svelte-mve1xj"&&(o.innerHTML=$),c=a(y),m(g.$$.fragment,y)},m(y,j){s(y,o,j),s(y,c,j),h(g,y,j),T=!0},p:A,i(y){T||(f(g.$$.fragment,y),T=!0)},o(y){M(g.$$.fragment,y),T=!1},d(y){y&&(l(o),l(c)),d(g,y)}}}function ws(w){let o,$='📍 Find the complete example on GitHub <a href="https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tgi-from-gcs-deployment" rel="nofollow">here</a>!';return{c(){o=i("p"),o.innerHTML=$},l(c){o=p(c,"P",{"data-svelte-h":!0}),r(o)!=="svelte-14jmklj"&&(o.innerHTML=$)},m(c,g){s(c,o,g)},p:A,d(c){c&&l(o)}}}function Js(w){let o,$,c,g,T,y,j,G="Qwen2 is the new series of Qwen Large Language Models (LLMs) built by Alibaba Cloud, with both base and instruction-tuned language models ranging from 0.5 to 72 billion parameters, including a Mixture-of-Experts model; the 7B variant sitting in the second place in the 7B size range in the Open LLM Leaderboard by Hugging Face and the 72B one in the first place amongst any size. Text Generation Inference (TGI) is a toolkit developed by Hugging Face for deploying and serving LLMs, with high performance text generation. 
And, Google Kubernetes Engine (GKE) is a fully-managed Kubernetes service in Google Cloud that can be used to deploy and operate containerized applications at scale using GCP’s infrastructure.",b,J,C="This example showcases how to deploy an LLM from a Google Cloud Storage (GCS) Bucket on a GKE Cluster running a purpose-built container to deploy LLMs in a secure and managed environment with the Hugging Face DLC for TGI.",v,S,at,B,jl="First, you need to install both <code>gcloud</code> and <code>kubectl</code> in your local machine, which are the command-line tools for Google Cloud and Kubernetes, respectively, to interact with the GCP and the GKE Cluster.",ot,L,Gl='<li>To install <code>gcloud</code>, follow the instructions at <a href="https://cloud.google.com/sdk/docs/install" rel="nofollow">Cloud SDK Documentation - Install the gcloud CLI</a>.</li> <li>To install <code>kubectl</code>, follow the instructions at <a href="https://kubernetes.io/docs/tasks/tools/#kubectl" rel="nofollow">Kubernetes Documentation - Install Tools</a>.</li>',it,H,vl="Optionally, to ease the usage of the commands within this tutorial, you need to set the following environment variables for GCP:",pt,Y,ct,z,Nl="Then you need to login into your GCP account and set the project ID to the one you want to use for the deployment of the GKE Cluster.",rt,X,ut,q,Il="Once you are logged in, you need to enable the necessary service APIs in GCP, such as the Google Kubernetes Engine API, the Google Container Registry API, and the Google Container File System API, which are necessary for the deployment of the GKE Cluster and the Hugging Face DLC for TGI.",mt,P,ht,D,kl="Additionally, to use <code>kubectl</code> with the GKE Cluster credentials, you also need to install the <code>gke-gcloud-auth-plugin</code>, that can be installed with <code>gcloud</code> as follows:",ft,O,Mt,I,dt,K,gt,ee,Zl="Once everything is set up, you can proceed with the creation of the GKE Cluster and the node pool, which in this 
case will be a single GPU node, in order to use the GPU accelerator for high performance inference, also following TGI recommendations based on their internal optimizations for GPUs.",yt,te,xl="To deploy the GKE Cluster, the “Autopilot” mode will be used as it is the recommended one for most of the workloads, since the underlying infrastructure is managed by Google. Alternatively, you can also use the “Standard” mode.",bt,k,$t,le,Ct,Z,Ut,se,Vl='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/main/examples/gke/tgi-from-gcs-deployment/imgs/gke-cluster.png" alt="GKE Cluster in the GCP Console"/>',Tt,ne,El="Once the GKE Cluster is created, you can get the credentials to access it via <code>kubectl</code> with the following command:",wt,ae,Jt,oe,jt,ie,Rl="This is an optional step in the tutorial, since you may want to reuse an existing model on a GCS Bucket, if that is the case, then feel free to jump to the next step of the tutorial on how to configure the IAM for GCS so that you can access the bucket from a pod in the GKE Cluster.",Gt,pe,_l='Otherwise, to upload a model from the Hugging Face Hub to a GCS Bucket, you can use the script <a href="https://github.com/huggingface/Google-Cloud-Containers/blob/main/scripts/upload_model_to_gcs.sh" rel="nofollow">scripts/upload_model_to_gcs.sh</a>, which will download the model from the Hugging Face Hub and upload it to the GCS Bucket (and create the bucket if not created already).',vt,ce,Wl="The <code>gsutil</code> component should be installed via <code>gcloud</code>, and the Python packages <code>huggingface_hub</code> with the extra <code>hf_transfer</code>, and the package <code>crcmod</code> should also be installed.",Nt,re,It,ue,Ql="Then, you can run the script to download the model from the Hugging Face Hub and then upload it to the GCS Bucket:",kt,x,Zt,me,xt,he,Fl='<img 
src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/main/examples/gke/tgi-from-gcs-deployment/imgs/gcs-bucket.png" alt="GCS Bucket in the GCP Console"/>',Vt,fe,Et,Me,Al="Before you proceed with the deployment of the Hugging Face DLC for TGI on the GKE Cluster, you need to set the IAM permissions for the GCS Bucket so that the pod in the GKE Cluster can access the bucket. To do so, you need to create a namespace and a service account in the GKE Cluster, and then set the IAM permissions for the GCS Bucket that contains the model, either as uploaded from the Hugging Face Hub or as already existing in the GCS Bucket.",Rt,de,Sl="For convenience, as the reference to both the namespace and the service account will be used within the following steps, the environment variables <code>NAMESPACE</code> and <code>SERVICE_ACCOUNT</code> will be set.",_t,ge,Wt,ye,Bl="Then you can create the namespace and the service account in the GKE Cluster, enabling the creation of the IAM permissions for the pods in that namespace to access the GCS Bucket when using that service account.",Qt,be,Ft,$e,Ll="Then you need to add the IAM policy binding to the bucket as follows:",At,Ce,St,Ue,Bt,Te,Hl='Now you can proceed to the Kubernetes deployment of the Hugging Face DLC for TGI, serving the <a href="https://huggingface.co/Qwen/Qwen2-7B-Instruct" rel="nofollow"><code>Qwen/Qwen2-7B-Instruct</code></a> model, from a volume mounted in <code>/data</code>, copied from the GCS Bucket where the model is located.',Lt,V,Ht,we,Yl='The Hugging Face DLC for TGI will be deployed via <code>kubectl</code>, from the configuration files in the <a href="https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tgi-from-gcs-deployment/config/" rel="nofollow"><code>config/</code></a> directory:',Yt,Je,zl='<li><a href="https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tgi-from-gcs-deployment/config/deployment.yaml" 
rel="nofollow"><code>deployment.yaml</code></a>: contains the deployment details of the pod including the reference to the Hugging Face DLC for TGI setting the <code>MODEL_ID</code> to the model path in the volume mount, in this case <code>/data/Qwen2-7B-Instruct</code>.</li> <li><a href="https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tgi-from-gcs-deployment/config/service.yaml" rel="nofollow"><code>service.yaml</code></a>: contains the service details of the pod, exposing the port 8080 for the TGI service.</li> <li><a href="https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tgi-from-gcs-deployment/config/storageclass.yaml" rel="nofollow"><code>storageclass.yaml</code></a>: contains the storage class details of the pod, defining the storage class for the volume mount.</li> <li>(optional) <a href="https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tgi-from-gcs-deployment/config/ingress.yaml" rel="nofollow"><code>ingress.yaml</code></a>: contains the ingress details of the pod, exposing the service to the external world so that it can be accessed via the ingress IP.</li>',zt,je,Xt,Ge,Xl='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/main/examples/gke/tgi-from-gcs-deployment/imgs/gke-deployment.png" alt="GKE Deployment in the GCP Console"/>',qt,E,Pt,ve,Dt,Ne,ql="To run the inference over the deployed TGI service, you can either:",Ot,R,Ie,tt,Pl="Port-forwarding the deployed TGI service to the port 8080, so as to access via <code>localhost</code> with the command:",Tl,ke,wl,Ze,lt,Dl="Accessing the TGI service via the external IP of the ingress, which is the default scenario here since you have defined the ingress configuration in the <code>config/ingress.yaml</code> file (but it can be skipped in favour of the port-forwarding), that can be retrieved with the following command:",Jl,xe,Kt,Ve,el,Ee,Ol="To send a POST request to the TGI service using 
<code>cURL</code>, you can run the following command:",tl,Re,ll,_e,Kl="Or send a POST request to the ingress IP instead:",sl,We,nl,Qe,es="Which produces the following output:",al,Fe,ol,_,il,Ae,pl,Se,ts='To run the inference using Python, you can use the <code>openai</code> Python SDK (see the installation notes at <a href="https://platform.openai.com/docs/quickstart" rel="nofollow">https://platform.openai.com/docs/quickstart</a>), setting either the localhost or the ingress IP as the <code>base_url</code> for the client, and then running the following code:',cl,Be,rl,Le,ls="Which produces the following output:",ul,He,ml,Ye,hl,ze,ss="Finally, once you are done using TGI on the GKE Cluster, you can safely delete the GKE Cluster to avoid incurring in unnecessary costs.",fl,Xe,Ml,qe,ns="Alternatively, you can also downscale the replicas of the deployed pod to 0 in case you want to preserve the cluster, since the default GKE Cluster deployed with GKE Autopilot mode is running just a single <code>e2-small</code> instance.",dl,Pe,gl,yl,bl,W,$l,De,Cl,st,Ul;return T=new N({props:{title:"Deploy Qwen2 7B with TGI DLC from GCS on GKE",local:"deploy-qwen2-7b-with-tgi-dlc-from-gcs-on-gke",headingTag:"h1"}}),S=new N({props:{title:"Setup / Configuration",local:"setup--configuration",headingTag:"h2"}}),Y=new U({props:{code:"ZXhwb3J0JTIwUFJPSkVDVF9JRCUzRHlvdXItcHJvamVjdC1pZCUwQWV4cG9ydCUyMExPQ0FUSU9OJTNEeW91ci1sb2NhdGlvbiUwQWV4cG9ydCUyMENMVVNURVJfTkFNRSUzRHlvdXItY2x1c3Rlci1uYW1lJTBBZXhwb3J0JTIwQlVDS0VUX05BTUUlM0R5b3VyLWJ1Y2tldC1uYW1l",highlighted:`<span class="hljs-built_in">export</span> PROJECT_ID=your-project-id
<span class="hljs-built_in">export</span> LOCATION=your-location
<span class="hljs-built_in">export</span> CLUSTER_NAME=your-cluster-name
<span class="hljs-built_in">export</span> BUCKET_NAME=your-bucket-name`,wrap:!1}}),X=new U({props:{code:"Z2Nsb3VkJTIwYXV0aCUyMGxvZ2luJTBBZ2Nsb3VkJTIwYXV0aCUyMGFwcGxpY2F0aW9uLWRlZmF1bHQlMjBsb2dpbiUyMCUyMCUyMyUyMEZvciUyMGxvY2FsJTIwZGV2ZWxvcG1lbnQlMEFnY2xvdWQlMjBjb25maWclMjBzZXQlMjBwcm9qZWN0JTIwJTI0UFJPSkVDVF9JRA==",highlighted:`gcloud auth login
gcloud auth application-default login <span class="hljs-comment"># For local development</span>
gcloud config <span class="hljs-built_in">set</span> project <span class="hljs-variable">$PROJECT_ID</span>`,wrap:!1}}),P=new U({props:{code:"Z2Nsb3VkJTIwc2VydmljZXMlMjBlbmFibGUlMjBjb250YWluZXIuZ29vZ2xlYXBpcy5jb20lMEFnY2xvdWQlMjBzZXJ2aWNlcyUyMGVuYWJsZSUyMGNvbnRhaW5lcnJlZ2lzdHJ5Lmdvb2dsZWFwaXMuY29tJTBBZ2Nsb3VkJTIwc2VydmljZXMlMjBlbmFibGUlMjBjb250YWluZXJmaWxlc3lzdGVtLmdvb2dsZWFwaXMuY29t",highlighted:`gcloud services <span class="hljs-built_in">enable</span> container.googleapis.com
gcloud services <span class="hljs-built_in">enable</span> containerregistry.googleapis.com
gcloud services <span class="hljs-built_in">enable</span> containerfilesystem.googleapis.com`,wrap:!1}}),O=new U({props:{code:"Z2Nsb3VkJTIwY29tcG9uZW50cyUyMGluc3RhbGwlMjBna2UtZ2Nsb3VkLWF1dGgtcGx1Z2lu",highlighted:"gcloud components install gke-gcloud-auth-plugin",wrap:!1}}),I=new F({props:{$$slots:{default:[gs]},$$scope:{ctx:w}}}),K=new N({props:{title:"Create GKE Cluster",local:"create-gke-cluster",headingTag:"h2"}}),k=new F({props:{$$slots:{default:[ys]},$$scope:{ctx:w}}}),le=new U({props:{code:"Z2Nsb3VkJTIwY29udGFpbmVyJTIwY2x1c3RlcnMlMjBjcmVhdGUtYXV0byUyMCUyNENMVVNURVJfTkFNRSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tcHJvamVjdCUzRCUyNFBST0pFQ1RfSUQlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxvY2F0aW9uJTNEJTI0TE9DQVRJT04lMjAlNUMlMEElMjAlMjAlMjAlMjAtLXJlbGVhc2UtY2hhbm5lbCUzRHN0YWJsZSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tY2x1c3Rlci12ZXJzaW9uJTNEMS4yOCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbm8tYXV0b3Byb3Zpc2lvbmluZy1lbmFibGUtaW5zZWN1cmUta3ViZWxldC1yZWFkb25seS1wb3J0",highlighted:`gcloud container clusters create-auto <span class="hljs-variable">$CLUSTER_NAME</span> \\
--project=<span class="hljs-variable">$PROJECT_ID</span> \\
--location=<span class="hljs-variable">$LOCATION</span> \\
--release-channel=stable \\
--cluster-version=1.28 \\
--no-autoprovisioning-enable-insecure-kubelet-readonly-port`,wrap:!1}}),Z=new F({props:{$$slots:{default:[bs]},$$scope:{ctx:w}}}),ae=new U({props:{code:"Z2Nsb3VkJTIwY29udGFpbmVyJTIwY2x1c3RlcnMlMjBnZXQtY3JlZGVudGlhbHMlMjAlMjRDTFVTVEVSX05BTUUlMjAtLWxvY2F0aW9uJTNEJTI0TE9DQVRJT04=",highlighted:'gcloud container clusters get-credentials <span class="hljs-variable">$CLUSTER_NAME</span> --location=<span class="hljs-variable">$LOCATION</span>',wrap:!1}}),oe=new N({props:{title:"Optional: Upload a model from the Hugging Face Hub to GCS",local:"optional-upload-a-model-from-the-hugging-face-hub-to-gcs",headingTag:"h2"}}),re=new U({props:{code:"Z2Nsb3VkJTIwY29tcG9uZW50cyUyMGluc3RhbGwlMjBnc3V0aWwlMEFwaXAlMjBpbnN0YWxsJTIwLS11cGdyYWRlJTIwLS1xdWlldCUyMCUyMmh1Z2dpbmdmYWNlX2h1YiU1QmhmX3RyYW5zZmVyJTVEJTIyJTIwY3JjbW9k",highlighted:`gcloud components install gsutil
pip install --upgrade --quiet <span class="hljs-string">&quot;huggingface_hub[hf_transfer]&quot;</span> crcmod`,wrap:!1}}),x=new F({props:{$$slots:{default:[$s]},$$scope:{ctx:w}}}),me=new U({props:{code:"LiUyRnNjcmlwdHMlMkZ1cGxvYWRfbW9kZWxfdG9fZ2NzLnNoJTIwLS1tb2RlbC1pZCUyMFF3ZW4lMkZRd2VuMi03Qi1JbnN0cnVjdCUyMC0tZ2NzJTIwZ3MlM0ElMkYlMkYlMjRCVUNLRVRfTkFNRSUyRlF3ZW4yLTdCLUluc3RydWN0",highlighted:'./scripts/upload_model_to_gcs.sh --model-id Qwen/Qwen2-7B-Instruct --gcs gs://<span class="hljs-variable">$BUCKET_NAME</span>/Qwen2-7B-Instruct',wrap:!1}}),fe=new N({props:{title:"Configure IAM for GCS",local:"configure-iam-for-gcs",headingTag:"h2"}}),ge=new U({props:{code:"ZXhwb3J0JTIwTkFNRVNQQUNFJTNEaGYtZ2tlLW5hbWVzcGFjZSUwQWV4cG9ydCUyMFNFUlZJQ0VfQUNDT1VOVCUzRGhmLWdrZS1zZXJ2aWNlLWFjY291bnQ=",highlighted:`<span class="hljs-built_in">export</span> NAMESPACE=hf-gke-namespace
<span class="hljs-built_in">export</span> SERVICE_ACCOUNT=hf-gke-service-account`,wrap:!1}}),be=new U({props:{code:"a3ViZWN0bCUyMGNyZWF0ZSUyMG5hbWVzcGFjZSUyMCUyNE5BTUVTUEFDRSUwQWt1YmVjdGwlMjBjcmVhdGUlMjBzZXJ2aWNlYWNjb3VudCUyMCUyNFNFUlZJQ0VfQUNDT1VOVCUyMC0tbmFtZXNwYWNlJTIwJTI0TkFNRVNQQUNF",highlighted:`kubectl create namespace <span class="hljs-variable">$NAMESPACE</span>
kubectl create serviceaccount <span class="hljs-variable">$SERVICE_ACCOUNT</span> --namespace <span class="hljs-variable">$NAMESPACE</span>`,wrap:!1}}),Ce=new U({props:{code:"Z2Nsb3VkJTIwc3RvcmFnZSUyMGJ1Y2tldHMlMjBhZGQtaWFtLXBvbGljeS1iaW5kaW5nJTIwJTVDJTBBJTIwJTIwJTIwJTIwZ3MlM0ElMkYlMkYlMjRCVUNLRVRfTkFNRSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbWVtYmVyJTIwJTIycHJpbmNpcGFsJTNBJTJGJTJGaWFtLmdvb2dsZWFwaXMuY29tJTJGcHJvamVjdHMlMkYlMjQoZ2Nsb3VkJTIwcHJvamVjdHMlMjBkZXNjcmliZSUyMCUyNFBST0pFQ1RfSUQlMjAtLWZvcm1hdCUzRCUyMnZhbHVlKHByb2plY3ROdW1iZXIpJTIyKSUyRmxvY2F0aW9ucyUyRmdsb2JhbCUyRndvcmtsb2FkSWRlbnRpdHlQb29scyUyRiUyNFBST0pFQ1RfSUQuc3ZjLmlkLmdvb2clMkZzdWJqZWN0JTJGbnMlMkYlMjROQU1FU1BBQ0UlMkZzYSUyRiUyNFNFUlZJQ0VfQUNDT1VOVCUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tcm9sZSUyMCUyMnJvbGVzJTJGc3RvcmFnZS5vYmplY3RVc2VyJTIy",highlighted:`gcloud storage buckets add-iam-policy-binding \\
gs://<span class="hljs-variable">$BUCKET_NAME</span> \\
--member <span class="hljs-string">&quot;principal://iam.googleapis.com/projects/<span class="hljs-subst">$(gcloud projects describe $PROJECT_ID --format=<span class="hljs-string">&quot;value(projectNumber)&quot;</span>)</span>/locations/global/workloadIdentityPools/<span class="hljs-variable">$PROJECT_ID</span>.svc.id.goog/subject/ns/<span class="hljs-variable">$NAMESPACE</span>/sa/<span class="hljs-variable">$SERVICE_ACCOUNT</span>&quot;</span> \\
--role <span class="hljs-string">&quot;roles/storage.objectUser&quot;</span>`,wrap:!1}}),Ue=new N({props:{title:"Deploy TGI",local:"deploy-tgi",headingTag:"h2"}}),V=new F({props:{$$slots:{default:[Cs]},$$scope:{ctx:w}}}),je=new U({props:{code:"Z2l0JTIwY2xvbmUlMjBodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRkdvb2dsZS1DbG91ZC1Db250YWluZXJzJTBBa3ViZWN0bCUyMGFwcGx5JTIwLWYlMjBHb29nbGUtQ2xvdWQtQ29udGFpbmVycyUyRmV4YW1wbGVzJTJGZ2tlJTJGdGdpLWZyb20tZ2NzLWRlcGxveW1lbnQlMkZjb25maWc=",highlighted:`git <span class="hljs-built_in">clone</span> https://github.com/huggingface/Google-Cloud-Containers
kubectl apply -f Google-Cloud-Containers/examples/gke/tgi-from-gcs-deployment/config`,wrap:!1}}),E=new F({props:{$$slots:{default:[Us]},$$scope:{ctx:w}}}),ve=new N({props:{title:"Inference with TGI",local:"inference-with-tgi",headingTag:"h2"}}),ke=new U({props:{code:"a3ViZWN0bCUyMHBvcnQtZm9yd2FyZCUyMC0tbmFtZXNwYWNlJTIwJTI0TkFNRVNQQUNFJTIwc2VydmljZSUyRnRnaS1zZXJ2aWNlJTIwODA4MCUzQTgwODA=",highlighted:'kubectl port-forward --namespace <span class="hljs-variable">$NAMESPACE</span> service/tgi-service 8080:8080',wrap:!1}}),xe=new U({props:{code:"a3ViZWN0bCUyMGdldCUyMGluZ3Jlc3MlMjAtLW5hbWVzcGFjZSUyMCUyNE5BTUVTUEFDRSUyMHRnaS1pbmdyZXNzJTIwLW8lMjBqc29ucGF0aCUzRCclN0Iuc3RhdHVzLmxvYWRCYWxhbmNlci5pbmdyZXNzJTVCMCU1RC5pcCU3RCc=",highlighted:'kubectl get ingress --namespace <span class="hljs-variable">$NAMESPACE</span> tgi-ingress -o jsonpath=<span class="hljs-string">&#x27;{.status.loadBalancer.ingress[0].ip}&#x27;</span>',wrap:!1}}),Ve=new N({props:{title:"Via cURL",local:"via-curl",headingTag:"h3"}}),Re=new U({props:{code:"Y3VybCUyMGh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDgwJTJGZ2VuZXJhdGUlMjAlNUMlMEElMjAlMjAlMjAlMjAtWCUyMFBPU1QlMjAlNUMlMEElMjAlMjAlMjAlMjAtZCUyMCclN0IlMjJpbnB1dHMlMjIlM0ElMjIlM0MlN0NpbV9zdGFydCU3QyUzRXN5c3RlbSU1Q25Zb3UlMjBhcmUlMjBhJTIwaGVscGZ1bCUyMGFzc2lzdGFudC4lM0MlN0NpbV9lbmQlN0MlM0UlNUNuJTNDJTdDaW1fc3RhcnQlN0MlM0V1c2VyJTVDbldoYXQlMjBpcyUyMDIlMkIyJTNGJTNDJTdDaW1fZW5kJTdDJTNFJTVDbiUzQyU3Q2ltX3N0YXJ0JTdDJTNFYXNzaXN0YW50JTVDbiUyMiUyQyUyMnBhcmFtZXRlcnMlMjIlM0ElN0IlMjJ0ZW1wZXJhdHVyZSUyMiUzQTAuNyUyQyUyMCUyMnRvcF9wJTIyJTNBJTIwMC45NSUyQyUyMCUyMm1heF9uZXdfdG9rZW5zJTIyJTNBJTIwMTI4JTdEJTdEJyUyMCU1QyUwQSUyMCUyMCUyMCUyMC1IJTIwJ0NvbnRlbnQtVHlwZSUzQSUyMGFwcGxpY2F0aW9uJTJGanNvbic=",highlighted:`curl http://localhost:8080/generate \\
-X POST \\
-d <span class="hljs-string">&#x27;{&quot;inputs&quot;:&quot;&lt;|im_start|&gt;system\\nYou are a helpful assistant.&lt;|im_end|&gt;\\n&lt;|im_start|&gt;user\\nWhat is 2+2?&lt;|im_end|&gt;\\n&lt;|im_start|&gt;assistant\\n&quot;,&quot;parameters&quot;:{&quot;temperature&quot;:0.7, &quot;top_p&quot;: 0.95, &quot;max_new_tokens&quot;: 128}}&#x27;</span> \\
-H <span class="hljs-string">&#x27;Content-Type: application/json&#x27;</span>`,wrap:!1}}),We=new U({props:{code:"Y3VybCUyMGh0dHAlM0ElMkYlMkYlMjQoa3ViZWN0bCUyMGdldCUyMGluZ3Jlc3MlMjAtLW5hbWVzcGFjZSUyMCUyNE5BTUVTUEFDRSUyMHRnaS1pbmdyZXNzJTIwLW8lMjBqc29ucGF0aCUzRCclN0Iuc3RhdHVzLmxvYWRCYWxhbmNlci5pbmdyZXNzJTVCMCU1RC5pcCU3RCcpJTJGZ2VuZXJhdGUlMjAlNUMlMEElMjAlMjAlMjAlMjAtWCUyMFBPU1QlMjAlNUMlMEElMjAlMjAlMjAlMjAtZCUyMCclN0IlMjJpbnB1dHMlMjIlM0ElMjIlM0MlN0NpbV9zdGFydCU3QyUzRXN5c3RlbSU1Q25Zb3UlMjBhcmUlMjBhJTIwaGVscGZ1bCUyMGFzc2lzdGFudC4lM0MlN0NpbV9lbmQlN0MlM0UlNUNuJTNDJTdDaW1fc3RhcnQlN0MlM0V1c2VyJTVDbldoYXQlMjBpcyUyMDIlMkIyJTNGJTNDJTdDaW1fZW5kJTdDJTNFJTVDbiUzQyU3Q2ltX3N0YXJ0JTdDJTNFYXNzaXN0YW50JTVDbiUyMiUyQyUyMnBhcmFtZXRlcnMlMjIlM0ElN0IlMjJ0ZW1wZXJhdHVyZSUyMiUzQTAuNyUyQyUyMCUyMnRvcF9wJTIyJTNBJTIwMC45NSUyQyUyMCUyMm1heF9uZXdfdG9rZW5zJTIyJTNBJTIwMTI4JTdEJTdEJyUyMCU1QyUwQSUyMCUyMCUyMCUyMC1IJTIwJ0NvbnRlbnQtVHlwZSUzQSUyMGFwcGxpY2F0aW9uJTJGanNvbic=",highlighted:`curl http://$(kubectl get ingress --namespace <span class="hljs-variable">$NAMESPACE</span> tgi-ingress -o jsonpath=<span class="hljs-string">&#x27;{.status.loadBalancer.ingress[0].ip}&#x27;</span>)/generate \\
-X POST \\
-d <span class="hljs-string">&#x27;{&quot;inputs&quot;:&quot;&lt;|im_start|&gt;system\\nYou are a helpful assistant.&lt;|im_end|&gt;\\n&lt;|im_start|&gt;user\\nWhat is 2+2?&lt;|im_end|&gt;\\n&lt;|im_start|&gt;assistant\\n&quot;,&quot;parameters&quot;:{&quot;temperature&quot;:0.7, &quot;top_p&quot;: 0.95, &quot;max_new_tokens&quot;: 128}}&#x27;</span> \\
-H <span class="hljs-string">&#x27;Content-Type: application/json&#x27;</span>`,wrap:!1}}),Fe=new U({props:{code:"JTdCJTIyZ2VuZXJhdGVkX3RleHQlMjIlM0ElMjIyJTIwJTJCJTIwMiUyMGVxdWFscyUyMDQuJTIyJTdE",highlighted:'<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;generated_text&quot;</span><span class="hljs-punctuation">:</span><span class="hljs-string">&quot;2 + 2 equals 4.&quot;</span><span class="hljs-punctuation">}</span>',wrap:!1}}),_=new F({props:{$$slots:{default:[Ts]},$$scope:{ctx:w}}}),Ae=new N({props:{title:"Via Python",local:"via-python",headingTag:"h3"}}),Be=new U({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGdldF90b2tlbiUwQWZyb20lMjBvcGVuYWklMjBpbXBvcnQlMjBPcGVuQUklMEElMEFjbGllbnQlMjAlM0QlMjBPcGVuQUkoJTBBJTIwJTIwJTIwJTIwYmFzZV91cmwlM0QlMjJodHRwJTNBJTJGJTJGbG9jYWxob3N0JTNBODA4MCUyRnYxJTJGJTIyJTJDJTBBJTIwJTIwJTIwJTIwYXBpX2tleSUzRGdldF90b2tlbigpJTIwb3IlMjAlMjItJTIyJTJDJTBBKSUwQSUwQWNoYXRfY29tcGxldGlvbiUyMCUzRCUyMGNsaWVudC5jaGF0LmNvbXBsZXRpb25zLmNyZWF0ZSglMEElMjAlMjAlMjAlMjBtb2RlbCUzRCUyMnRnaSUyMiUyQyUwQSUyMCUyMCUyMCUyMG1lc3NhZ2VzJTNEJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnN5c3RlbSUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJZb3UlMjBhcmUlMjBhJTIwaGVscGZ1bCUyMGFzc2lzdGFudC4lMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJXaGF0JTIwaXMlMjAyJTJCMiUzRiUyMiU3RCUyQyUwQSUyMCUyMCUyMCUyMCU1RCUyQyUwQSUyMCUyMCUyMCUyMG1heF90b2tlbnMlM0QxMjglMkMlMEEp",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> get_token
<span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI
client = OpenAI(
base_url=<span class="hljs-string">&quot;http://localhost:8080/v1/&quot;</span>,
api_key=get_token() <span class="hljs-keyword">or</span> <span class="hljs-string">&quot;-&quot;</span>,
)
chat_completion = client.chat.completions.create(
model=<span class="hljs-string">&quot;tgi&quot;</span>,
messages=[
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;system&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;You are a helpful assistant.&quot;</span>},
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What is 2+2?&quot;</span>},
],
max_tokens=<span class="hljs-number">128</span>,
)`,wrap:!1}}),He=new U({props:{code:"Q2hhdENvbXBsZXRpb24oaWQlM0QnJyUyQyUyMGNob2ljZXMlM0QlNUJDaG9pY2UoZmluaXNoX3JlYXNvbiUzRCdlb3NfdG9rZW4nJTJDJTIwaW5kZXglM0QwJTJDJTIwbWVzc2FnZSUzRENoYXRDb21wbGV0aW9uTWVzc2FnZShjb250ZW50JTNEJzIlMjAlMkIlMjAyJTIwZXF1YWxzJTIwNC4nJTJDJTIwcm9sZSUzRCdhc3Npc3RhbnQnJTJDJTIwZnVuY3Rpb25fY2FsbCUzRE5vbmUlMkMlMjB0b29sX2NhbGxzJTNETm9uZSklMkMlMjBsb2dwcm9icyUzRE5vbmUpJTVEJTJDJTIwY3JlYXRlZCUzRDE3MTk5OTYzNTklMkMlMjBtb2RlbCUzRCclMkZkYXRhJTJGUXdlbjItN0ItSW5zdHJ1Y3QnJTJDJTIwb2JqZWN0JTNEJ2NoYXQuY29tcGxldGlvbiclMkMlMjBzeXN0ZW1fZmluZ2VycHJpbnQlM0QnMi4xLjAtbmF0aXZlJyUyQyUyMHVzYWdlJTNEQ29tcGxldGlvblVzYWdlKGNvbXBsZXRpb25fdG9rZW5zJTNEOSUyQyUyMHByb21wdF90b2tlbnMlM0QyNiUyQyUyMHRvdGFsX3Rva2VucyUzRDM1KSk=",highlighted:'<span class="hljs-constructor">ChatCompletion(<span class="hljs-params">id</span>=&#x27;&#x27;, <span class="hljs-params">choices</span>=[Choice(<span class="hljs-params">finish_reason</span>=&#x27;<span class="hljs-params">eos_token</span>&#x27;, <span class="hljs-params">index</span>=0, <span class="hljs-params">message</span>=ChatCompletionMessage(<span class="hljs-params">content</span>=&#x27;2 + 2 <span class="hljs-params">equals</span> 4.&#x27;, <span class="hljs-params">role</span>=&#x27;<span class="hljs-params">assistant</span>&#x27;, <span class="hljs-params">function_call</span>=None, <span class="hljs-params">tool_calls</span>=None)</span>, logprobs=None)], created=<span class="hljs-number">1719996359</span>, model=&#x27;/data/Qwen2-<span class="hljs-number">7</span>B-Instruct&#x27;, <span class="hljs-keyword">object</span>=&#x27;chat.completion&#x27;, system_fingerprint=&#x27;<span class="hljs-number">2.1</span>.<span class="hljs-number">0</span>-native&#x27;, usage=<span class="hljs-constructor">CompletionUsage(<span class="hljs-params">completion_tokens</span>=9, <span class="hljs-params">prompt_tokens</span>=26, <span class="hljs-params">total_tokens</span>=35)</span>)',wrap:!1}}),Ye=new N({props:{title:"Delete GKE 
Cluster",local:"delete-gke-cluster",headingTag:"h2"}}),Xe=new U({props:{code:"Z2Nsb3VkJTIwY29udGFpbmVyJTIwY2x1c3RlcnMlMjBkZWxldGUlMjAlMjRDTFVTVEVSX05BTUUlMjAtLWxvY2F0aW9uJTNEJTI0TE9DQVRJT04=",highlighted:'gcloud container clusters delete <span class="hljs-variable">$CLUSTER_NAME</span> --location=<span class="hljs-variable">$LOCATION</span>',wrap:!1}}),Pe=new U({props:{code:"a3ViZWN0bCUyMHNjYWxlJTIwLS1yZXBsaWNhcyUzRDAlMjAtLW5hbWVzcGFjZSUyMCUyNE5BTUVTUEFDRSUyMGRlcGxveW1lbnQlMkZ0Z2ktZGVwbG95bWVudA==",highlighted:'kubectl scale --replicas=0 --namespace <span class="hljs-variable">$NAMESPACE</span> deployment/tgi-deployment',wrap:!1}}),W=new F({props:{$$slots:{default:[ws]},$$scope:{ctx:w}}}),De=new ds({props:{source:"https://github.com/huggingface/Google-Cloud-Containers/blob/main/docs/source/examples/gke-tgi-from-gcs-deployment.mdx"}}),{c(){o=i("meta"),$=n(),c=i("p"),g=n(),u(T.$$.fragment),y=n(),j=i("p"),j.textContent=G,b=n(),J=i("p"),J.textContent=C,v=n(),u(S.$$.fragment),at=n(),B=i("p"),B.innerHTML=jl,ot=n(),L=i("ul"),L.innerHTML=Gl,it=n(),H=i("p"),H.textContent=vl,pt=n(),u(Y.$$.fragment),ct=n(),z=i("p"),z.textContent=Nl,rt=n(),u(X.$$.fragment),ut=n(),q=i("p"),q.textContent=Il,mt=n(),u(P.$$.fragment),ht=n(),D=i("p"),D.innerHTML=kl,ft=n(),u(O.$$.fragment),Mt=n(),u(I.$$.fragment),dt=n(),u(K.$$.fragment),gt=n(),ee=i("p"),ee.textContent=Zl,yt=n(),te=i("p"),te.textContent=xl,bt=n(),u(k.$$.fragment),$t=n(),u(le.$$.fragment),Ct=n(),u(Z.$$.fragment),Ut=n(),se=i("p"),se.innerHTML=Vl,Tt=n(),ne=i("p"),ne.innerHTML=El,wt=n(),u(ae.$$.fragment),Jt=n(),u(oe.$$.fragment),jt=n(),ie=i("p"),ie.textContent=Rl,Gt=n(),pe=i("p"),pe.innerHTML=_l,vt=n(),ce=i("p"),ce.innerHTML=Wl,Nt=n(),u(re.$$.fragment),It=n(),ue=i("p"),ue.textContent=Ql,kt=n(),u(x.$$.fragment),Zt=n(),u(me.$$.fragment),xt=n(),he=i("p"),he.innerHTML=Fl,Vt=n(),u(fe.$$.fragment),Et=n(),Me=i("p"),Me.textContent=Al,Rt=n(),de=i("p"),de.innerHTML=Sl,_t=n(),u(ge.$$.fragment),Wt=n(),ye=i("p"),ye.textContent=Bl,Qt=n(),u(be.$$.fragmen
t),Ft=n(),$e=i("p"),$e.textContent=Ll,At=n(),u(Ce.$$.fragment),St=n(),u(Ue.$$.fragment),Bt=n(),Te=i("p"),Te.innerHTML=Hl,Lt=n(),u(V.$$.fragment),Ht=n(),we=i("p"),we.innerHTML=Yl,Yt=n(),Je=i("ul"),Je.innerHTML=zl,zt=n(),u(je.$$.fragment),Xt=n(),Ge=i("p"),Ge.innerHTML=Xl,qt=n(),u(E.$$.fragment),Pt=n(),u(ve.$$.fragment),Dt=n(),Ne=i("p"),Ne.textContent=ql,Ot=n(),R=i("ul"),Ie=i("li"),tt=i("p"),tt.innerHTML=Pl,Tl=n(),u(ke.$$.fragment),wl=n(),Ze=i("li"),lt=i("p"),lt.innerHTML=Dl,Jl=n(),u(xe.$$.fragment),Kt=n(),u(Ve.$$.fragment),el=n(),Ee=i("p"),Ee.innerHTML=Ol,tl=n(),u(Re.$$.fragment),ll=n(),_e=i("p"),_e.textContent=Kl,sl=n(),u(We.$$.fragment),nl=n(),Qe=i("p"),Qe.textContent=es,al=n(),u(Fe.$$.fragment),ol=n(),u(_.$$.fragment),il=n(),u(Ae.$$.fragment),pl=n(),Se=i("p"),Se.innerHTML=ts,cl=n(),u(Be.$$.fragment),rl=n(),Le=i("p"),Le.textContent=ls,ul=n(),u(He.$$.fragment),ml=n(),u(Ye.$$.fragment),hl=n(),ze=i("p"),ze.textContent=ss,fl=n(),u(Xe.$$.fragment),Ml=n(),qe=i("p"),qe.innerHTML=ns,dl=n(),u(Pe.$$.fragment),gl=n(),yl=i("hr"),bl=n(),u(W.$$.fragment),$l=n(),u(De.$$.fragment),Cl=n(),st=i("p"),this.h()},l(e){const 
t=Ms("svelte-u9bgzb",document.head);o=p(t,"META",{name:!0,content:!0}),t.forEach(l),$=a(e),c=p(e,"P",{}),nt(c).forEach(l),g=a(e),m(T.$$.fragment,e),y=a(e),j=p(e,"P",{"data-svelte-h":!0}),r(j)!=="svelte-hix6p"&&(j.textContent=G),b=a(e),J=p(e,"P",{"data-svelte-h":!0}),r(J)!=="svelte-2nq9zq"&&(J.textContent=C),v=a(e),m(S.$$.fragment,e),at=a(e),B=p(e,"P",{"data-svelte-h":!0}),r(B)!=="svelte-1qnmo38"&&(B.innerHTML=jl),ot=a(e),L=p(e,"UL",{"data-svelte-h":!0}),r(L)!=="svelte-a8ymuw"&&(L.innerHTML=Gl),it=a(e),H=p(e,"P",{"data-svelte-h":!0}),r(H)!=="svelte-1i8n6rq"&&(H.textContent=vl),pt=a(e),m(Y.$$.fragment,e),ct=a(e),z=p(e,"P",{"data-svelte-h":!0}),r(z)!=="svelte-1g39t6p"&&(z.textContent=Nl),rt=a(e),m(X.$$.fragment,e),ut=a(e),q=p(e,"P",{"data-svelte-h":!0}),r(q)!=="svelte-1c0v0sp"&&(q.textContent=Il),mt=a(e),m(P.$$.fragment,e),ht=a(e),D=p(e,"P",{"data-svelte-h":!0}),r(D)!=="svelte-wjaltb"&&(D.innerHTML=kl),ft=a(e),m(O.$$.fragment,e),Mt=a(e),m(I.$$.fragment,e),dt=a(e),m(K.$$.fragment,e),gt=a(e),ee=p(e,"P",{"data-svelte-h":!0}),r(ee)!=="svelte-cdzit7"&&(ee.textContent=Zl),yt=a(e),te=p(e,"P",{"data-svelte-h":!0}),r(te)!=="svelte-l7bdz6"&&(te.textContent=xl),bt=a(e),m(k.$$.fragment,e),$t=a(e),m(le.$$.fragment,e),Ct=a(e),m(Z.$$.fragment,e),Ut=a(e),se=p(e,"P",{"data-svelte-h":!0}),r(se)!=="svelte-11jpxhk"&&(se.innerHTML=Vl),Tt=a(e),ne=p(e,"P",{"data-svelte-h":!0}),r(ne)!=="svelte-f29e6h"&&(ne.innerHTML=El),wt=a(e),m(ae.$$.fragment,e),Jt=a(e),m(oe.$$.fragment,e),jt=a(e),ie=p(e,"P",{"data-svelte-h":!0}),r(ie)!=="svelte-1yxvryk"&&(ie.textContent=Rl),Gt=a(e),pe=p(e,"P",{"data-svelte-h":!0}),r(pe)!=="svelte-5qrnrc"&&(pe.innerHTML=_l),vt=a(e),ce=p(e,"P",{"data-svelte-h":!0}),r(ce)!=="svelte-zucy2s"&&(ce.innerHTML=Wl),Nt=a(e),m(re.$$.fragment,e),It=a(e),ue=p(e,"P",{"data-svelte-h":!0}),r(ue)!=="svelte-u73w5r"&&(ue.textContent=Ql),kt=a(e),m(x.$$.fragment,e),Zt=a(e),m(me.$$.fragment,e),xt=a(e),he=p(e,"P",{"data-svelte-h":!0}),r(he)!=="svelte-fs7ubo"&&(he.innerHTML=Fl),Vt=a(e),m(fe.$$.fra
gment,e),Et=a(e),Me=p(e,"P",{"data-svelte-h":!0}),r(Me)!=="svelte-1kvpjr"&&(Me.textContent=Al),Rt=a(e),de=p(e,"P",{"data-svelte-h":!0}),r(de)!=="svelte-4fsaj7"&&(de.innerHTML=Sl),_t=a(e),m(ge.$$.fragment,e),Wt=a(e),ye=p(e,"P",{"data-svelte-h":!0}),r(ye)!=="svelte-gi3hy9"&&(ye.textContent=Bl),Qt=a(e),m(be.$$.fragment,e),Ft=a(e),$e=p(e,"P",{"data-svelte-h":!0}),r($e)!=="svelte-1wspkh7"&&($e.textContent=Ll),At=a(e),m(Ce.$$.fragment,e),St=a(e),m(Ue.$$.fragment,e),Bt=a(e),Te=p(e,"P",{"data-svelte-h":!0}),r(Te)!=="svelte-1ouytp9"&&(Te.innerHTML=Hl),Lt=a(e),m(V.$$.fragment,e),Ht=a(e),we=p(e,"P",{"data-svelte-h":!0}),r(we)!=="svelte-qga36z"&&(we.innerHTML=Yl),Yt=a(e),Je=p(e,"UL",{"data-svelte-h":!0}),r(Je)!=="svelte-1hd7bdg"&&(Je.innerHTML=zl),zt=a(e),m(je.$$.fragment,e),Xt=a(e),Ge=p(e,"P",{"data-svelte-h":!0}),r(Ge)!=="svelte-1atglf6"&&(Ge.innerHTML=Xl),qt=a(e),m(E.$$.fragment,e),Pt=a(e),m(ve.$$.fragment,e),Dt=a(e),Ne=p(e,"P",{"data-svelte-h":!0}),r(Ne)!=="svelte-1wzt8py"&&(Ne.textContent=ql),Ot=a(e),R=p(e,"UL",{});var Oe=nt(R);Ie=p(Oe,"LI",{});var Ke=nt(Ie);tt=p(Ke,"P",{"data-svelte-h":!0}),r(tt)!=="svelte-k3movz"&&(tt.innerHTML=Pl),Tl=a(Ke),m(ke.$$.fragment,Ke),Ke.forEach(l),wl=a(Oe),Ze=p(Oe,"LI",{});var 
et=nt(Ze);lt=p(et,"P",{"data-svelte-h":!0}),r(lt)!=="svelte-1uskw9x"&&(lt.innerHTML=Dl),Jl=a(et),m(xe.$$.fragment,et),et.forEach(l),Oe.forEach(l),Kt=a(e),m(Ve.$$.fragment,e),el=a(e),Ee=p(e,"P",{"data-svelte-h":!0}),r(Ee)!=="svelte-1ks8djk"&&(Ee.innerHTML=Ol),tl=a(e),m(Re.$$.fragment,e),ll=a(e),_e=p(e,"P",{"data-svelte-h":!0}),r(_e)!=="svelte-1u6gmmt"&&(_e.textContent=Kl),sl=a(e),m(We.$$.fragment,e),nl=a(e),Qe=p(e,"P",{"data-svelte-h":!0}),r(Qe)!=="svelte-ddr8r5"&&(Qe.textContent=es),al=a(e),m(Fe.$$.fragment,e),ol=a(e),m(_.$$.fragment,e),il=a(e),m(Ae.$$.fragment,e),pl=a(e),Se=p(e,"P",{"data-svelte-h":!0}),r(Se)!=="svelte-w0229l"&&(Se.innerHTML=ts),cl=a(e),m(Be.$$.fragment,e),rl=a(e),Le=p(e,"P",{"data-svelte-h":!0}),r(Le)!=="svelte-ddr8r5"&&(Le.textContent=ls),ul=a(e),m(He.$$.fragment,e),ml=a(e),m(Ye.$$.fragment,e),hl=a(e),ze=p(e,"P",{"data-svelte-h":!0}),r(ze)!=="svelte-1laf0m8"&&(ze.textContent=ss),fl=a(e),m(Xe.$$.fragment,e),Ml=a(e),qe=p(e,"P",{"data-svelte-h":!0}),r(qe)!=="svelte-qo8r7n"&&(qe.innerHTML=ns),dl=a(e),m(Pe.$$.fragment,e),gl=a(e),yl=p(e,"HR",{}),bl=a(e),m(W.$$.fragment,e),$l=a(e),m(De.$$.fragment,e),Cl=a(e),st=p(e,"P",{}),nt(st).forEach(l),this.h()},h(){rs(o,"name","hf:doc:metadata"),rs(o,"content",js)},m(e,t){Q(document.head,o),s(e,$,t),s(e,c,t),s(e,g,t),h(T,e,t),s(e,y,t),s(e,j,t),s(e,b,t),s(e,J,t),s(e,v,t),h(S,e,t),s(e,at,t),s(e,B,t),s(e,ot,t),s(e,L,t),s(e,it,t),s(e,H,t),s(e,pt,t),h(Y,e,t),s(e,ct,t),s(e,z,t),s(e,rt,t),h(X,e,t),s(e,ut,t),s(e,q,t),s(e,mt,t),h(P,e,t),s(e,ht,t),s(e,D,t),s(e,ft,t),h(O,e,t),s(e,Mt,t),h(I,e,t),s(e,dt,t),h(K,e,t),s(e,gt,t),s(e,ee,t),s(e,yt,t),s(e,te,t),s(e,bt,t),h(k,e,t),s(e,$t,t),h(le,e,t),s(e,Ct,t),h(Z,e,t),s(e,Ut,t),s(e,se,t),s(e,Tt,t),s(e,ne,t),s(e,wt,t),h(ae,e,t),s(e,Jt,t),h(oe,e,t),s(e,jt,t),s(e,ie,t),s(e,Gt,t),s(e,pe,t),s(e,vt,t),s(e,ce,t),s(e,Nt,t),h(re,e,t),s(e,It,t),s(e,ue,t),s(e,kt,t),h(x,e,t),s(e,Zt,t),h(me,e,t),s(e,xt,t),s(e,he,t),s(e,Vt,t),h(fe,e,t),s(e,Et,t),s(e,Me,t),s(e,Rt,t),s(e,de,t),s(e,_t,t),h(ge,e,t),s(
e,Wt,t),s(e,ye,t),s(e,Qt,t),h(be,e,t),s(e,Ft,t),s(e,$e,t),s(e,At,t),h(Ce,e,t),s(e,St,t),h(Ue,e,t),s(e,Bt,t),s(e,Te,t),s(e,Lt,t),h(V,e,t),s(e,Ht,t),s(e,we,t),s(e,Yt,t),s(e,Je,t),s(e,zt,t),h(je,e,t),s(e,Xt,t),s(e,Ge,t),s(e,qt,t),h(E,e,t),s(e,Pt,t),h(ve,e,t),s(e,Dt,t),s(e,Ne,t),s(e,Ot,t),s(e,R,t),Q(R,Ie),Q(Ie,tt),Q(Ie,Tl),h(ke,Ie,null),Q(R,wl),Q(R,Ze),Q(Ze,lt),Q(Ze,Jl),h(xe,Ze,null),s(e,Kt,t),h(Ve,e,t),s(e,el,t),s(e,Ee,t),s(e,tl,t),h(Re,e,t),s(e,ll,t),s(e,_e,t),s(e,sl,t),h(We,e,t),s(e,nl,t),s(e,Qe,t),s(e,al,t),h(Fe,e,t),s(e,ol,t),h(_,e,t),s(e,il,t),h(Ae,e,t),s(e,pl,t),s(e,Se,t),s(e,cl,t),h(Be,e,t),s(e,rl,t),s(e,Le,t),s(e,ul,t),h(He,e,t),s(e,ml,t),h(Ye,e,t),s(e,hl,t),s(e,ze,t),s(e,fl,t),h(Xe,e,t),s(e,Ml,t),s(e,qe,t),s(e,dl,t),h(Pe,e,t),s(e,gl,t),s(e,yl,t),s(e,bl,t),h(W,e,t),s(e,$l,t),h(De,e,t),s(e,Cl,t),s(e,st,t),Ul=!0},p(e,[t]){const Oe={};t&2&&(Oe.$$scope={dirty:t,ctx:e}),I.$set(Oe);const Ke={};t&2&&(Ke.$$scope={dirty:t,ctx:e}),k.$set(Ke);const et={};t&2&&(et.$$scope={dirty:t,ctx:e}),Z.$set(et);const as={};t&2&&(as.$$scope={dirty:t,ctx:e}),x.$set(as);const os={};t&2&&(os.$$scope={dirty:t,ctx:e}),V.$set(os);const is={};t&2&&(is.$$scope={dirty:t,ctx:e}),E.$set(is);const ps={};t&2&&(ps.$$scope={dirty:t,ctx:e}),_.$set(ps);const 
cs={};t&2&&(cs.$$scope={dirty:t,ctx:e}),W.$set(cs)},i(e){Ul||(f(T.$$.fragment,e),f(S.$$.fragment,e),f(Y.$$.fragment,e),f(X.$$.fragment,e),f(P.$$.fragment,e),f(O.$$.fragment,e),f(I.$$.fragment,e),f(K.$$.fragment,e),f(k.$$.fragment,e),f(le.$$.fragment,e),f(Z.$$.fragment,e),f(ae.$$.fragment,e),f(oe.$$.fragment,e),f(re.$$.fragment,e),f(x.$$.fragment,e),f(me.$$.fragment,e),f(fe.$$.fragment,e),f(ge.$$.fragment,e),f(be.$$.fragment,e),f(Ce.$$.fragment,e),f(Ue.$$.fragment,e),f(V.$$.fragment,e),f(je.$$.fragment,e),f(E.$$.fragment,e),f(ve.$$.fragment,e),f(ke.$$.fragment,e),f(xe.$$.fragment,e),f(Ve.$$.fragment,e),f(Re.$$.fragment,e),f(We.$$.fragment,e),f(Fe.$$.fragment,e),f(_.$$.fragment,e),f(Ae.$$.fragment,e),f(Be.$$.fragment,e),f(He.$$.fragment,e),f(Ye.$$.fragment,e),f(Xe.$$.fragment,e),f(Pe.$$.fragment,e),f(W.$$.fragment,e),f(De.$$.fragment,e),Ul=!0)},o(e){M(T.$$.fragment,e),M(S.$$.fragment,e),M(Y.$$.fragment,e),M(X.$$.fragment,e),M(P.$$.fragment,e),M(O.$$.fragment,e),M(I.$$.fragment,e),M(K.$$.fragment,e),M(k.$$.fragment,e),M(le.$$.fragment,e),M(Z.$$.fragment,e),M(ae.$$.fragment,e),M(oe.$$.fragment,e),M(re.$$.fragment,e),M(x.$$.fragment,e),M(me.$$.fragment,e),M(fe.$$.fragment,e),M(ge.$$.fragment,e),M(be.$$.fragment,e),M(Ce.$$.fragment,e),M(Ue.$$.fragment,e),M(V.$$.fragment,e),M(je.$$.fragment,e),M(E.$$.fragment,e),M(ve.$$.fragment,e),M(ke.$$.fragment,e),M(xe.$$.fragment,e),M(Ve.$$.fragment,e),M(Re.$$.fragment,e),M(We.$$.fragment,e),M(Fe.$$.fragment,e),M(_.$$.fragment,e),M(Ae.$$.fragment,e),M(Be.$$.fragment,e),M(He.$$.fragment,e),M(Ye.$$.fragment,e),M(Xe.$$.fragment,e),M(Pe.$$.fragment,e),M(W.$$.fragment,e),M(De.$$.fragment,e),Ul=!1},d(e){e&&(l($),l(c),l(g),l(y),l(j),l(b),l(J),l(v),l(at),l(B),l(ot),l(L),l(it),l(H),l(pt),l(ct),l(z),l(rt),l(ut),l(q),l(mt),l(ht),l(D),l(ft),l(Mt),l(dt),l(gt),l(ee),l(yt),l(te),l(bt),l($t),l(Ct),l(Ut),l(se),l(Tt),l(ne),l(wt),l(Jt),l(jt),l(ie),l(Gt),l(pe),l(vt),l(ce),l(Nt),l(It),l(ue),l(kt),l(Zt),l(xt),l(he),l(Vt),l(Et),l(Me),l(Rt),l(de),l(_t),l(Wt)
,l(ye),l(Qt),l(Ft),l($e),l(At),l(St),l(Bt),l(Te),l(Lt),l(Ht),l(we),l(Yt),l(Je),l(zt),l(Xt),l(Ge),l(qt),l(Pt),l(Dt),l(Ne),l(Ot),l(R),l(Kt),l(el),l(Ee),l(tl),l(ll),l(_e),l(sl),l(nl),l(Qe),l(al),l(ol),l(il),l(pl),l(Se),l(cl),l(rl),l(Le),l(ul),l(ml),l(hl),l(ze),l(fl),l(Ml),l(qe),l(dl),l(gl),l(yl),l(bl),l($l),l(Cl),l(st)),l(o),d(T,e),d(S,e),d(Y,e),d(X,e),d(P,e),d(O,e),d(I,e),d(K,e),d(k,e),d(le,e),d(Z,e),d(ae,e),d(oe,e),d(re,e),d(x,e),d(me,e),d(fe,e),d(ge,e),d(be,e),d(Ce,e),d(Ue,e),d(V,e),d(je,e),d(E,e),d(ve,e),d(ke),d(xe),d(Ve,e),d(Re,e),d(We,e),d(Fe,e),d(_,e),d(Ae,e),d(Be,e),d(He,e),d(Ye,e),d(Xe,e),d(Pe,e),d(W,e),d(De,e)}}}/**
 * Page metadata serialized as a JSON string: the page title, its anchor
 * (`local`), and the nested outline of sections with their `depth`.
 * The fragment's h() hook passes this string as the "content" attribute of
 * the element it also tags with name "hf:doc:metadata".
 * Kept as a string literal (not parsed here); do not reformat it, since the
 * exact text is what ends up in the attribute.
 */const js='{"title":"Deploy Qwen2 7B with TGI DLC from GCS on GKE","local":"deploy-qwen2-7b-with-tgi-dlc-from-gcs-on-gke","sections":[{"title":"Setup / Configuration","local":"setup--configuration","sections":[],"depth":2},{"title":"Create GKE Cluster","local":"create-gke-cluster","sections":[],"depth":2},{"title":"Optional: Upload a model from the Hugging Face Hub to GCS","local":"optional-upload-a-model-from-the-hugging-face-hub-to-gcs","sections":[],"depth":2},{"title":"Configure IAM for GCS","local":"configure-iam-for-gcs","sections":[],"depth":2},{"title":"Deploy TGI","local":"deploy-tgi","sections":[],"depth":2},{"title":"Inference with TGI","local":"inference-with-tgi","sections":[{"title":"Via cURL","local":"via-curl","sections":[],"depth":3},{"title":"Via Python","local":"via-python","sections":[],"depth":3}],"depth":2},{"title":"Delete GKE Cluster","local":"delete-gke-cluster","sections":[],"depth":2}],"depth":1}';/**
 * Setup function that the `xs` constructor hands to fs().
 * It registers a callback through ms(); that callback reads the "fw" query
 * parameter from window.location.search and discards the value.
 * NOTE(review): ms is the scheduler chunk's `o` export, presumably Svelte's
 * onMount, and the unused read looks like compiler residue -- confirm before
 * removing it.
 *
 * @param w - not referenced in the body.
 * @returns an empty array.
 */function Gs(w){return ms(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}/**
 * Component class for this page; it is the module's only export, under the
 * name `component`.
 * The constructor calls super() and then fs(this, o, Gs, Js, us, {}),
 * forwarding the caller-supplied options object `o` unchanged.
 * NOTE(review): hs / fs / us presumably map to Svelte's SvelteComponent /
 * init / safe_not_equal, and Js to the fragment factory defined earlier in
 * this file -- verify against the imported chunks.
 */class xs extends hs{constructor(o){super(),fs(this,o,Gs,Js,us,{})}}export{xs as component};

Xet Storage Details

Size:
45.9 kB
·
Xet hash:
f39ae117d84ad3d8d4d4a1dc292a8d123def88f8595c6b556087c334bf4b9805

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.