Buckets:
| import{s as Ms,o as us,n as Xe}from"../chunks/scheduler.b108d059.js";import{S as hs,i as gs,g as o,s as n,r as M,A as ms,h as p,f as l,c as a,j as rs,u,x as r,k as cs,y as ds,a as s,v as h,d as g,t as m,w as d}from"../chunks/index.008de539.js";import{T as Pe}from"../chunks/Tip.aeb15ab7.js";import{C as f}from"../chunks/CodeBlock.3968c746.js";import{H as U,E as ys}from"../chunks/EditOnGithub.d1c48e3d.js";function ws(T){let i,w="Some configuration steps such as the <code>gcloud</code>, <code>kubectl</code>, and <code>gke-cloud-auth-plugin</code> installation are not required if running the example within the Google Cloud Cloud Shell, as the spawned shell already comes with those dependencies installed; as well as logged in within the current account and project selected on Google Cloud.";return{c(){i=o("p"),i.innerHTML=w},l(c){i=p(c,"P",{"data-svelte-h":!0}),r(i)!=="svelte-qsry4x"&&(i.innerHTML=w)},m(c,j){s(c,i,j)},p:Xe,d(c){c&&l(i)}}}function js(T){let i,w='Installing the <code>gke-gcloud-auth-plugin</code> does not need to be installed via <code>gcloud</code> specifically, to read more about the alternative installation methods, please visit the <a href="https://cloud.google.com/kubernetes-engine/docs/how-to/cluster-access-for-kubectl#install_plugin" rel="nofollow">GKE Documentation - Install kubectl and configure cluster access</a>.';return{c(){i=o("p"),i.innerHTML=w},l(c){i=p(c,"P",{"data-svelte-h":!0}),r(i)!=="svelte-stf3bx"&&(i.innerHTML=w)},m(c,j){s(c,i,j)},p:Xe,d(c){c&&l(i)}}}function fs(T){let i,w='Before creating the GKE Autopilot Cluster on a different version than the one pinned below, you should read the <a href="https://cloud.google.com/kubernetes-engine/docs/how-to/performance-pods" rel="nofollow">GKE Documentation - Optimize Autopilot Pod performance by choosing a machine series</a> page, as not all the Kubernetes versions available on GKE support GPU accelerators (e.g. 
<code>nvidia-l4</code> is not supported on GKE for Kubernetes 1.28.3 or lower).';return{c(){i=o("p"),i.innerHTML=w},l(c){i=p(c,"P",{"data-svelte-h":!0}),r(i)!=="svelte-alua40"&&(i.innerHTML=w)},m(c,j){s(c,i,j)},p:Xe,d(c){c&&l(i)}}}function Ts(T){let i,w="If you want to change the Kubernetes version running on the GKE Cluster, you can do so, but make sure to check which are the latest supported Kubernetes versions in the location where you want to create the cluster on, with the following command:",c,j,b,C,I='Additionally, note that you can also use the “RAPID” channel instead of the “STABLE” if you require any Kubernetes feature not shipped yet within the latest Kubernetes version released on the “STABLE” channel, even though using the “STABLE” channel is recommended. For more information please visit the <a href="https://cloud.google.com/kubernetes-engine/versioning#specifying_cluster_version" rel="nofollow">GKE Documentation - Specifying cluster version</a>.',E;return j=new f({props:{code:"Z2Nsb3VkJTIwY29udGFpbmVyJTIwZ2V0LXNlcnZlci1jb25maWclMjAlNUMlMEElMjAlMjAlMjAlMjAtLWZsYXR0ZW4lM0QlMjJjaGFubmVscyUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tZmlsdGVyJTNEJTIyY2hhbm5lbHMuY2hhbm5lbCUzRFNUQUJMRSUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tZm9ybWF0JTNEJTIyeWFtbChjaGFubmVscy5jaGFubmVsJTJDY2hhbm5lbHMuZGVmYXVsdFZlcnNpb24pJTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1sb2NhdGlvbiUzRCUyNExPQ0FUSU9O",highlighted:`gcloud container get-server-config \\ | |
| --flatten=<span class="hljs-string">"channels"</span> \\ | |
| --filter=<span class="hljs-string">"channels.channel=STABLE"</span> \\ | |
| --format=<span class="hljs-string">"yaml(channels.channel,channels.defaultVersion)"</span> \\ | |
| --location=<span class="hljs-variable">$LOCATION</span>`,wrap:!1}}),{c(){i=o("p"),i.textContent=w,c=n(),M(j.$$.fragment),b=n(),C=o("p"),C.innerHTML=I},l(y){i=p(y,"P",{"data-svelte-h":!0}),r(i)!=="svelte-12vnky8"&&(i.textContent=w),c=a(y),u(j.$$.fragment,y),b=a(y),C=p(y,"P",{"data-svelte-h":!0}),r(C)!=="svelte-iklir6"&&(C.innerHTML=I)},m(y,J){s(y,i,J),s(y,c,J),h(j,y,J),s(y,b,J),s(y,C,J),E=!0},p:Xe,i(y){E||(g(j.$$.fragment,y),E=!0)},o(y){m(j.$$.fragment,y),E=!1},d(y){y&&(l(i),l(c),l(b),l(C)),d(j,y)}}}function Js(T){let i,w="Note that the <code>/v1/chat/completions</code> endpoint cannot be used, and will result in a “chat template error not found”, as the model is pre-trained and not fine-tuned for e.g. chat conversations, and does not have a chat template defined that can be applied within the <code>v1/chat/completions</code> endpoint following the OpenAI OpenAPI specification.";return{c(){i=o("p"),i.innerHTML=w},l(c){i=p(c,"P",{"data-svelte-h":!0}),r(i)!=="svelte-cvogri"&&(i.innerHTML=w)},m(c,j){s(c,i,j)},p:Xe,d(c){c&&l(i)}}}function Us(T){let i,w='📍 Find the complete example on GitHub <a href="https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/deploy-paligemma-2-with-tgi" rel="nofollow">here</a>!';return{c(){i=o("p"),i.innerHTML=w},l(c){i=p(c,"P",{"data-svelte-h":!0}),r(i)!=="svelte-1k8vf3m"&&(i.innerHTML=w)},m(c,j){s(c,i,j)},p:Xe,d(c){c&&l(i)}}}function bs(T){let i,w,c,j,b,C,I,E="PaliGemma 2 is the latest multilingual vision-language model released by Google, that combines the Gemma2 language model with the SigLIP vision model, enabling it to process both images and text inputs to generate text outputs for various tasks including captioning, visual question answering, and object detection. Text Generation Inference (TGI) is a toolkit developed by Hugging Face for deploying and serving LLMs, with high performance text generation. 
Google Kubernetes Engine (GKE) is a fully-managed Kubernetes service in Google Cloud that can be used to deploy and operate containerized applications at scale using Google Cloud infrastructure.",y,J,jl="This example showcases how to deploy Google PaliGemma2 from the Hugging Face Hub on a GKE Cluster, running a purpose-built container to deploy LLMs and VLMs in a secure and managed environment with the Hugging Face DLC for TGI. Additionally, this example also presents different scenarios or use-cases where PaliGemma2 can be used.",ze,B,Ke,$,De,W,fl="Optionally, to avoid duplicating the following values within this example, for convenience you should set the following environment variable with your own Google Cloud values:",Oe,x,et,N,tt,H,Tl="First, you need to install both <code>gcloud</code> and <code>kubectl</code> in your local machine, which are the command-line tools for Google Cloud and Kubernetes, respectively, to interact with the Google Cloud and the GKE Cluster.",lt,L,Jl='<li>To install <code>gcloud</code>, follow the instructions at <a href="https://cloud.google.com/sdk/docs/install" rel="nofollow">Cloud SDK Documentation - Install the gcloud CLI</a>.</li> <li>To install <code>kubectl</code>, follow the instructions at <a href="https://kubernetes.io/docs/tasks/tools/#kubectl" rel="nofollow">Kubernetes Documentation - Install Tools</a>.</li>',st,V,Ul="Additionally, to use <code>kubectl</code> with the GKE Cluster credentials, you also need to install the <code>gke-gcloud-auth-plugin</code>, that can be installed with <code>gcloud</code> as follows:",nt,S,at,G,it,R,ot,Y,bl="Then you need to login into your Google Cloud account and set the project ID to the one you want to use for the deployment of the GKE Cluster.",pt,_,rt,F,Cl="Once you are logged in, you need to enable the necessary service APIs in Google Cloud, such as the Google Kubernetes Engine API, the Google Container Registry API, and the Google Container File System API, which are necessary for 
the deployment of the GKE Cluster and the Hugging Face DLC for TGI.",ct,Q,Mt,P,ut,X,Il='As <a href="https://huggingface.co/google/paligemma2-3b-pt-224" rel="nofollow"><code>google/paligemma2-3b-pt-224</code></a> is a gated model, as well as the rest of the PaliGemma2 released weights on the Hugging Face Hub (see them all in <a href="https://huggingface.co/collections/google/paligemma-2-release-67500e1e1dbfdd4dee27ba48" rel="nofollow">the Google PaliGemma2 Collection on the Hub</a>), you need to first accept their gating / licensing in the model card, in order to be able to download the weights.',ht,q,$l='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/main/examples/gke/deploy-paligemma-2-with-tgi/imgs/model-gating.png" alt="PaliGemma2 Gating on the Hugging Face Hub"/>',gt,z,Gl='Once you have been granted access to the PaliGemma2 model on the Hub, you should be able to generate either a fine-grained or a read-access token to be able to download <a href="https://huggingface.co/google/paligemma2-3b-pt-224" rel="nofollow"><code>google/paligemma2-3b-pt-224</code></a> model weights (or every model under the <a href="https://huggingface.co/google" rel="nofollow"><code>google</code></a> organization on the Hub), or to all the models your account has access to, respectively. 
To generate access tokens for the Hugging Face Hub you can follow the instructions at <a href="https://huggingface.co/docs/hub/en/security-tokens" rel="nofollow">Hugging Face Hub Documentation - User access tokens</a>.',mt,K,vl="After the access token is generated, the recommended way of setting it is via the Python CLI <code>huggingface-cli</code> that comes with the <code>huggingface_hub</code> Python SDK, that can be installed as follows:",dt,D,yt,O,Al="And then login in with the generated access token with read-access over the gated/private model as:",wt,ee,jt,te,ft,le,Zl="To deploy the GKE Cluster, the “Autopilot” mode will be used as it is the recommended one for most of the workloads, since the underlying infrastructure is managed by Google; meaning that there’s no need to create a node pool in advance or set up their ingress. Alternatively, you can also use the “Standard” mode, but that may require more configuration steps and being more aware / knowledgeable of Kubernetes.",Tt,v,Jt,se,Ut,A,bt,ne,kl='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/main/examples/gke/deploy-paligemma-2-with-tgi/imgs/gke-cluster.png" alt="GKE Cluster in the Google Cloud Console"/>',Ct,ae,It,ie,El="Once the GKE Cluster is created, you need to get the credentials to access it via <code>kubectl</code>:",$t,oe,Gt,pe,Bl="Then you will be ready to use <code>kubectl</code> commands that will be calling the Kubernetes Cluster you just created on GKE.",vt,re,At,ce,Wl='As <a href="https://huggingface.co/google/paligemma2-3b-pt-224" rel="nofollow"><code>google/paligemma2-3b-pt-224</code></a> is a gated model and requires a Hugging Face Hub access token to download the weights <a href="#paligemma2-gating-and-hugging-face-access-token">as mentioned before</a>, you need to set a Kubernetes secret with the Hugging Face Hub token previously generated, with the following command (assuming that you have the <code>huggingface_hub</code> Python SDK 
installed):',Zt,Me,kt,ue,xl="Alternatively, even if not recommended, you can also directly set the access token pasting it within the <code>kubectl</code> command as follows (make sure to replace that with your own token):",Et,he,Bt,ge,Nl='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/main/examples/gke/deploy-paligemma-2-with-tgi/imgs/gke-secrets.png" alt="GKE Secret in the Google Cloud Console"/>',Wt,me,Hl='More information on how to set Kubernetes secrets in a GKE Cluster check the <a href="https://cloud.google.com/secret-manager/docs/secret-manager-managed-csi-component" rel="nofollow">GKE Documentation - Specifying cluster version</a>.',xt,de,Nt,ye,Ll='Now you can proceed to the Kubernetes deployment of the Hugging Face DLC for TGI, serving the <a href="https://huggingface.co/google/paligemma2-3b-pt-224" rel="nofollow"><code>google/paligemma2-3b-pt-224</code></a> model from the Hugging Face Hub. To explore all the models from the Hugging Face Hub that can be served with TGI, you can explore <a href="https://huggingface.co/models?other=text-generation-inference" rel="nofollow">the models tagged with <code>text-generation-inference</code> in the Hub</a>.',Ht,we,Vl="PaliGemma2 will be deployed from the following Kubernetes Deployment Manifest (including the Service):",Lt,je,Vt,fe,Sl="And that’s it, TGI is now reachable and healthy on GKE!",St,Te,Rt,Je,Rl='Before sending the <code>curl</code> request for inference, you need to note that the PaliGemma variant that you are serving is <a href="https://huggingface.co/google/paligemma2-3b-pt-224" rel="nofollow"><code>google/paligemma2-3b-pt-224</code></a> i.e. 
the pre-trained variant, meaning that’s not particularly usable out of the box for any task, but just to transfer well to other tasks after the fine-tuning; anyway, it’s pre-trained on a set of given tasks following the previous <a href="https://arxiv.org/abs/2209.06794" rel="nofollow">PaLI: A Jointly-Scaled Multilingual Language-Image Model</a> works, which are the following and, so on, the supported prompt formats that will work out of the box via the <code>/generate</code> endpoint:',Yt,Ue,Yl="<li><code>caption {lang}</code>: Simple captioning objective on datasets like WebLI and CC3M-35L</li> <li><code>ocr</code>: Transcription of text on the image using a public OCR system</li> <li><code>answer en {question}</code>: Generated VQA on CC3M-35L and object-centric questions on OpenImages</li> <li><code>question {lang} {English answer}</code>: Generated VQG on CC3M-35L in 35 languages for given English answers</li> <li><code>detect {thing} ; {thing} ; ...</code>: Multi-object detection on generated open-world data</li> <li><code>segment {thing} ; {thing} ; ...</code>: Multi-object instance segmentation on generated open-world data</li> <li><code>caption <ymin><xmin><ymax><xmax></code>: Grounded captioning of content within a specified box</li>",_t,be,_l="The PaliGemma and PaliGemma2 papers use the <code>\\n</code> i.e. the line-break, as the separator token from the image(s) + suffix (input) and the prefix (output); which is automatically included within the <code>PaliGemmaProcessor</code> in Transformers, but needs to be manually provided to the <code>/generate</code> endpoint on TGI.",Ft,Ce,Fl="Besides that, the images should be provided following the Markdown formatting for image rendering i.e. 
<code></code>, and the image needs to be publicly accessible; or provided as its base64 encoding if not hosted within a publicly accessible URL.",Qt,Ie,Ql="This means that the prompt formatting expected on the <code>/generate</code> method is either:",Pt,$e,Pl="<li><code><PROMPT>\\n</code> if the image is provided via URL.</li> <li><code><PROMPT>\\n</code> if the image is provided as its base64 encoding.</li>",Xt,Ge,Xl="Read more information about the technical details and implementation of PaliGemma on the papers / technical reports released by Google:",qt,ve,ql='<li><a href="https://arxiv.org/abs/2407.07726" rel="nofollow">PaliGemma: A versatile 3B VLM for transfer</a></li> <li><a href="https://arxiv.org/abs/2412.03555" rel="nofollow">PaliGemma 2: A Family of Versatile VLMs for Transfer</a></li>',zt,Z,Kt,Ae,Dt,Ze,zl="To send a POST request to the TGI service using <code>cURL</code>, you can run the following command:",Ot,ke,el,Ee,Kl='<thead><tr><th>Image</th> <th>Input</th> <th>Output</th></tr></thead> <tbody><tr><td><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png"/></td> <td>caption en</td> <td>image of a man in a spacesuit</td></tr></tbody>',tl,Be,ll,We,Dl="You can install it via pip as <code>pip install --upgrade --quiet huggingface_hub</code>, and then run the following snippet to mimic the cURL command above i.e. 
sending requests to the Generate API:",sl,xe,nl,Ne,Ol="Or, if you don’t have a public URL with the image hosted, you can also send the base64 encoding of the image from the image file as it follows:",al,He,il,Le,es="Both producing the following output:",ol,Ve,pl,Se,rl,Re,ts="Finally, once you are done using TGI on the GKE Cluster, you can safely delete the GKE Cluster to avoid incurring in unnecessary costs.",cl,Ye,Ml,_e,ls="Alternatively, you can also downscale the replicas of the deployed pod to 0 in case you want to preserve the cluster, since the default GKE Cluster deployed with GKE Autopilot mode is running just a single <code>e2-small</code> instance.",ul,Fe,hl,gl,ml,k,dl,Qe,yl,qe,wl;return b=new U({props:{title:"Deploy PaliGemma2 with TGI DLC on GKE",local:"deploy-paligemma2-with-tgi-dlc-on-gke",headingTag:"h1"}}),B=new U({props:{title:"Setup / Configuration",local:"setup--configuration",headingTag:"h2"}}),$=new Pe({props:{$$slots:{default:[ws]},$$scope:{ctx:T}}}),x=new f({props:{code:"ZXhwb3J0JTIwUFJPSkVDVF9JRCUzRHlvdXItcHJvamVjdC1pZCUwQWV4cG9ydCUyMExPQ0FUSU9OJTNEeW91ci1sb2NhdGlvbiUwQWV4cG9ydCUyMENMVVNURVJfTkFNRSUzRHlvdXItY2x1c3Rlci1uYW1l",highlighted:`<span class="hljs-built_in">export</span> PROJECT_ID=your-project-id | |
| <span class="hljs-built_in">export</span> LOCATION=your-location | |
| <span class="hljs-built_in">export</span> CLUSTER_NAME=your-cluster-name`,wrap:!1}}),N=new U({props:{title:"Requirements",local:"requirements",headingTag:"h3"}}),S=new f({props:{code:"Z2Nsb3VkJTIwY29tcG9uZW50cyUyMGluc3RhbGwlMjBna2UtZ2Nsb3VkLWF1dGgtcGx1Z2lu",highlighted:"gcloud components install gke-gcloud-auth-plugin",wrap:!1}}),G=new Pe({props:{$$slots:{default:[js]},$$scope:{ctx:T}}}),R=new U({props:{title:"Login and API enablement",local:"login-and-api-enablement",headingTag:"h3"}}),_=new f({props:{code:"Z2Nsb3VkJTIwYXV0aCUyMGxvZ2luJTBBZ2Nsb3VkJTIwYXV0aCUyMGFwcGxpY2F0aW9uLWRlZmF1bHQlMjBsb2dpbiUyMCUyMCUyMyUyMFJlcXVpcmVkJTIwZm9yJTIwbG9jYWwlMjBkZXZlbG9wbWVudCUwQWdjbG91ZCUyMGNvbmZpZyUyMHNldCUyMHByb2plY3QlMjAlMjRQUk9KRUNUX0lE",highlighted:`gcloud auth login | |
| gcloud auth application-default login <span class="hljs-comment"># Required for local development</span> | |
| gcloud config <span class="hljs-built_in">set</span> project <span class="hljs-variable">$PROJECT_ID</span>`,wrap:!1}}),Q=new f({props:{code:"Z2Nsb3VkJTIwc2VydmljZXMlMjBlbmFibGUlMjBjb250YWluZXIuZ29vZ2xlYXBpcy5jb20lMEFnY2xvdWQlMjBzZXJ2aWNlcyUyMGVuYWJsZSUyMGNvbnRhaW5lcnJlZ2lzdHJ5Lmdvb2dsZWFwaXMuY29tJTBBZ2Nsb3VkJTIwc2VydmljZXMlMjBlbmFibGUlMjBjb250YWluZXJmaWxlc3lzdGVtLmdvb2dsZWFwaXMuY29t",highlighted:`gcloud services <span class="hljs-built_in">enable</span> container.googleapis.com | |
| gcloud services <span class="hljs-built_in">enable</span> containerregistry.googleapis.com | |
| gcloud services <span class="hljs-built_in">enable</span> containerfilesystem.googleapis.com`,wrap:!1}}),P=new U({props:{title:"PaliGemma2 gating and Hugging Face access token",local:"paligemma2-gating-and-hugging-face-access-token",headingTag:"h3"}}),D=new f({props:{code:"cGlwJTIwaW5zdGFsbCUyMC0tdXBncmFkZSUyMC0tcXVpZXQlMjBodWdnaW5nZmFjZV9odWI=",highlighted:"pip install --upgrade --quiet huggingface_hub",wrap:!1}}),ee=new f({props:{code:"aHVnZ2luZ2ZhY2UtY2xpJTIwbG9naW4=",highlighted:"huggingface-cli login",wrap:!1}}),te=new U({props:{title:"Create GKE Cluster",local:"create-gke-cluster",headingTag:"h2"}}),v=new Pe({props:{$$slots:{default:[fs]},$$scope:{ctx:T}}}),se=new f({props:{code:"Z2Nsb3VkJTIwY29udGFpbmVyJTIwY2x1c3RlcnMlMjBjcmVhdGUtYXV0byUyMCUyNENMVVNURVJfTkFNRSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tcHJvamVjdCUzRCUyNFBST0pFQ1RfSUQlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxvY2F0aW9uJTNEJTI0TE9DQVRJT04lMjAlNUMlMEElMjAlMjAlMjAlMjAtLXJlbGVhc2UtY2hhbm5lbCUzRHN0YWJsZSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tY2x1c3Rlci12ZXJzaW9uJTNEMS4zMCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbm8tYXV0b3Byb3Zpc2lvbmluZy1lbmFibGUtaW5zZWN1cmUta3ViZWxldC1yZWFkb25seS1wb3J0",highlighted:`gcloud container clusters create-auto <span class="hljs-variable">$CLUSTER_NAME</span> \\ | |
| --project=<span class="hljs-variable">$PROJECT_ID</span> \\ | |
| --location=<span class="hljs-variable">$LOCATION</span> \\ | |
| --release-channel=stable \\ | |
| --cluster-version=1.30 \\ | |
| --no-autoprovisioning-enable-insecure-kubelet-readonly-port`,wrap:!1}}),A=new Pe({props:{$$slots:{default:[Ts]},$$scope:{ctx:T}}}),ae=new U({props:{title:"Get GKE Cluster Credentials",local:"get-gke-cluster-credentials",headingTag:"h2"}}),oe=new f({props:{code:"Z2Nsb3VkJTIwY29udGFpbmVyJTIwY2x1c3RlcnMlMjBnZXQtY3JlZGVudGlhbHMlMjAlMjRDTFVTVEVSX05BTUUlMjAtLWxvY2F0aW9uJTNEJTI0TE9DQVRJT04=",highlighted:'gcloud container clusters get-credentials <span class="hljs-variable">$CLUSTER_NAME</span> --location=<span class="hljs-variable">$LOCATION</span>',wrap:!1}}),re=new U({props:{title:"Set Hugging Face Secrets on GKE",local:"set-hugging-face-secrets-on-gke",headingTag:"h2"}}),Me=new f({props:{code:"a3ViZWN0bCUyMGNyZWF0ZSUyMHNlY3JldCUyMGdlbmVyaWMlMjBoZi1zZWNyZXQlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWZyb20tbGl0ZXJhbCUzRGhmX3Rva2VuJTNEJTI0KHB5dGhvbiUyMC1jJTIwJTIyZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGdldF90b2tlbiUzQiUyMHByaW50KGdldF90b2tlbigpKSUyMiklMjAlNUMlMEElMjAlMjAlMjAlMjAtLWRyeS1ydW4lM0RjbGllbnQlMjAtbyUyMHlhbWwlMjAlN0MlMjBrdWJlY3RsJTIwYXBwbHklMjAtZiUyMC0=",highlighted:`kubectl create secret generic hf-secret \\ | |
| --from-literal=hf_token=$(python -c <span class="hljs-string">"from huggingface_hub import get_token; print(get_token())"</span>) \\ | |
| --dry-run=client -o yaml | kubectl apply -f -`,wrap:!1}}),he=new f({props:{code:"a3ViZWN0bCUyMGNyZWF0ZSUyMHNlY3JldCUyMGdlbmVyaWMlMjBoZi1zZWNyZXQlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWZyb20tbGl0ZXJhbCUzRGhmX3Rva2VuJTNEaGZfKioqJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1kcnktcnVuJTNEY2xpZW50JTIwLW8lMjB5YW1sJTIwJTdDJTIwa3ViZWN0bCUyMGFwcGx5JTIwLWYlMjAt",highlighted:`kubectl create secret generic hf-secret \\ | |
| --from-literal=hf_token=hf_*** \\ | |
| --dry-run=client -o yaml | kubectl apply -f -`,wrap:!1}}),de=new U({props:{title:"Deploy TGI on GKE",local:"deploy-tgi-on-gke",headingTag:"h2"}}),je=new f({props:{code:"YXBpVmVyc2lvbiUzQSUyMGFwcHMlMkZ2MSUwQWtpbmQlM0ElMjBEZXBsb3ltZW50JTBBbWV0YWRhdGElM0ElMEElMjAlMjBuYW1lJTNBJTIwdGdpJTBBc3BlYyUzQSUwQSUyMCUyMHJlcGxpY2FzJTNBJTIwMSUwQSUyMCUyMHNlbGVjdG9yJTNBJTBBJTIwJTIwJTIwJTIwbWF0Y2hMYWJlbHMlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjBhcHAlM0ElMjB0Z2klMEElMjAlMjB0ZW1wbGF0ZSUzQSUwQSUyMCUyMCUyMCUyMG1ldGFkYXRhJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwbGFiZWxzJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwYXBwJTNBJTIwdGdpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaGYuY28lMkZtb2RlbCUzQSUyMGdvb2dsZS0tcGFsaWdlbW1hMi0zYi1wdC0yMjQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBoZi5jbyUyRnRhc2slM0ElMjB0ZXh0LWdlbmVyYXRpb24lMEElMjAlMjAlMjAlMjBzcGVjJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwY29udGFpbmVycyUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMC0lMjBuYW1lJTNBJTIwdGdpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaW1hZ2UlM0ElMjAlMjJ1cy1jZW50cmFsMS1kb2NrZXIucGtnLmRldiUyRmdjcC1wYXJ0bmVyc2hpcC00MTIxMDglMkZkZWVwLWxlYXJuaW5nLWltYWdlcyUyRmh1Z2dpbmdmYWNlLXRleHQtZ2VuZXJhdGlvbi1pbmZlcmVuY2UtZ3B1LjMuMC4xJTIyJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIzJTIwaW1hZ2UlM0ElMjAlMjJ1cy1kb2NrZXIucGtnLmRldiUyRmRlZXBsZWFybmluZy1wbGF0Zm9ybS1yZWxlYXNlJTJGZ2NyLmlvJTJGaHVnZ2luZ2ZhY2UtdGV4dC1nZW5lcmF0aW9uLWluZmVyZW5jZS1jdTEyNC4zLTAudWJ1bnR1MjIwNC5weTMxMSUyMiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHJlc291cmNlcyUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHJlcXVlc3RzJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbnZpZGlhLmNvbSUyRmdwdSUzQSUyMDElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsaW1pdHMlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBudmlkaWEuY29tJTJGZ3B1JTNBJTIwMSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGVudiUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMC0lMjBuYW1lJTNBJTIwTU9ERUxfSUQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB2YWx1ZSUzQSUyMCUyMmdvb2dsZSUyRnBhbGlnZW1tYTItM2ItcHQtMjI0J
TIyJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwLSUyMG5hbWUlM0ElMjBOVU1fU0hBUkQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB2YWx1ZSUzQSUyMCUyMjElMjIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAtJTIwbmFtZSUzQSUyMFBPUlQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB2YWx1ZSUzQSUyMCUyMjgwODAlMjIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAtJTIwbmFtZSUzQSUyMEhGX1RPS0VOJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwdmFsdWVGcm9tJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2VjcmV0S2V5UmVmJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbmFtZSUzQSUyMGhmLXNlY3JldCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGtleSUzQSUyMGhmX3Rva2VuJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwdm9sdW1lTW91bnRzJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwLSUyMG1vdW50UGF0aCUzQSUyMCUyRmRldiUyRnNobSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG5hbWUlM0ElMjBkc2htJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwLSUyMG1vdW50UGF0aCUzQSUyMCUyRnRtcCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG5hbWUlM0ElMjB0bXAlMEElMjAlMjAlMjAlMjAlMjAlMjB2b2x1bWVzJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwLSUyMG5hbWUlM0ElMjBkc2htJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZW1wdHlEaXIlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtZWRpdW0lM0ElMjBNZW1vcnklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzaXplTGltaXQlM0ElMjAxR2klMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAtJTIwbmFtZSUzQSUyMHRtcCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGVtcHR5RGlyJTNBJTIwJTdCJTdEJTBBJTIwJTIwJTIwJTIwJTIwJTIwbm9kZVNlbGVjdG9yJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY2xvdWQuZ29vZ2xlLmNvbSUyRmdrZS1hY2NlbGVyYXRvciUzQSUyMG52aWRpYS1sNCUwQUZpbmFsbHklMkMlMjB0byUyMG1ha2UlMjBzdXJlJTIwdGhhdCUyMHRoZSUyMHNlcnZpY2UlMjBpcyUyMGhlYWx0aHklMjBhbmQlMjByZWFjaGFibGUlMjB2aWElMjBlaXRoZXIlMjAlNjBsb2NhbGhvc3QlNjAlMjBvciUyMHRoZSUyMGluZ3Jlc3MlMjBJUCUyMChkZXBlbmRpbmclMjBvbiUyMGhvdyUyM
HlvdSUyMGV4cG9zZWQlMjB0aGUlMjBzZXJ2aWNlJTIwYXMlMjBvZiUyMHRoZSUyMHN0ZXAlMjBhYm92ZSklMkMlMjB5b3UlMjBjYW4lMjBzZW5kJTIwdGhlJTIwZm9sbG93aW5nJTIwJTYwY3VybCU2MCUyMGNvbW1hbmQlM0ElMEElMEElNjAlNjAlNjBiYXNoJTBBY3VybCUyMGh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDgwJTJGaGVhbHRo",highlighted:`<span class="hljs-attr">apiVersion:</span> <span class="hljs-string">apps/v1</span> | |
| <span class="hljs-attr">kind:</span> <span class="hljs-string">Deployment</span> | |
| <span class="hljs-attr">metadata:</span> | |
| <span class="hljs-attr">name:</span> <span class="hljs-string">tgi</span> | |
| <span class="hljs-attr">spec:</span> | |
| <span class="hljs-attr">replicas:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">selector:</span> | |
| <span class="hljs-attr">matchLabels:</span> | |
| <span class="hljs-attr">app:</span> <span class="hljs-string">tgi</span> | |
| <span class="hljs-attr">template:</span> | |
| <span class="hljs-attr">metadata:</span> | |
| <span class="hljs-attr">labels:</span> | |
| <span class="hljs-attr">app:</span> <span class="hljs-string">tgi</span> | |
| <span class="hljs-attr">hf.co/model:</span> <span class="hljs-string">google--paligemma2-3b-pt-224</span> | |
| <span class="hljs-attr">hf.co/task:</span> <span class="hljs-string">text-generation</span> | |
| <span class="hljs-attr">spec:</span> | |
| <span class="hljs-attr">containers:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">name:</span> <span class="hljs-string">tgi</span> | |
| <span class="hljs-attr">image:</span> <span class="hljs-string">"us-central1-docker.pkg.dev/gcp-partnership-412108/deep-learning-images/huggingface-text-generation-inference-gpu.3.0.1"</span> | |
| <span class="hljs-comment"># image: "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.3-0.ubuntu2204.py311"</span> | |
| <span class="hljs-attr">resources:</span> | |
| <span class="hljs-attr">requests:</span> | |
| <span class="hljs-attr">nvidia.com/gpu:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">limits:</span> | |
| <span class="hljs-attr">nvidia.com/gpu:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">env:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">name:</span> <span class="hljs-string">MODEL_ID</span> | |
| <span class="hljs-attr">value:</span> <span class="hljs-string">"google/paligemma2-3b-pt-224"</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">name:</span> <span class="hljs-string">NUM_SHARD</span> | |
| <span class="hljs-attr">value:</span> <span class="hljs-string">"1"</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">name:</span> <span class="hljs-string">PORT</span> | |
| <span class="hljs-attr">value:</span> <span class="hljs-string">"8080"</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">name:</span> <span class="hljs-string">HF_TOKEN</span> | |
| <span class="hljs-attr">valueFrom:</span> | |
| <span class="hljs-attr">secretKeyRef:</span> | |
| <span class="hljs-attr">name:</span> <span class="hljs-string">hf-secret</span> | |
| <span class="hljs-attr">key:</span> <span class="hljs-string">hf_token</span> | |
| <span class="hljs-attr">volumeMounts:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">mountPath:</span> <span class="hljs-string">/dev/shm</span> | |
| <span class="hljs-attr">name:</span> <span class="hljs-string">dshm</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">mountPath:</span> <span class="hljs-string">/tmp</span> | |
| <span class="hljs-attr">name:</span> <span class="hljs-string">tmp</span> | |
| <span class="hljs-attr">volumes:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">name:</span> <span class="hljs-string">dshm</span> | |
| <span class="hljs-attr">emptyDir:</span> | |
| <span class="hljs-attr">medium:</span> <span class="hljs-string">Memory</span> | |
| <span class="hljs-attr">sizeLimit:</span> <span class="hljs-string">1Gi</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">name:</span> <span class="hljs-string">tmp</span> | |
| <span class="hljs-attr">emptyDir:</span> {} | |
| <span class="hljs-attr">nodeSelector:</span> | |
| <span class="hljs-attr">cloud.google.com/gke-accelerator:</span> <span class="hljs-string">nvidia-l4</span> | |
| <span class="hljs-string">Finally,</span> <span class="hljs-string">to</span> <span class="hljs-string">make</span> <span class="hljs-string">sure</span> <span class="hljs-string">that</span> <span class="hljs-string">the</span> <span class="hljs-string">service</span> <span class="hljs-string">is</span> <span class="hljs-string">healthy</span> <span class="hljs-string">and</span> <span class="hljs-string">reachable</span> <span class="hljs-string">via</span> <span class="hljs-string">either</span> <span class="hljs-string">\`localhost\`</span> <span class="hljs-string">or</span> <span class="hljs-string">the</span> <span class="hljs-string">ingress</span> <span class="hljs-string">IP</span> <span class="hljs-string">(depending</span> <span class="hljs-string">on</span> <span class="hljs-string">how</span> <span class="hljs-string">you</span> <span class="hljs-string">exposed</span> <span class="hljs-string">the</span> <span class="hljs-string">service</span> <span class="hljs-string">as</span> <span class="hljs-string">of</span> <span class="hljs-string">the</span> <span class="hljs-string">step</span> <span class="hljs-string">above),</span> <span class="hljs-string">you</span> <span class="hljs-string">can</span> <span class="hljs-string">send</span> <span class="hljs-string">the</span> <span class="hljs-string">following</span> <span class="hljs-string">\`curl\`</span> <span class="hljs-attr">command:</span> | |
| <span class="hljs-string">\`\`\`bash</span> | |
| <span class="hljs-string">curl</span> <span class="hljs-string">http://localhost:8080/health</span>`,wrap:!1}}),Te=new U({props:{title:"Inference with TGI on GKE",local:"inference-with-tgi-on-gke",headingTag:"h2"}}),Z=new Pe({props:{$$slots:{default:[Js]},$$scope:{ctx:T}}}),Ae=new U({props:{title:"Via cURL",local:"via-curl",headingTag:"h3"}}),ke=new f({props:{code:"Y3VybCUyMGh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDgwJTJGZ2VuZXJhdGUlMjAlNUMlMEElMjAlMjAlMjAlMjAtZCUyMCclN0IlMjJpbnB1dHMlMjIlM0ElMjIhJTVCJTVEKGh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmh1Z2dpbmdmYWNlJTJGZG9jdW1lbnRhdGlvbi1pbWFnZXMlMkZyZXNvbHZlJTJGbWFpbiUyRnRyYW5zZm9ybWVycyUyRnJhYmJpdC5wbmcpY2FwdGlvbiUyMGVuJTVDbiUyMiUyQyUyMnBhcmFtZXRlcnMlMjIlM0ElN0IlMjJtYXhfbmV3X3Rva2VucyUyMiUzQTEyOCUyQyUyMnNlZWQlMjIlM0E0MiU3RCU3RCclMjAlNUMlMEElMjAlMjAlMjAlMjAtSCUyMCdDb250ZW50LVR5cGUlM0ElMjBhcHBsaWNhdGlvbiUyRmpzb24n",highlighted:`curl http://localhost:8080/generate \\ | |
| -d <span class="hljs-string">'{"inputs":"caption en\\n","parameters":{"max_new_tokens":128,"seed":42}}'</span> \\ | |
| -H <span class="hljs-string">'Content-Type: application/json'</span>`,wrap:!1}}),Be=new U({props:{title:"Via Python",local:"via-python",headingTag:"h3"}}),xe=new f({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMEluZmVyZW5jZUNsaWVudCUwQSUwQWNsaWVudCUyMCUzRCUyMEluZmVyZW5jZUNsaWVudCglMjJodHRwJTNBJTJGJTJGbG9jYWxob3N0JTNBODA4MCUyMiUyQyUyMGFwaV9rZXklM0QlMjItJTIyKSUwQSUwQWdlbmVyYXRpb24lMjAlM0QlMjBjbGllbnQudGV4dF9nZW5lcmF0aW9uKCUwQSUyMCUyMCUyMCUyMHByb21wdCUzRCUyMiElNUIlNUQoaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRhdGFzZXRzJTJGaHVnZ2luZ2ZhY2UlMkZkb2N1bWVudGF0aW9uLWltYWdlcyUyRnJlc29sdmUlMkZtYWluJTJGdHJhbnNmb3JtZXJzJTJGcmFiYml0LnBuZyljYXB0aW9uJTIwZW4lNUNuJTIyJTJDJTBBJTIwJTIwJTIwJTIwbWF4X25ld190b2tlbnMlM0QxMjglMkMlMEElMjAlMjAlMjAlMjBzZWVkJTNENDIlMkMlMEEp",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient | |
| client = InferenceClient(<span class="hljs-string">"http://localhost:8080"</span>, api_key=<span class="hljs-string">"-"</span>) | |
| generation = client.text_generation( | |
| prompt=<span class="hljs-string">"caption en\\n"</span>, | |
| max_new_tokens=<span class="hljs-number">128</span>, | |
| seed=<span class="hljs-number">42</span>, | |
| )`,wrap:!1}}),He=new f({props:{code:"aW1wb3J0JTIwYmFzZTY0JTBBZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMEluZmVyZW5jZUNsaWVudCUwQSUwQWNsaWVudCUyMCUzRCUyMEluZmVyZW5jZUNsaWVudCglMjJodHRwJTNBJTJGJTJGbG9jYWxob3N0JTNBODA4MCUyMiUyQyUyMGFwaV9rZXklM0QlMjItJTIyKSUwQSUwQXdpdGglMjBvcGVuKCUyMiUyRnBhdGglMkZ0byUyRmltYWdlLnBuZyUyMiUyQyUyMCUyMnJiJTIyKSUyMGFzJTIwZiUzQSUwQSUyMCUyMCUyMCUyMGI2NF9pbWFnZSUyMCUzRCUyMGJhc2U2NC5iNjRlbmNvZGUoZi5yZWFkKCkpLmRlY29kZSglMjJ1dGYtOCUyMiklMEElMEFnZW5lcmF0aW9uJTIwJTNEJTIwY2xpZW50LnRleHRfZ2VuZXJhdGlvbiglMEElMjAlMjAlMjAlMjBwcm9tcHQlM0RmJTIyISU1QiU1RChkYXRhJTNBaW1hZ2UlMkZwbmclM0JiYXNlNjQlMkMlN0JiNjRfaW1hZ2UlN0QpY2FwdGlvbiUyMGVuJTVDbiUyMiUyQyUwQSUyMCUyMCUyMCUyMG1heF9uZXdfdG9rZW5zJTNEMTI4JTJDJTBBJTIwJTIwJTIwJTIwc2VlZCUzRDQyJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">import</span> base64 | |
| <span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient | |
| client = InferenceClient(<span class="hljs-string">"http://localhost:8080"</span>, api_key=<span class="hljs-string">"-"</span>) | |
| <span class="hljs-keyword">with</span> <span class="hljs-built_in">open</span>(<span class="hljs-string">"/path/to/image.png"</span>, <span class="hljs-string">"rb"</span>) <span class="hljs-keyword">as</span> f: | |
| b64_image = base64.b64encode(f.read()).decode(<span class="hljs-string">"utf-8"</span>) | |
| generation = client.text_generation( | |
| prompt=<span class="hljs-string">f"caption en\\n"</span>, | |
| max_new_tokens=<span class="hljs-number">128</span>, | |
| seed=<span class="hljs-number">42</span>, | |
| )`,wrap:!1}}),Ve=new f({props:{code:"JTdCJTIyZ2VuZXJhdGVkX3RleHQlMjIlM0ElMjAlMjJpbWFnZSUyMG9mJTIwYSUyMG1hbiUyMGluJTIwYSUyMHNwYWNlc3VpdCUyMiU3RA==",highlighted:'<span class="hljs-punctuation">{</span><span class="hljs-attr">"generated_text"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"image of a man in a spacesuit"</span><span class="hljs-punctuation">}</span>',wrap:!1}}),Se=new U({props:{title:"Delete GKE Cluster",local:"delete-gke-cluster",headingTag:"h2"}}),Ye=new f({props:{code:"Z2Nsb3VkJTIwY29udGFpbmVyJTIwY2x1c3RlcnMlMjBkZWxldGUlMjAlMjRDTFVTVEVSX05BTUUlMjAtLWxvY2F0aW9uJTNEJTI0TE9DQVRJT04=",highlighted:'gcloud container clusters delete <span class="hljs-variable">$CLUSTER_NAME</span> --location=<span class="hljs-variable">$LOCATION</span>',wrap:!1}}),Fe=new f({props:{code:"a3ViZWN0bCUyMHNjYWxlJTIwLS1yZXBsaWNhcyUzRDAlMjBkZXBsb3ltZW50JTJGdGdp",highlighted:"kubectl scale --replicas=0 deployment/tgi",wrap:!1}}),k=new Pe({props:{$$slots:{default:[Us]},$$scope:{ctx:T}}}),Qe=new 
ys({props:{source:"https://github.com/huggingface/Google-Cloud-Containers/blob/main/docs/source/examples/gke-deploy-paligemma-2-with-tgi.mdx"}}),{c(){i=o("meta"),w=n(),c=o("p"),j=n(),M(b.$$.fragment),C=n(),I=o("p"),I.textContent=E,y=n(),J=o("p"),J.textContent=jl,ze=n(),M(B.$$.fragment),Ke=n(),M($.$$.fragment),De=n(),W=o("p"),W.textContent=fl,Oe=n(),M(x.$$.fragment),et=n(),M(N.$$.fragment),tt=n(),H=o("p"),H.innerHTML=Tl,lt=n(),L=o("ul"),L.innerHTML=Jl,st=n(),V=o("p"),V.innerHTML=Ul,nt=n(),M(S.$$.fragment),at=n(),M(G.$$.fragment),it=n(),M(R.$$.fragment),ot=n(),Y=o("p"),Y.textContent=bl,pt=n(),M(_.$$.fragment),rt=n(),F=o("p"),F.textContent=Cl,ct=n(),M(Q.$$.fragment),Mt=n(),M(P.$$.fragment),ut=n(),X=o("p"),X.innerHTML=Il,ht=n(),q=o("p"),q.innerHTML=$l,gt=n(),z=o("p"),z.innerHTML=Gl,mt=n(),K=o("p"),K.innerHTML=vl,dt=n(),M(D.$$.fragment),yt=n(),O=o("p"),O.textContent=Al,wt=n(),M(ee.$$.fragment),jt=n(),M(te.$$.fragment),ft=n(),le=o("p"),le.textContent=Zl,Tt=n(),M(v.$$.fragment),Jt=n(),M(se.$$.fragment),Ut=n(),M(A.$$.fragment),bt=n(),ne=o("p"),ne.innerHTML=kl,Ct=n(),M(ae.$$.fragment),It=n(),ie=o("p"),ie.innerHTML=El,$t=n(),M(oe.$$.fragment),Gt=n(),pe=o("p"),pe.innerHTML=Bl,vt=n(),M(re.$$.fragment),At=n(),ce=o("p"),ce.innerHTML=Wl,Zt=n(),M(Me.$$.fragment),kt=n(),ue=o("p"),ue.innerHTML=xl,Et=n(),M(he.$$.fragment),Bt=n(),ge=o("p"),ge.innerHTML=Nl,Wt=n(),me=o("p"),me.innerHTML=Hl,xt=n(),M(de.$$.fragment),Nt=n(),ye=o("p"),ye.innerHTML=Ll,Ht=n(),we=o("p"),we.textContent=Vl,Lt=n(),M(je.$$.fragment),Vt=n(),fe=o("p"),fe.textContent=Sl,St=n(),M(Te.$$.fragment),Rt=n(),Je=o("p"),Je.innerHTML=Rl,Yt=n(),Ue=o("ul"),Ue.innerHTML=Yl,_t=n(),be=o("p"),be.innerHTML=_l,Ft=n(),Ce=o("p"),Ce.innerHTML=Fl,Qt=n(),Ie=o("p"),Ie.innerHTML=Ql,Pt=n(),$e=o("ul"),$e.innerHTML=Pl,Xt=n(),Ge=o("p"),Ge.textContent=Xl,qt=n(),ve=o("ul"),ve.innerHTML=ql,zt=n(),M(Z.$$.fragment),Kt=n(),M(Ae.$$.fragment),Dt=n(),Ze=o("p"),Ze.innerHTML=zl,Ot=n(),M(ke.$$.fragment),el=n(),Ee=o("table"),Ee.innerHTML=Kl,tl=n(),M(Be.$$.fra
gment),ll=n(),We=o("p"),We.innerHTML=Dl,sl=n(),M(xe.$$.fragment),nl=n(),Ne=o("p"),Ne.textContent=Ol,al=n(),M(He.$$.fragment),il=n(),Le=o("p"),Le.textContent=es,ol=n(),M(Ve.$$.fragment),pl=n(),M(Se.$$.fragment),rl=n(),Re=o("p"),Re.textContent=ts,cl=n(),M(Ye.$$.fragment),Ml=n(),_e=o("p"),_e.innerHTML=ls,ul=n(),M(Fe.$$.fragment),hl=n(),gl=o("hr"),ml=n(),M(k.$$.fragment),dl=n(),M(Qe.$$.fragment),yl=n(),qe=o("p"),this.h()},l(e){const t=ms("svelte-u9bgzb",document.head);i=p(t,"META",{name:!0,content:!0}),t.forEach(l),w=a(e),c=p(e,"P",{}),rs(c).forEach(l),j=a(e),u(b.$$.fragment,e),C=a(e),I=p(e,"P",{"data-svelte-h":!0}),r(I)!=="svelte-pxwl0u"&&(I.textContent=E),y=a(e),J=p(e,"P",{"data-svelte-h":!0}),r(J)!=="svelte-ngimmo"&&(J.textContent=jl),ze=a(e),u(B.$$.fragment,e),Ke=a(e),u($.$$.fragment,e),De=a(e),W=p(e,"P",{"data-svelte-h":!0}),r(W)!=="svelte-1xlynj2"&&(W.textContent=fl),Oe=a(e),u(x.$$.fragment,e),et=a(e),u(N.$$.fragment,e),tt=a(e),H=p(e,"P",{"data-svelte-h":!0}),r(H)!=="svelte-10sq42i"&&(H.innerHTML=Tl),lt=a(e),L=p(e,"UL",{"data-svelte-h":!0}),r(L)!=="svelte-a8ymuw"&&(L.innerHTML=Jl),st=a(e),V=p(e,"P",{"data-svelte-h":!0}),r(V)!=="svelte-wjaltb"&&(V.innerHTML=Ul),nt=a(e),u(S.$$.fragment,e),at=a(e),u(G.$$.fragment,e),it=a(e),u(R.$$.fragment,e),ot=a(e),Y=p(e,"P",{"data-svelte-h":!0}),r(Y)!=="svelte-16bzws9"&&(Y.textContent=bl),pt=a(e),u(_.$$.fragment,e),rt=a(e),F=p(e,"P",{"data-svelte-h":!0}),r(F)!=="svelte-qf40b1"&&(F.textContent=Cl),ct=a(e),u(Q.$$.fragment,e),Mt=a(e),u(P.$$.fragment,e),ut=a(e),X=p(e,"P",{"data-svelte-h":!0}),r(X)!=="svelte-1neaylk"&&(X.innerHTML=Il),ht=a(e),q=p(e,"P",{"data-svelte-h":!0}),r(q)!=="svelte-et2grx"&&(q.innerHTML=$l),gt=a(e),z=p(e,"P",{"data-svelte-h":!0}),r(z)!=="svelte-x00guz"&&(z.innerHTML=Gl),mt=a(e),K=p(e,"P",{"data-svelte-h":!0}),r(K)!=="svelte-ofpc39"&&(K.innerHTML=vl),dt=a(e),u(D.$$.fragment,e),yt=a(e),O=p(e,"P",{"data-svelte-h":!0}),r(O)!=="svelte-14d59d1"&&(O.textContent=Al),wt=a(e),u(ee.$$.fragment,e),jt=a(e),u(te.$$.fragment,e
),ft=a(e),le=p(e,"P",{"data-svelte-h":!0}),r(le)!=="svelte-wncghb"&&(le.textContent=Zl),Tt=a(e),u(v.$$.fragment,e),Jt=a(e),u(se.$$.fragment,e),Ut=a(e),u(A.$$.fragment,e),bt=a(e),ne=p(e,"P",{"data-svelte-h":!0}),r(ne)!=="svelte-1fxepf7"&&(ne.innerHTML=kl),Ct=a(e),u(ae.$$.fragment,e),It=a(e),ie=p(e,"P",{"data-svelte-h":!0}),r(ie)!=="svelte-39r131"&&(ie.innerHTML=El),$t=a(e),u(oe.$$.fragment,e),Gt=a(e),pe=p(e,"P",{"data-svelte-h":!0}),r(pe)!=="svelte-1dfmmkd"&&(pe.innerHTML=Bl),vt=a(e),u(re.$$.fragment,e),At=a(e),ce=p(e,"P",{"data-svelte-h":!0}),r(ce)!=="svelte-104x4ob"&&(ce.innerHTML=Wl),Zt=a(e),u(Me.$$.fragment,e),kt=a(e),ue=p(e,"P",{"data-svelte-h":!0}),r(ue)!=="svelte-1p4hk0w"&&(ue.innerHTML=xl),Et=a(e),u(he.$$.fragment,e),Bt=a(e),ge=p(e,"P",{"data-svelte-h":!0}),r(ge)!=="svelte-11bk0be"&&(ge.innerHTML=Nl),Wt=a(e),me=p(e,"P",{"data-svelte-h":!0}),r(me)!=="svelte-eei15e"&&(me.innerHTML=Hl),xt=a(e),u(de.$$.fragment,e),Nt=a(e),ye=p(e,"P",{"data-svelte-h":!0}),r(ye)!=="svelte-qglupd"&&(ye.innerHTML=Ll),Ht=a(e),we=p(e,"P",{"data-svelte-h":!0}),r(we)!=="svelte-1c2x6aq"&&(we.textContent=Vl),Lt=a(e),u(je.$$.fragment,e),Vt=a(e),fe=p(e,"P",{"data-svelte-h":!0}),r(fe)!=="svelte-m2xrt7"&&(fe.textContent=Sl),St=a(e),u(Te.$$.fragment,e),Rt=a(e),Je=p(e,"P",{"data-svelte-h":!0}),r(Je)!=="svelte-wwh5x3"&&(Je.innerHTML=Rl),Yt=a(e),Ue=p(e,"UL",{"data-svelte-h":!0}),r(Ue)!=="svelte-16s3jl1"&&(Ue.innerHTML=Yl),_t=a(e),be=p(e,"P",{"data-svelte-h":!0}),r(be)!=="svelte-1knwg44"&&(be.innerHTML=_l),Ft=a(e),Ce=p(e,"P",{"data-svelte-h":!0}),r(Ce)!=="svelte-1bspqdk"&&(Ce.innerHTML=Fl),Qt=a(e),Ie=p(e,"P",{"data-svelte-h":!0}),r(Ie)!=="svelte-1cbnxk6"&&(Ie.innerHTML=Ql),Pt=a(e),$e=p(e,"UL",{"data-svelte-h":!0}),r($e)!=="svelte-14kayok"&&($e.innerHTML=Pl),Xt=a(e),Ge=p(e,"P",{"data-svelte-h":!0}),r(Ge)!=="svelte-69c408"&&(Ge.textContent=Xl),qt=a(e),ve=p(e,"UL",{"data-svelte-h":!0}),r(ve)!=="svelte-16wd6xj"&&(ve.innerHTML=ql),zt=a(e),u(Z.$$.fragment,e),Kt=a(e),u(Ae.$$.fragment,e),Dt=a(e),Ze=p(e,"P"
,{"data-svelte-h":!0}),r(Ze)!=="svelte-1ks8djk"&&(Ze.innerHTML=zl),Ot=a(e),u(ke.$$.fragment,e),el=a(e),Ee=p(e,"TABLE",{"data-svelte-h":!0}),r(Ee)!=="svelte-109osuj"&&(Ee.innerHTML=Kl),tl=a(e),u(Be.$$.fragment,e),ll=a(e),We=p(e,"P",{"data-svelte-h":!0}),r(We)!=="svelte-mtyeq7"&&(We.innerHTML=Dl),sl=a(e),u(xe.$$.fragment,e),nl=a(e),Ne=p(e,"P",{"data-svelte-h":!0}),r(Ne)!=="svelte-753cja"&&(Ne.textContent=Ol),al=a(e),u(He.$$.fragment,e),il=a(e),Le=p(e,"P",{"data-svelte-h":!0}),r(Le)!=="svelte-1unobg3"&&(Le.textContent=es),ol=a(e),u(Ve.$$.fragment,e),pl=a(e),u(Se.$$.fragment,e),rl=a(e),Re=p(e,"P",{"data-svelte-h":!0}),r(Re)!=="svelte-1laf0m8"&&(Re.textContent=ts),cl=a(e),u(Ye.$$.fragment,e),Ml=a(e),_e=p(e,"P",{"data-svelte-h":!0}),r(_e)!=="svelte-qo8r7n"&&(_e.innerHTML=ls),ul=a(e),u(Fe.$$.fragment,e),hl=a(e),gl=p(e,"HR",{}),ml=a(e),u(k.$$.fragment,e),dl=a(e),u(Qe.$$.fragment,e),yl=a(e),qe=p(e,"P",{}),rs(qe).forEach(l),this.h()},h(){cs(i,"name","hf:doc:metadata"),cs(i,"content",Cs)},m(e,t){ds(document.head,i),s(e,w,t),s(e,c,t),s(e,j,t),h(b,e,t),s(e,C,t),s(e,I,t),s(e,y,t),s(e,J,t),s(e,ze,t),h(B,e,t),s(e,Ke,t),h($,e,t),s(e,De,t),s(e,W,t),s(e,Oe,t),h(x,e,t),s(e,et,t),h(N,e,t),s(e,tt,t),s(e,H,t),s(e,lt,t),s(e,L,t),s(e,st,t),s(e,V,t),s(e,nt,t),h(S,e,t),s(e,at,t),h(G,e,t),s(e,it,t),h(R,e,t),s(e,ot,t),s(e,Y,t),s(e,pt,t),h(_,e,t),s(e,rt,t),s(e,F,t),s(e,ct,t),h(Q,e,t),s(e,Mt,t),h(P,e,t),s(e,ut,t),s(e,X,t),s(e,ht,t),s(e,q,t),s(e,gt,t),s(e,z,t),s(e,mt,t),s(e,K,t),s(e,dt,t),h(D,e,t),s(e,yt,t),s(e,O,t),s(e,wt,t),h(ee,e,t),s(e,jt,t),h(te,e,t),s(e,ft,t),s(e,le,t),s(e,Tt,t),h(v,e,t),s(e,Jt,t),h(se,e,t),s(e,Ut,t),h(A,e,t),s(e,bt,t),s(e,ne,t),s(e,Ct,t),h(ae,e,t),s(e,It,t),s(e,ie,t),s(e,$t,t),h(oe,e,t),s(e,Gt,t),s(e,pe,t),s(e,vt,t),h(re,e,t),s(e,At,t),s(e,ce,t),s(e,Zt,t),h(Me,e,t),s(e,kt,t),s(e,ue,t),s(e,Et,t),h(he,e,t),s(e,Bt,t),s(e,ge,t),s(e,Wt,t),s(e,me,t),s(e,xt,t),h(de,e,t),s(e,Nt,t),s(e,ye,t),s(e,Ht,t),s(e,we,t),s(e,Lt,t),h(je,e,t),s(e,Vt,t),s(e,fe,t),s(e,St,t),h(Te,e,t),s(e,Rt,t),s(
e,Je,t),s(e,Yt,t),s(e,Ue,t),s(e,_t,t),s(e,be,t),s(e,Ft,t),s(e,Ce,t),s(e,Qt,t),s(e,Ie,t),s(e,Pt,t),s(e,$e,t),s(e,Xt,t),s(e,Ge,t),s(e,qt,t),s(e,ve,t),s(e,zt,t),h(Z,e,t),s(e,Kt,t),h(Ae,e,t),s(e,Dt,t),s(e,Ze,t),s(e,Ot,t),h(ke,e,t),s(e,el,t),s(e,Ee,t),s(e,tl,t),h(Be,e,t),s(e,ll,t),s(e,We,t),s(e,sl,t),h(xe,e,t),s(e,nl,t),s(e,Ne,t),s(e,al,t),h(He,e,t),s(e,il,t),s(e,Le,t),s(e,ol,t),h(Ve,e,t),s(e,pl,t),h(Se,e,t),s(e,rl,t),s(e,Re,t),s(e,cl,t),h(Ye,e,t),s(e,Ml,t),s(e,_e,t),s(e,ul,t),h(Fe,e,t),s(e,hl,t),s(e,gl,t),s(e,ml,t),h(k,e,t),s(e,dl,t),h(Qe,e,t),s(e,yl,t),s(e,qe,t),wl=!0},p(e,[t]){const ss={};t&2&&(ss.$$scope={dirty:t,ctx:e}),$.$set(ss);const ns={};t&2&&(ns.$$scope={dirty:t,ctx:e}),G.$set(ns);const as={};t&2&&(as.$$scope={dirty:t,ctx:e}),v.$set(as);const is={};t&2&&(is.$$scope={dirty:t,ctx:e}),A.$set(is);const os={};t&2&&(os.$$scope={dirty:t,ctx:e}),Z.$set(os);const ps={};t&2&&(ps.$$scope={dirty:t,ctx:e}),k.$set(ps)},i(e){wl||(g(b.$$.fragment,e),g(B.$$.fragment,e),g($.$$.fragment,e),g(x.$$.fragment,e),g(N.$$.fragment,e),g(S.$$.fragment,e),g(G.$$.fragment,e),g(R.$$.fragment,e),g(_.$$.fragment,e),g(Q.$$.fragment,e),g(P.$$.fragment,e),g(D.$$.fragment,e),g(ee.$$.fragment,e),g(te.$$.fragment,e),g(v.$$.fragment,e),g(se.$$.fragment,e),g(A.$$.fragment,e),g(ae.$$.fragment,e),g(oe.$$.fragment,e),g(re.$$.fragment,e),g(Me.$$.fragment,e),g(he.$$.fragment,e),g(de.$$.fragment,e),g(je.$$.fragment,e),g(Te.$$.fragment,e),g(Z.$$.fragment,e),g(Ae.$$.fragment,e),g(ke.$$.fragment,e),g(Be.$$.fragment,e),g(xe.$$.fragment,e),g(He.$$.fragment,e),g(Ve.$$.fragment,e),g(Se.$$.fragment,e),g(Ye.$$.fragment,e),g(Fe.$$.fragment,e),g(k.$$.fragment,e),g(Qe.$$.fragment,e),wl=!0)},o(e){m(b.$$.fragment,e),m(B.$$.fragment,e),m($.$$.fragment,e),m(x.$$.fragment,e),m(N.$$.fragment,e),m(S.$$.fragment,e),m(G.$$.fragment,e),m(R.$$.fragment,e),m(_.$$.fragment,e),m(Q.$$.fragment,e),m(P.$$.fragment,e),m(D.$$.fragment,e),m(ee.$$.fragment,e),m(te.$$.fragment,e),m(v.$$.fragment,e),m(se.$$.fragment,e),m(A.$$.fragment,e),m(
ae.$$.fragment,e),m(oe.$$.fragment,e),m(re.$$.fragment,e),m(Me.$$.fragment,e),m(he.$$.fragment,e),m(de.$$.fragment,e),m(je.$$.fragment,e),m(Te.$$.fragment,e),m(Z.$$.fragment,e),m(Ae.$$.fragment,e),m(ke.$$.fragment,e),m(Be.$$.fragment,e),m(xe.$$.fragment,e),m(He.$$.fragment,e),m(Ve.$$.fragment,e),m(Se.$$.fragment,e),m(Ye.$$.fragment,e),m(Fe.$$.fragment,e),m(k.$$.fragment,e),m(Qe.$$.fragment,e),wl=!1},d(e){e&&(l(w),l(c),l(j),l(C),l(I),l(y),l(J),l(ze),l(Ke),l(De),l(W),l(Oe),l(et),l(tt),l(H),l(lt),l(L),l(st),l(V),l(nt),l(at),l(it),l(ot),l(Y),l(pt),l(rt),l(F),l(ct),l(Mt),l(ut),l(X),l(ht),l(q),l(gt),l(z),l(mt),l(K),l(dt),l(yt),l(O),l(wt),l(jt),l(ft),l(le),l(Tt),l(Jt),l(Ut),l(bt),l(ne),l(Ct),l(It),l(ie),l($t),l(Gt),l(pe),l(vt),l(At),l(ce),l(Zt),l(kt),l(ue),l(Et),l(Bt),l(ge),l(Wt),l(me),l(xt),l(Nt),l(ye),l(Ht),l(we),l(Lt),l(Vt),l(fe),l(St),l(Rt),l(Je),l(Yt),l(Ue),l(_t),l(be),l(Ft),l(Ce),l(Qt),l(Ie),l(Pt),l($e),l(Xt),l(Ge),l(qt),l(ve),l(zt),l(Kt),l(Dt),l(Ze),l(Ot),l(el),l(Ee),l(tl),l(ll),l(We),l(sl),l(nl),l(Ne),l(al),l(il),l(Le),l(ol),l(pl),l(rl),l(Re),l(cl),l(Ml),l(_e),l(ul),l(hl),l(gl),l(ml),l(dl),l(yl),l(qe)),l(i),d(b,e),d(B,e),d($,e),d(x,e),d(N,e),d(S,e),d(G,e),d(R,e),d(_,e),d(Q,e),d(P,e),d(D,e),d(ee,e),d(te,e),d(v,e),d(se,e),d(A,e),d(ae,e),d(oe,e),d(re,e),d(Me,e),d(he,e),d(de,e),d(je,e),d(Te,e),d(Z,e),d(Ae,e),d(ke,e),d(Be,e),d(xe,e),d(He,e),d(Ve,e),d(Se,e),d(Ye,e),d(Fe,e),d(k,e),d(Qe,e)}}}const Cs='{"title":"Deploy PaliGemma2 with TGI DLC on GKE","local":"deploy-paligemma2-with-tgi-dlc-on-gke","sections":[{"title":"Setup / Configuration","local":"setup--configuration","sections":[{"title":"Requirements","local":"requirements","sections":[],"depth":3},{"title":"Login and API enablement","local":"login-and-api-enablement","sections":[],"depth":3},{"title":"PaliGemma2 gating and Hugging Face access token","local":"paligemma2-gating-and-hugging-face-access-token","sections":[],"depth":3}],"depth":2},{"title":"Create GKE 
Cluster","local":"create-gke-cluster","sections":[],"depth":2},{"title":"Get GKE Cluster Credentials","local":"get-gke-cluster-credentials","sections":[],"depth":2},{"title":"Set Hugging Face Secrets on GKE","local":"set-hugging-face-secrets-on-gke","sections":[],"depth":2},{"title":"Deploy TGI on GKE","local":"deploy-tgi-on-gke","sections":[],"depth":2},{"title":"Inference with TGI on GKE","local":"inference-with-tgi-on-gke","sections":[{"title":"Via cURL","local":"via-curl","sections":[],"depth":3},{"title":"Via Python","local":"via-python","sections":[],"depth":3}],"depth":2},{"title":"Delete GKE Cluster","local":"delete-gke-cluster","sections":[],"depth":2}],"depth":1}';/* End of Cs: a JSON string describing the page title and its nested sections (title, local anchor, depth). The h() method above passes it to cs(i, content, Cs) on the meta element named hf:doc:metadata. *//** Instance function handed to gs() by the ks constructor. Registers a callback through us (imported from the scheduler chunk; presumably the Svelte onMount hook - confirm there) that parses window.location.search and reads the fw query parameter, discarding the value. Returns an empty array; the T argument is unused. */function Is(T){return us(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}/** Page component. Extends hs and, after super(), calls gs(this, i, Is, bs, Ms, {}) with the constructor argument i. hs and gs are imported from the index chunk (presumably SvelteComponent and init - confirm there). */class ks extends hs{constructor(i){super(),gs(this,i,Is,bs,Ms,{})}}/* Exposes ks to importers under the name component. */export{ks as component}; | |
Xet Storage Details
- Size: 49.9 kB
- Xet hash: a951c4c00cc0cf4af0bea650f3a48786e806da64328d3888f5967846d7674972
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.