Buckets:

rtrm's picture
download
raw
54.7 kB
import{s as ps,o as Ms,n as _}from"../chunks/scheduler.b108d059.js";import{S as ds,i as ys,g as i,s,r as p,A as hs,h as u,f as l,c as o,j as R,u as M,x as r,k as cs,y as f,a as n,v as d,d as y,t as h,w as m}from"../chunks/index.008de539.js";import{T as k}from"../chunks/Tip.aeb15ab7.js";import{C as g}from"../chunks/CodeBlock.7b00c886.js";import{H as J,E as ms}from"../chunks/index.2bf9b47d.js";function Us(w){let a,U='To access GPU on Cloud Run, <a href="https://cloud.google.com/run/quotas#increase" rel="nofollow">request a quota increase</a> for <code>Total Nvidia L4 GPU allocation, per project per region</code>. At the time of writing this example, NVIDIA L4 GPUs (24GiB VRAM) are the only available GPUs on Cloud Run; enabling automatic scaling up to 7 instances by default (more available via quota), as well as scaling down to zero instances when there are no requests.';return{c(){a=i("p"),a.innerHTML=U},l(c){a=u(c,"P",{"data-svelte-h":!0}),r(a)!=="svelte-o2ltdj"&&(a.innerHTML=U)},m(c,C){n(c,a,C)},p:_,d(c){c&&l(a)}}}function Cs(w){let a,U='Optionally, you can include the arguments <code>--vpc-egress=all-traffic</code> and <code>--subnet=default</code>, as there is external traffic being sent to the public internet, so in order to speed the network, you need to route all traffic through the VPC network by setting those flags. Note that besides setting the flags, you need to set up Google Cloud NAT to reach the public internet, which is a paid product. Find more information in <a href="https://cloud.google.com/run/docs/configuring/networking-best-practices" rel="nofollow">Cloud Run Documentation - Networking best practices</a>.',c,C,v;return C=new g({props:{code:"Z2Nsb3VkJTIwY29tcHV0ZSUyMHJvdXRlcnMlMjBjcmVhdGUlMjBuYXQtcm91dGVyJTIwLS1uZXR3b3JrJTNEZGVmYXVsdCUyMC0tcmVnaW9uJTNEJTI0TE9DQVRJT04lMEFnY2xvdWQlMjBjb21wdXRlJTIwcm91dGVycyUyMG5hdHMlMjBjcmVhdGUlMjB2bS1uYXQlMjAtLXJvdXRlciUzRG5hdC1yb3V0ZXIlMjAtLXJlZ2lvbiUzRCUyNExPQ0FUSU9OJTIwLS1hdXRvLWFsbG9jYXRlLW5hdC1leHRlcm5hbC1pcHMlMjAtLW5hdC1hbGwtc3VibmV0LWlwLXJhbmdlcw==",highlighted:`gcloud compute routers create nat-router --network=default --region=<span class="hljs-variable">$LOCATION</span>
gcloud compute routers nats create vm-nat --router=nat-router --region=<span class="hljs-variable">$LOCATION</span> --auto-allocate-nat-external-ips --nat-all-subnet-ip-ranges`,wrap:!1}}),{c(){a=i("p"),a.innerHTML=U,c=s(),p(C.$$.fragment)},l(T){a=u(T,"P",{"data-svelte-h":!0}),r(a)!=="svelte-1o1t3au"&&(a.innerHTML=U),c=o(T),M(C.$$.fragment,T)},m(T,I){n(T,a,I),n(T,c,I),d(C,T,I),v=!0},p:_,i(T){v||(y(C.$$.fragment,T),v=!0)},o(T){h(C.$$.fragment,T),v=!1},d(T){T&&(l(a),l(c)),m(C,T)}}}function fs(w){let a,U="The first time you deploy a new container on Cloud Run it will take around 5 minutes to deploy as it needs to import it from the Google Cloud Artifact Registry, but on the follow up deployments it will take less time as the image has been already imported before.";return{c(){a=i("p"),a.textContent=U},l(c){a=u(c,"P",{"data-svelte-h":!0}),r(a)!=="svelte-18e6hwd"&&(a.textContent=U)},m(c,C){n(c,a,C)},p:_,d(c){c&&l(a)}}}function gs(w){let a,U='The alternatives mentioned below are for development scenarios, and should not be used in production-ready scenarios as is. The approach below is following the guide defined in <a href="https://cloud.google.com/run/docs/authenticating/developers" rel="nofollow">Cloud Run Documentation - Authenticate Developers</a>; but you can find every other guide as mentioned above in <a href="https://cloud.google.com/run/docs/authenticating/overview" rel="nofollow">Cloud Run Documentation - Authentication overview</a>.';return{c(){a=i("p"),a.innerHTML=U},l(c){a=u(c,"P",{"data-svelte-h":!0}),r(a)!=="svelte-1qznp3b"&&(a.innerHTML=U)},m(c,C){n(c,a,C)},p:_,d(c){c&&l(a)}}}function Ts(w){let a,U="Note that the examples below are using the <code>/v1/chat/completions</code> TGI endpoint with is OpenAI-compatible, meaning that both <code>cURL</code> and Python are just some proposals, but any OpenAI-compatible client can be used instead.";return{c(){a=i("p"),a.innerHTML=U},l(c){a=u(c,"P",{"data-svelte-h":!0}),r(a)!=="svelte-11wzgde"&&(a.innerHTML=U)},m(c,C){n(c,a,C)},p:_,d(c){c&&l(a)}}}function ws(w){let a,U='The access token is short-lived and will expire, by default after 1 hour. If you want to extend the token lifetime beyond the default, you must create and organization policy and use the <code>--lifetime</code> argument when creating the token. Refer to <a href="https://cloud.google.com/resource-manager/docs/organization-policy/restricting-service-accounts#extend_oauth_ttl" rel="nofollow">Access token lifetime</a> to learn more. Otherwise, you can also generate a new token by running the same command again.';return{c(){a=i("p"),a.innerHTML=U},l(c){a=u(c,"P",{"data-svelte-h":!0}),r(a)!=="svelte-181n0at"&&(a.innerHTML=U)},m(c,C){n(c,a,C)},p:_,d(c){c&&l(a)}}}function Js(w){let a,U="Note that the examples below are using the <code>/v1/chat/completions</code> TGI endpoint with is OpenAI-compatible, meaning that both <code>cURL</code> and Python are just some proposals, but any OpenAI-compatible client can be used instead.";return{c(){a=i("p"),a.innerHTML=U},l(c){a=u(c,"P",{"data-svelte-h":!0}),r(a)!=="svelte-11wzgde"&&(a.innerHTML=U)},m(c,C){n(c,a,C)},p:_,d(c){c&&l(a)}}}function $s(w){let a,U='📍 Find the complete example on GitHub <a href="https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/cloud-run/deploy-gemma-2-on-cloud-run" rel="nofollow">here</a>!';return{c(){a=i("p"),a.innerHTML=U},l(c){a=u(c,"P",{"data-svelte-h":!0}),r(a)!=="svelte-g9vp8q"&&(a.innerHTML=U)},m(c,C){n(c,a,C)},p:_,d(c){c&&l(a)}}}function bs(w){let a,U,c,C,v,T,I,Un="Gemma 2 is an advanced, lightweight open model that enhances performance and efficiency while building on the research and technology of its predecessor and the Gemini models developed by Google DeepMind and other teams across Google. Text Generation Inference (TGI) is a toolkit developed by Hugging Face for deploying and serving LLMs, with high performance text generation. Google Cloud Run is a serverless container platform that allows developers to deploy and manage containerized applications without managing infrastructure, enabling automatic scaling and billing only for usage.",It,W,Cn='This example showcases how to deploy Gemma2 9B Instruct model quantized to INT4 using AWQ from the Hugging Face Hub with the Hugging Face DLC for TGI on Google Cloud Run with GPU support (<a href="https://cloud.google.com/products#product-launch-stages" rel="nofollow">in preview</a>).',Vt,E,Rt,B,Et,q,fn='First, you need to install <code>gcloud</code> in your local machine, which is the command-line tool for Google Cloud, following the instructions at <a href="https://cloud.google.com/sdk/docs/install" rel="nofollow">Cloud SDK Documentation - Install the gcloud CLI</a>.',At,X,gn="Optionally, to ease the usage of the commands within this tutorial, you need to set the following environment variables for GCP:",Nt,F,St,H,Tn="Then you need to login into your Google Cloud account and set the project ID you want to use to deploy Cloud Run.",Zt,P,Qt,Y,wn="Once you are logged in, you need to enable the Cloud Run API, which is required for the Hugging Face DLC for TGI deployment on Cloud Run.",Lt,D,Gt,z,xt,O,Jn="Once you are all set, you can call the <code>gcloud beta run deploy</code> command (still on beta because GPU support is on preview, as mentioned above).",kt,K,$n="The <code>gcloud beta run deploy</code> command needs you to specify the following parameters:",_t,ee,bn='<li><code>--image</code>: The container image URI to deploy.</li> <li><code>--args</code>: The arguments to pass to the container entrypoint, being <code>text-generation-launcher</code> for the Hugging Face DLC for TGI. Read more about the supported arguments in <a href="https://huggingface.co/docs/text-generation-inference/en/basic_tutorials/launcher" rel="nofollow">Text-generation-launcher arguments</a>.<ul><li><code>--model-id</code>: The model ID to use, in this case, <a href="https://huggingface.co/hugging-quants/gemma-2-9b-it-AWQ-INT4" rel="nofollow"><code>hugging-quants/gemma-2-9b-it-AWQ-INT4</code></a>.</li> <li><code>--quantize</code>: The quantization method to use, in this case, <code>awq</code>. If not specified, it will be retrieved from the <code>quantization_config-&gt;quant_method</code> in the <code>config.json</code> file.</li> <li><code>--max-concurrent-requests</code>: The maximum amount of concurrent requests for this particular deployment. Having a low limit will refuse clients requests instead of having them wait for too long and is usually good to handle back pressure correctly. Set to 64, but default is 128.</li></ul></li> <li><code>--port</code>: The port the container listens to.</li> <li><code>--cpu</code> and <code>--memory</code>: The number of CPUs and amount of memory to allocate to the container. Needs to be set to 4 and 16Gi (16 GiB), respectively; as that’s the minimum requirement for using the GPU.</li> <li><code>--no-cpu-throttling</code>: Disables CPU throttling, which is required for using the GPU.</li> <li><code>--gpu</code> and <code>--gpu-type</code>: The number of GPUs and the GPU type to use. Needs to be set to 1 and <code>nvidia-l4</code>, respectively; as at the time of writing this tutorial, those are the only available options as Cloud Run on GPUs is still under preview.</li> <li><code>--max-instances</code>: The maximum number of instances to run, set to 3, but default maximum value is 7. Alternatively, one could set it to 1 too, but that could eventually lead to downtime during infrastructure migrations, so anything above 1 is recommended.</li> <li><code>--concurrency</code>: the maximum number of concurrent requests per instance, set to 64. The value is not arbitrary, but determined after running and evaluating the results of <a href="https://github.com/huggingface/text-generation-inference/tree/main/benchmark" rel="nofollow"><code>text-generation-benchmark</code></a>, as the most optimal balance between throughput and latency; where the current default for TGI being 128 is a bit too much. Note that this value is also aligned with the <a href="https://huggingface.co/docs/text-generation-inference/en/basic_tutorials/launcher#maxconcurrentrequests" rel="nofollow"><code>--max-concurrent-requests</code></a> argument in TGI.</li> <li><code>--region</code>: The region to deploy the Cloud Run service.</li> <li><code>--no-allow-unauthenticated</code>: Disables unauthenticated access to the service, which is a good practice as adds an authentication layer managed by Google Cloud IAM.</li>',Wt,A,Bt,te,vn="Finally, you can run the <code>gcloud beta run deploy</code> command to deploy TGI on Cloud Run as:",qt,le,Xt,ne,jn="Or as it follows if you created the Cloud NAT:",Ft,se,Ht,oe,In='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/main/examples/cloud-run/deploy-gemma-2-on-cloud-run/imgs/cloud-run-deployment.png" alt="Cloud Run Deployment"/>',Pt,ae,Vn='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/main/examples/cloud-run/deploy-gemma-2-on-cloud-run/imgs/cloud-run-details.png" alt="Cloud Run Deployment Details"/>',Yt,N,Dt,ie,zt,ue,Rn='Once deployed, you can send requests to the service via any of the supported TGI endpoints, check <a href="https://huggingface.github.io/text-generation-inference/" rel="nofollow">TGI’s OpenAPI Specification</a> to see all the available endpoints and their respective parameters.',Ot,re,En="All Cloud Run services are deployed privately by default, which means that they can’t be accessed without providing authentication credentials in the request headers. These services are secured by IAM and are only callable by Project Owners, Project Editors, and Cloud Run Admins and Cloud Run Invokers.",Kt,ce,An='In this case, a couple of alternatives to enable developer access will be showcased; while the other use cases are out of the scope of this example, as those are either not secure due to the authentication being disabled (for public access scenarios), or require additional setup for production-ready scenarios (<a href="https://cloud.google.com/run/docs/authenticating/service-to-service" rel="nofollow">service-to-service authentication</a>, <a href="https://cloud.google.com/run/docs/authenticating/end-users" rel="nofollow">end-user access</a>).',el,S,tl,pe,ll,Me,Nn="Cloud Run Proxy runs a server on localhost that proxies requests to the specified Cloud Run Service with credentials attached; which is useful for testing and experimentation.",nl,de,sl,ye,Sn='Then you can send requests to the deployed service on Cloud Run, using the <a href="http://localhost:8080" rel="nofollow">http://localhost:8080</a> URL, with no authentication, exposed by the proxy as shown in the examples below.',ol,Z,al,he,il,me,Zn="To send a POST request to the TGI service using <code>cURL</code>, you can run the following command:",ul,Ue,rl,Ce,cl,fe,Qn="To run the inference using Python, you can either use the <code>huggingface_hub</code> Python SDK (recommended) or the <code>openai</code> Python SDK.",pl,ge,Ml,Te,Ln="You can install it via <code>pip</code> as <code>pip install --upgrade --quiet huggingface_hub</code>, and then run:",dl,we,yl,Je,hl,$e,Gn="You can install it via <code>pip</code> as <code>pip install --upgrade openai</code>, and then run:",ml,be,Ul,ve,Cl,je,xn="Cloud Run Service has an unique URL assigned that can be used to send requests from anywhere, using the Google Cloud Credentials with Cloud Run Invoke access to the service; which is the recommended approach as it’s more secure and consistent than using the Cloud Run Proxy.",fl,Ie,kn="The URL of the Cloud Run service can be obtained via the following command (assigned to the <code>SERVICE_URL</code> variable for convenience):",gl,Ve,Tl,Re,_n="Then you can send requests to the deployed service on Cloud Run, using the <code>SERVICE_URL</code> and any Google Cloud Credentials with Cloud Run Invoke access. For setting up the credentials there are multiple approaches, some of those are listed below:",wl,Q,$,ft,Wn="Using the default identity token from the Google Cloud SDK:",sn,gt,Bn="<li>Via <code>gcloud</code> as:</li>",on,Ee,an,Tt,qn="<li>Via Python as:</li>",un,Ae,rn,wt,Xn="<p>Using a Service Account with Cloud Run Invoke access, which can either be done with any of the following approaches:</p> <ul><li>Create a Service Account before the Cloud Run Service was created, and then set the <code>--service-account</code> flag to the Service Account email when creating the Cloud Run Service. And use an Access Token for that Service Account only using <code>gcloud auth print-access-token --impersonate-service-account=SERVICE_ACCOUNT_EMAIL</code>.</li> <li>Create a Service Account after the Cloud Run Service was created, and then update the Cloud Run Service to use the Service Account. And use an Access Token for that Service Account only using <code>gcloud auth print-access-token --impersonate-service-account=SERVICE_ACCOUNT_EMAIL</code>.</li></ul>",Jl,Ne,Fn="The recommended approach is to use a Service Account (SA), as the access can be controlled better and the permissions are more granular; as the Cloud Run Service was not created using a SA, which is another nice option, you need to now create the SA, gran it the necessary permissions, update the Cloud Run Service to use the SA, and then generate an access token to set as the authentication token within the requests, that can be revoked later once you are done using it.",$l,j,Se,Jt,Hn="Set the <code>SERVICE_ACCOUNT_NAME</code> environment variable for convenience:",cn,Ze,pn,Qe,$t,Pn="Create the Service Account:",Mn,Le,dn,Ge,bt,Yn="Grant the Service Account the Cloud Run Invoker role:",yn,xe,hn,ke,vt,Dn="Generate the Access Token for the Service Account:",mn,_e,bl,L,vl,We,zn="Now you can already dive into the different alternatives for sending the requests to the deployed Cloud Run Service using the <code>SERVICE_URL</code> AND <code>ACCESS_TOKEN</code> as described above.",jl,G,Il,Be,Vl,qe,On="To send a POST request to the TGI service using <code>cURL</code>, you can run the following command:",Rl,Xe,El,Fe,Al,He,Kn="To run the inference using Python, you can either use the <code>huggingface_hub</code> Python SDK (recommended) or the <code>openai</code> Python SDK.",Nl,Pe,Sl,Ye,es="You can install it via <code>pip</code> as <code>pip install --upgrade --quiet huggingface_hub</code>, and then run:",Zl,De,Ql,ze,Ll,Oe,ts="You can install it via <code>pip</code> as <code>pip install --upgrade openai</code>, and then run:",Gl,Ke,xl,et,kl,tt,ls="Finally, once you are done using TGI on the Cloud Run Service, you can safely delete it to avoid incurring in unnecessary costs e.g. if the Cloud Run services are inadvertently invoked more times than your monthly Cloud Run invoke allocation in the free tier.",_l,lt,ns='To delete the Cloud Run Service you can either go to the Google Cloud Console at <a href="https://console.cloud.google.com/run" rel="nofollow">https://console.cloud.google.com/run</a> and delete it manually; or use the Google Cloud SDK via <code>gcloud</code> as follows:',Wl,nt,Bl,st,ss='Additionally, if you followed the steps in <a href="#via-cloud-run-service-url">via Cloud Run Service URL</a> and generated a Service Account and an access token, you can either remove the Service Account, or just revoke the access token if it is still valid.',ql,ot,os="<li>(recommended) Revoke the Access Token as:</li>",Xl,at,Fl,it,as="<li>(optional) Delete the Service Account as:</li>",Hl,ut,Pl,rt,is="Finally, if you decided to enable the VPC network via Cloud NAT, you can also remove the Cloud NAT (which is a paid product) as:",Yl,ct,Dl,pt,zl,Mt,us='<li><a href="https://cloud.google.com/run/docs" rel="nofollow">Cloud Run Documentation - Overview</a></li> <li><a href="https://cloud.google.com/run/docs/configuring/services/gpu" rel="nofollow">Cloud Run Documentation - GPU services</a></li> <li><a href="https://cloud.google.com/blog/products/application-development/run-your-ai-inference-applications-on-cloud-run-with-nvidia-gpus" rel="nofollow">Google Cloud Blog - Run your AI inference applications on Cloud Run with NVIDIA GPUs</a></li>',Ol,Kl,en,x,tn,dt,ln,jt,nn;return v=new J({props:{title:"Deploy Gemma2 9B with TGI DLC on Cloud Run",local:"deploy-gemma2-9b-with-tgi-dlc-on-cloud-run",headingTag:"h1"}}),E=new k({props:{$$slots:{default:[Us]},$$scope:{ctx:w}}}),B=new J({props:{title:"Setup / Configuration",local:"setup--configuration",headingTag:"h2"}}),F=new g({props:{code:"ZXhwb3J0JTIwUFJPSkVDVF9JRCUzRHlvdXItcHJvamVjdC1pZCUwQWV4cG9ydCUyMExPQ0FUSU9OJTNEdXMtY2VudHJhbDElMjAlMjAlMjMlMjBvciUyMGFueSUyMGxvY2F0aW9uJTIwd2hlcmUlMjBDbG91ZCUyMFJ1biUyMG9mZmVycyUyMEdQVXMlM0ElMjBodHRwcyUzQSUyRiUyRmNsb3VkLmdvb2dsZS5jb20lMkZydW4lMkZkb2NzJTJGbG9jYXRpb25zJTIzZ3B1JTBBZXhwb3J0JTIwQ09OVEFJTkVSX1VSSSUzRHVzLWRvY2tlci5wa2cuZGV2JTJGZGVlcGxlYXJuaW5nLXBsYXRmb3JtLXJlbGVhc2UlMkZnY3IuaW8lMkZodWdnaW5nZmFjZS10ZXh0LWdlbmVyYXRpb24taW5mZXJlbmNlLWN1MTI0LjItMy51YnVudHUyMjA0LnB5MzExJTBBZXhwb3J0JTIwU0VSVklDRV9OQU1FJTNEZ2VtbWEyLXRnaQ==",highlighted:`<span class="hljs-built_in">export</span> PROJECT_ID=your-project-id
<span class="hljs-built_in">export</span> LOCATION=us-central1 <span class="hljs-comment"># or any location where Cloud Run offers GPUs: https://cloud.google.com/run/docs/locations#gpu</span>
<span class="hljs-built_in">export</span> CONTAINER_URI=us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311
<span class="hljs-built_in">export</span> SERVICE_NAME=gemma2-tgi`,wrap:!1}}),P=new g({props:{code:"Z2Nsb3VkJTIwYXV0aCUyMGxvZ2luJTBBZ2Nsb3VkJTIwYXV0aCUyMGFwcGxpY2F0aW9uLWRlZmF1bHQlMjBsb2dpbiUyMCUyMCUyMyUyMEZvciUyMGxvY2FsJTIwZGV2ZWxvcG1lbnQlMEFnY2xvdWQlMjBjb25maWclMjBzZXQlMjBwcm9qZWN0JTIwJTI0UFJPSkVDVF9JRA==",highlighted:`gcloud auth login
gcloud auth application-default login <span class="hljs-comment"># For local development</span>
gcloud config <span class="hljs-built_in">set</span> project <span class="hljs-variable">$PROJECT_ID</span>`,wrap:!1}}),D=new g({props:{code:"Z2Nsb3VkJTIwc2VydmljZXMlMjBlbmFibGUlMjBydW4uZ29vZ2xlYXBpcy5jb20=",highlighted:'gcloud services <span class="hljs-built_in">enable</span> run.googleapis.com',wrap:!1}}),z=new J({props:{title:"Deploy TGI on Cloud Run",local:"deploy-tgi-on-cloud-run",headingTag:"h2"}}),A=new k({props:{$$slots:{default:[Cs]},$$scope:{ctx:w}}}),le=new g({props:{code:"Z2Nsb3VkJTIwYmV0YSUyMHJ1biUyMGRlcGxveSUyMCUyNFNFUlZJQ0VfTkFNRSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0taW1hZ2UlM0QlMjRDT05UQUlORVJfVVJJJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1hcmdzJTNEJTIyLS1tb2RlbC1pZCUzRGh1Z2dpbmctcXVhbnRzJTJGZ2VtbWEtMi05Yi1pdC1BV1EtSU5UNCUyQy0tbWF4LWNvbmN1cnJlbnQtcmVxdWVzdHMlM0Q2NCUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tc2V0LWVudi12YXJzJTNESEZfSFVCX0VOQUJMRV9IRl9UUkFOU0ZFUiUzRDElMjAlNUMlMEElMjAlMjAlMjAlMjAtLXBvcnQlM0Q4MDgwJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1jcHUlM0Q4JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1tZW1vcnklM0QzMkdpJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1uby1jcHUtdGhyb3R0bGluZyUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tZ3B1JTNEMSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tZ3B1LXR5cGUlM0RudmlkaWEtbDQlMjAlNUMlMEElMjAlMjAlMjAlMjAtLW1heC1pbnN0YW5jZXMlM0QzJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1jb25jdXJyZW5jeSUzRDY0JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1yZWdpb24lM0QlMjRMT0NBVElPTiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbm8tYWxsb3ctdW5hdXRoZW50aWNhdGVk",highlighted:`gcloud beta run deploy <span class="hljs-variable">$SERVICE_NAME</span> \\
--image=<span class="hljs-variable">$CONTAINER_URI</span> \\
--args=<span class="hljs-string">&quot;--model-id=hugging-quants/gemma-2-9b-it-AWQ-INT4,--max-concurrent-requests=64&quot;</span> \\
--set-env-vars=HF_HUB_ENABLE_HF_TRANSFER=1 \\
--port=8080 \\
--cpu=8 \\
--memory=32Gi \\
--no-cpu-throttling \\
--gpu=1 \\
--gpu-type=nvidia-l4 \\
--max-instances=3 \\
--concurrency=64 \\
--region=<span class="hljs-variable">$LOCATION</span> \\
--no-allow-unauthenticated`,wrap:!1}}),se=new g({props:{code:"Z2Nsb3VkJTIwYmV0YSUyMHJ1biUyMGRlcGxveSUyMCUyNFNFUlZJQ0VfTkFNRSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0taW1hZ2UlM0QlMjRDT05UQUlORVJfVVJJJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1hcmdzJTNEJTIyLS1tb2RlbC1pZCUzRGh1Z2dpbmctcXVhbnRzJTJGZ2VtbWEtMi05Yi1pdC1BV1EtSU5UNCUyQy0tbWF4LWNvbmN1cnJlbnQtcmVxdWVzdHMlM0Q2NCUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tc2V0LWVudi12YXJzJTNESEZfSFVCX0VOQUJMRV9IRl9UUkFOU0ZFUiUzRDElMjAlNUMlMEElMjAlMjAlMjAlMjAtLXBvcnQlM0Q4MDgwJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1jcHUlM0Q4JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1tZW1vcnklM0QzMkdpJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1uby1jcHUtdGhyb3R0bGluZyUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tZ3B1JTNEMSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tZ3B1LXR5cGUlM0RudmlkaWEtbDQlMjAlNUMlMEElMjAlMjAlMjAlMjAtLW1heC1pbnN0YW5jZXMlM0QzJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1jb25jdXJyZW5jeSUzRDY0JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1yZWdpb24lM0QlMjRMT0NBVElPTiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbm8tYWxsb3ctdW5hdXRoZW50aWNhdGVkJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS12cGMtZWdyZXNzJTNEYWxsLXRyYWZmaWMlMjAlNUMlMEElMjAlMjAlMjAlMjAtLXN1Ym5ldCUzRGRlZmF1bHQ=",highlighted:`gcloud beta run deploy <span class="hljs-variable">$SERVICE_NAME</span> \\
--image=<span class="hljs-variable">$CONTAINER_URI</span> \\
--args=<span class="hljs-string">&quot;--model-id=hugging-quants/gemma-2-9b-it-AWQ-INT4,--max-concurrent-requests=64&quot;</span> \\
--set-env-vars=HF_HUB_ENABLE_HF_TRANSFER=1 \\
--port=8080 \\
--cpu=8 \\
--memory=32Gi \\
--no-cpu-throttling \\
--gpu=1 \\
--gpu-type=nvidia-l4 \\
--max-instances=3 \\
--concurrency=64 \\
--region=<span class="hljs-variable">$LOCATION</span> \\
--no-allow-unauthenticated \\
--vpc-egress=all-traffic \\
--subnet=default`,wrap:!1}}),N=new k({props:{$$slots:{default:[fs]},$$scope:{ctx:w}}}),ie=new J({props:{title:"Inference on Cloud Run",local:"inference-on-cloud-run",headingTag:"h2"}}),S=new k({props:{$$slots:{default:[gs]},$$scope:{ctx:w}}}),pe=new J({props:{title:"Via Cloud Run Proxy",local:"via-cloud-run-proxy",headingTag:"h3"}}),de=new g({props:{code:"Z2Nsb3VkJTIwcnVuJTIwc2VydmljZXMlMjBwcm94eSUyMCUyNFNFUlZJQ0VfTkFNRSUyMC0tcmVnaW9uJTIwJTI0TE9DQVRJT04=",highlighted:'gcloud run services proxy <span class="hljs-variable">$SERVICE_NAME</span> --region <span class="hljs-variable">$LOCATION</span>',wrap:!1}}),Z=new k({props:{$$slots:{default:[Ts]},$$scope:{ctx:w}}}),he=new J({props:{title:"cURL",local:"curl",headingTag:"h4"}}),Ue=new g({props:{code:"Y3VybCUyMGh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDgwJTJGdjElMkZjaGF0JTJGY29tcGxldGlvbnMlMjAlNUMlMEElMjAlMjAlMjAlMjAtWCUyMFBPU1QlMjAlNUMlMEElMjAlMjAlMjAlMjAtSCUyMCdDb250ZW50LVR5cGUlM0ElMjBhcHBsaWNhdGlvbiUyRmpzb24nJTIwJTVDJTBBJTIwJTIwJTIwJTIwLWQlMjAnJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIybW9kZWwlMjIlM0ElMjAlMjJ0Z2klMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJtZXNzYWdlcyUyMiUzQSUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMldoYXQlMjBpcyUyMERlZXAlMjBMZWFybmluZyUzRiUyMiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3RCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm1heF90b2tlbnMlMjIlM0ElMjAxMjglMEElMjAlMjAlMjAlMjAlN0Qn",highlighted:`curl http://localhost:8080/v1/chat/completions \\
-X POST \\
-H <span class="hljs-string">&#x27;Content-Type: application/json&#x27;</span> \\
-d <span class="hljs-string">&#x27;{
&quot;model&quot;: &quot;tgi&quot;,
&quot;messages&quot;: [
{
&quot;role&quot;: &quot;user&quot;,
&quot;content&quot;: &quot;What is Deep Learning?&quot;
}
],
&quot;max_tokens&quot;: 128
}&#x27;</span>`,wrap:!1}}),Ce=new J({props:{title:"Python",local:"python",headingTag:"h4"}}),ge=new J({props:{title:"huggingface_hub",local:"huggingfacehub",headingTag:"h5"}}),we=new g({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMEluZmVyZW5jZUNsaWVudCUwQSUwQWNsaWVudCUyMCUzRCUyMEluZmVyZW5jZUNsaWVudChiYXNlX3VybCUzRCUyMmh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDgwJTIyJTJDJTIwYXBpX2tleSUzRCUyMi0lMjIpJTBBJTBBY2hhdF9jb21wbGV0aW9uJTIwJTNEJTIwY2xpZW50LmNoYXQuY29tcGxldGlvbnMuY3JlYXRlKCUwQSUyMCUyMG1vZGVsJTNEJTIyaHVnZ2luZy1xdWFudHMlMkZnZW1tYS0yLTliLWl0LUFXUS1JTlQ0JTIyJTJDJTBBJTIwJTIwbWVzc2FnZXMlM0QlNUIlMEElMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJXaGF0JTIwaXMlMjBEZWVwJTIwTGVhcm5pbmclM0YlMjIlN0QlMkMlMEElMjAlMjAlNUQlMkMlMEElMjAlMjBtYXhfdG9rZW5zJTNEMTI4JTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient
client = InferenceClient(base_url=<span class="hljs-string">&quot;http://localhost:8080&quot;</span>, api_key=<span class="hljs-string">&quot;-&quot;</span>)
chat_completion = client.chat.completions.create(
model=<span class="hljs-string">&quot;hugging-quants/gemma-2-9b-it-AWQ-INT4&quot;</span>,
messages=[
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What is Deep Learning?&quot;</span>},
],
max_tokens=<span class="hljs-number">128</span>,
)`,wrap:!1}}),Je=new J({props:{title:"openai",local:"openai",headingTag:"h5"}}),be=new g({props:{code:"ZnJvbSUyMG9wZW5haSUyMGltcG9ydCUyME9wZW5BSSUwQSUwQWNsaWVudCUyMCUzRCUyME9wZW5BSSglMEElMjAlMjAlMjAlMjBiYXNlX3VybCUzRCUyMmh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDgwJTJGdjElMkYlMjIlMkMlMEElMjAlMjAlMjAlMjBhcGlfa2V5JTNEJTIyLSUyMiUyQyUwQSklMEElMEFjaGF0X2NvbXBsZXRpb24lMjAlM0QlMjBjbGllbnQuY2hhdC5jb21wbGV0aW9ucy5jcmVhdGUoJTBBJTIwJTIwJTIwJTIwbW9kZWwlM0QlMjJ0Z2klMjIlMkMlMEElMjAlMjAlMjAlMjBtZXNzYWdlcyUzRCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMldoYXQlMjBpcyUyMERlZXAlMjBMZWFybmluZyUzRiUyMiU3RCUyQyUwQSUyMCUyMCUyMCUyMCU1RCUyQyUwQSUyMCUyMCUyMCUyMG1heF90b2tlbnMlM0QxMjglMkMlMEEp",highlighted:`<span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI
client = OpenAI(
base_url=<span class="hljs-string">&quot;http://localhost:8080/v1/&quot;</span>,
api_key=<span class="hljs-string">&quot;-&quot;</span>,
)
chat_completion = client.chat.completions.create(
model=<span class="hljs-string">&quot;tgi&quot;</span>,
messages=[
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What is Deep Learning?&quot;</span>},
],
max_tokens=<span class="hljs-number">128</span>,
)`,wrap:!1}}),ve=new J({props:{title:"(recommended) Via Cloud Run Service URL",local:"recommended-via-cloud-run-service-url",headingTag:"h3"}}),Ve=new g({props:{code:"U0VSVklDRV9VUkwlM0QlMjQoZ2Nsb3VkJTIwcnVuJTIwc2VydmljZXMlMjBkZXNjcmliZSUyMCUyNFNFUlZJQ0VfTkFNRSUyMC0tcmVnaW9uJTIwJTI0TE9DQVRJT04lMjAtLWZvcm1hdCUyMCd2YWx1ZShzdGF0dXMudXJsKScp",highlighted:'SERVICE_URL=$(gcloud run services describe <span class="hljs-variable">$SERVICE_NAME</span> --region <span class="hljs-variable">$LOCATION</span> --format <span class="hljs-string">&#x27;value(status.url)&#x27;</span>)',wrap:!1}}),Ee=new g({props:{code:"Z2Nsb3VkJTIwYXV0aCUyMHByaW50LWlkZW50aXR5LXRva2Vu",highlighted:"gcloud auth print-identity-token",wrap:!1}}),Ae=new g({props:{code:"aW1wb3J0JTIwZ29vZ2xlLmF1dGglMEFmcm9tJTIwZ29vZ2xlLmF1dGgudHJhbnNwb3J0LnJlcXVlc3RzJTIwaW1wb3J0JTIwUmVxdWVzdCUyMGFzJTIwR29vZ2xlQXV0aFJlcXVlc3QlMEElMEFhdXRoX3JlcSUyMCUzRCUyMEdvb2dsZUF1dGhSZXF1ZXN0KCklMEFjcmVkcyUyQyUyMF8lMjAlM0QlMjBnb29nbGUuYXV0aC5kZWZhdWx0KCklMEFjcmVkcy5yZWZyZXNoKGF1dGhfcmVxKSUwQSUwQWlkX3Rva2VuJTIwJTNEJTIwY3JlZHMuaWRfdG9rZW4=",highlighted:`<span class="hljs-keyword">import</span> google.auth
<span class="hljs-keyword">from</span> google.auth.transport.requests <span class="hljs-keyword">import</span> Request <span class="hljs-keyword">as</span> GoogleAuthRequest
auth_req = GoogleAuthRequest()
creds, _ = google.auth.default()
creds.refresh(auth_req)
id_token = creds.id_token`,wrap:!1}}),Ze=new g({props:{code:"ZXhwb3J0JTIwU0VSVklDRV9BQ0NPVU5UX05BTUUlM0R0Z2ktaW52b2tlcg==",highlighted:'<span class="hljs-built_in">export</span> SERVICE_ACCOUNT_NAME=tgi-invoker',wrap:!1}}),Le=new g({props:{code:"Z2Nsb3VkJTIwaWFtJTIwc2VydmljZS1hY2NvdW50cyUyMGNyZWF0ZSUyMCUyNFNFUlZJQ0VfQUNDT1VOVF9OQU1F",highlighted:'gcloud iam service-accounts create <span class="hljs-variable">$SERVICE_ACCOUNT_NAME</span>',wrap:!1}}),xe=new g({props:{code:"Z2Nsb3VkJTIwcnVuJTIwc2VydmljZXMlMjBhZGQtaWFtLXBvbGljeS1iaW5kaW5nJTIwJTI0U0VSVklDRV9OQU1FJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1tZW1iZXIlM0QlMjJzZXJ2aWNlQWNjb3VudCUzQSUyNFNFUlZJQ0VfQUNDT1VOVF9OQU1FJTQwJTI0UFJPSkVDVF9JRC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbSUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tcm9sZSUzRCUyMnJvbGVzJTJGcnVuLmludm9rZXIlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLXJlZ2lvbiUzRCUyNExPQ0FUSU9O",highlighted:`gcloud run services add-iam-policy-binding <span class="hljs-variable">$SERVICE_NAME</span> \\
--member=<span class="hljs-string">&quot;serviceAccount:<span class="hljs-variable">$SERVICE_ACCOUNT_NAME</span>@<span class="hljs-variable">$PROJECT_ID</span>.iam.gserviceaccount.com&quot;</span> \\
--role=<span class="hljs-string">&quot;roles/run.invoker&quot;</span> \\
--region=<span class="hljs-variable">$LOCATION</span>`,wrap:!1}}),_e=new g({props:{code:"ZXhwb3J0JTIwQUNDRVNTX1RPS0VOJTNEJTI0KGdjbG91ZCUyMGF1dGglMjBwcmludC1hY2Nlc3MtdG9rZW4lMjAtLWltcGVyc29uYXRlLXNlcnZpY2UtYWNjb3VudCUzRCUyNFNFUlZJQ0VfQUNDT1VOVF9OQU1FJTQwJTI0UFJPSkVDVF9JRC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbSk=",highlighted:'<span class="hljs-built_in">export</span> ACCESS_TOKEN=$(gcloud auth print-access-token --impersonate-service-account=<span class="hljs-variable">$SERVICE_ACCOUNT_NAME</span>@<span class="hljs-variable">$PROJECT_ID</span>.iam.gserviceaccount.com)',wrap:!1}}),L=new k({props:{warning:!0,$$slots:{default:[ws]},$$scope:{ctx:w}}}),G=new k({props:{$$slots:{default:[Js]},$$scope:{ctx:w}}}),Be=new J({props:{title:"cURL",local:"curl",headingTag:"h4"}}),Xe=new g({props:{code:"Y3VybCUyMCUyNFNFUlZJQ0VfVVJMJTJGdjElMkZjaGF0JTJGY29tcGxldGlvbnMlMjAlNUMlMEElMjAlMjAlMjAlMjAtWCUyMFBPU1QlMjAlNUMlMEElMjAlMjAlMjAlMjAtSCUyMCUyMkF1dGhvcml6YXRpb24lM0ElMjBCZWFyZXIlMjAlMjRBQ0NFU1NfVE9LRU4lMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAtSCUyMCdDb250ZW50LVR5cGUlM0ElMjBhcHBsaWNhdGlvbiUyRmpzb24nJTIwJTVDJTBBJTIwJTIwJTIwJTIwLWQlMjAnJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIybW9kZWwlMjIlM0ElMjAlMjJ0Z2klMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJtZXNzYWdlcyUyMiUzQSUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMldoYXQlMjBpcyUyMERlZXAlMjBMZWFybmluZyUzRiUyMiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3RCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm1heF90b2tlbnMlMjIlM0ElMjAxMjglMEElMjAlMjAlMjAlMjAlN0Qn",highlighted:`curl <span class="hljs-variable">$SERVICE_URL</span>/v1/chat/completions \\
-X POST \\
-H <span class="hljs-string">&quot;Authorization: Bearer <span class="hljs-variable">$ACCESS_TOKEN</span>&quot;</span> \\
-H <span class="hljs-string">&#x27;Content-Type: application/json&#x27;</span> \\
-d <span class="hljs-string">&#x27;{
&quot;model&quot;: &quot;tgi&quot;,
&quot;messages&quot;: [
{
&quot;role&quot;: &quot;user&quot;,
&quot;content&quot;: &quot;What is Deep Learning?&quot;
}
],
&quot;max_tokens&quot;: 128
}&#x27;</span>`,wrap:!1}}),Fe=new J({props:{title:"Python",local:"python",headingTag:"h4"}}),Pe=new J({props:{title:"huggingface_hub",local:"huggingfacehub",headingTag:"h5"}}),De=new g({props:{code:"aW1wb3J0JTIwb3MlMEFmcm9tJTIwaHVnZ2luZ2ZhY2VfaHViJTIwaW1wb3J0JTIwSW5mZXJlbmNlQ2xpZW50JTBBJTBBY2xpZW50JTIwJTNEJTIwSW5mZXJlbmNlQ2xpZW50KCUwQSUyMCUyMCUyMCUyMGJhc2VfdXJsJTNEb3MuZ2V0ZW52KCUyMlNFUlZJQ0VfVVJMJTIyKSUyQyUwQSUyMCUyMCUyMCUyMGFwaV9rZXklM0Rvcy5nZXRlbnYoJTIyQUNDRVNTX1RPS0VOJTIyKSUyQyUwQSklMEElMEFjaGF0X2NvbXBsZXRpb24lMjAlM0QlMjBjbGllbnQuY2hhdC5jb21wbGV0aW9ucy5jcmVhdGUoJTBBJTIwJTIwbW9kZWwlM0QlMjJodWdnaW5nLXF1YW50cyUyRmdlbW1hLTItOWItaXQtQVdRLUlOVDQlMjIlMkMlMEElMjAlMjBtZXNzYWdlcyUzRCU1QiUwQSUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMldoYXQlMjBpcyUyMERlZXAlMjBMZWFybmluZyUzRiUyMiU3RCUyQyUwQSUyMCUyMCU1RCUyQyUwQSUyMCUyMG1heF90b2tlbnMlM0QxMjglMkMlMEEp",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient
client = InferenceClient(
base_url=os.getenv(<span class="hljs-string">&quot;SERVICE_URL&quot;</span>),
api_key=os.getenv(<span class="hljs-string">&quot;ACCESS_TOKEN&quot;</span>),
)
chat_completion = client.chat.completions.create(
model=<span class="hljs-string">&quot;hugging-quants/gemma-2-9b-it-AWQ-INT4&quot;</span>,
messages=[
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What is Deep Learning?&quot;</span>},
],
max_tokens=<span class="hljs-number">128</span>,
)`,wrap:!1}}),ze=new J({props:{title:"openai",local:"openai",headingTag:"h5"}}),Ke=new g({props:{code:"aW1wb3J0JTIwb3MlMEFmcm9tJTIwb3BlbmFpJTIwaW1wb3J0JTIwT3BlbkFJJTBBJTBBY2xpZW50JTIwJTNEJTIwT3BlbkFJKCUwQSUyMCUyMCUyMCUyMGJhc2VfdXJsJTNEb3MuZ2V0ZW52KCUyMlNFUlZJQ0VfVVJMJTIyKSUyQyUwQSUyMCUyMCUyMCUyMGFwaV9rZXklM0Rvcy5nZXRlbnYoJTIyQUNDRVNTX1RPS0VOJTIyKSUyQyUwQSklMEElMEFjaGF0X2NvbXBsZXRpb24lMjAlM0QlMjBjbGllbnQuY2hhdC5jb21wbGV0aW9ucy5jcmVhdGUoJTBBJTIwJTIwJTIwJTIwbW9kZWwlM0QlMjJ0Z2klMjIlMkMlMEElMjAlMjAlMjAlMjBtZXNzYWdlcyUzRCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMldoYXQlMjBpcyUyMERlZXAlMjBMZWFybmluZyUzRiUyMiU3RCUyQyUwQSUyMCUyMCUyMCUyMCU1RCUyQyUwQSUyMCUyMCUyMCUyMG1heF90b2tlbnMlM0QxMjglMkMlMEEp",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI
client = OpenAI(
base_url=os.getenv(<span class="hljs-string">&quot;SERVICE_URL&quot;</span>),
api_key=os.getenv(<span class="hljs-string">&quot;ACCESS_TOKEN&quot;</span>),
)
chat_completion = client.chat.completions.create(
model=<span class="hljs-string">&quot;tgi&quot;</span>,
messages=[
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What is Deep Learning?&quot;</span>},
],
max_tokens=<span class="hljs-number">128</span>,
)`,wrap:!1}}),et=new J({props:{title:"Resource clean up",local:"resource-clean-up",headingTag:"h2"}}),nt=new g({props:{code:"Z2Nsb3VkJTIwcnVuJTIwc2VydmljZXMlMjBkZWxldGUlMjAlMjRTRVJWSUNFX05BTUUlMjAtLXJlZ2lvbiUyMCUyNExPQ0FUSU9O",highlighted:'gcloud run services delete <span class="hljs-variable">$SERVICE_NAME</span> --region <span class="hljs-variable">$LOCATION</span>',wrap:!1}}),at=new g({props:{code:"Z2Nsb3VkJTIwYXV0aCUyMHJldm9rZSUyMC0taW1wZXJzb25hdGUtc2VydmljZS1hY2NvdW50JTNEJTI0U0VSVklDRV9BQ0NPVU5UX05BTUUlNDAlMjRQUk9KRUNUX0lELmlhbS5nc2VydmljZWFjY291bnQuY29t",highlighted:'gcloud auth revoke --impersonate-service-account=<span class="hljs-variable">$SERVICE_ACCOUNT_NAME</span>@<span class="hljs-variable">$PROJECT_ID</span>.iam.gserviceaccount.com',wrap:!1}}),ut=new g({props:{code:"Z2Nsb3VkJTIwaWFtJTIwc2VydmljZS1hY2NvdW50cyUyMGRlbGV0ZSUyMCUyNFNFUlZJQ0VfQUNDT1VOVF9OQU1FJTQwJTI0UFJPSkVDVF9JRC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbQ==",highlighted:'gcloud iam service-accounts delete <span class="hljs-variable">$SERVICE_ACCOUNT_NAME</span>@<span class="hljs-variable">$PROJECT_ID</span>.iam.gserviceaccount.com',wrap:!1}}),ct=new g({props:{code:"Z2Nsb3VkJTIwY29tcHV0ZSUyMHJvdXRlcnMlMjBuYXRzJTIwZGVsZXRlJTIwdm0tbmF0JTIwLS1yb3V0ZXIlM0RuYXQtcm91dGVyJTIwLS1yZWdpb24lM0QlMjRMT0NBVElPTiUwQWdjbG91ZCUyMGNvbXB1dGUlMjByb3V0ZXJzJTIwZGVsZXRlJTIwbmF0LXJvdXRlciUyMC0tcmVnaW9uJTNEJTI0TE9DQVRJT04=",highlighted:`gcloud compute routers nats delete vm-nat --router=nat-router --region=<span class="hljs-variable">$LOCATION</span>
gcloud compute routers delete nat-router --region=<span class="hljs-variable">$LOCATION</span>`,wrap:!1}}),pt=new J({props:{title:"References",local:"references",headingTag:"h2"}}),x=new k({props:{$$slots:{default:[$s]},$$scope:{ctx:w}}}),dt=new ms({props:{source:"https://github.com/huggingface/Google-Cloud-Containers/blob/main/docs/source/examples/cloud-run-deploy-gemma-2-on-cloud-run.mdx"}}),{c(){a=i("meta"),U=s(),c=i("p"),C=s(),p(v.$$.fragment),T=s(),I=i("p"),I.textContent=Un,It=s(),W=i("p"),W.innerHTML=Cn,Vt=s(),p(E.$$.fragment),Rt=s(),p(B.$$.fragment),Et=s(),q=i("p"),q.innerHTML=fn,At=s(),X=i("p"),X.textContent=gn,Nt=s(),p(F.$$.fragment),St=s(),H=i("p"),H.textContent=Tn,Zt=s(),p(P.$$.fragment),Qt=s(),Y=i("p"),Y.textContent=wn,Lt=s(),p(D.$$.fragment),Gt=s(),p(z.$$.fragment),xt=s(),O=i("p"),O.innerHTML=Jn,kt=s(),K=i("p"),K.innerHTML=$n,_t=s(),ee=i("ul"),ee.innerHTML=bn,Wt=s(),p(A.$$.fragment),Bt=s(),te=i("p"),te.innerHTML=vn,qt=s(),p(le.$$.fragment),Xt=s(),ne=i("p"),ne.textContent=jn,Ft=s(),p(se.$$.fragment),Ht=s(),oe=i("p"),oe.innerHTML=In,Pt=s(),ae=i("p"),ae.innerHTML=Vn,Yt=s(),p(N.$$.fragment),Dt=s(),p(ie.$$.fragment),zt=s(),ue=i("p"),ue.innerHTML=Rn,Ot=s(),re=i("p"),re.textContent=En,Kt=s(),ce=i("p"),ce.innerHTML=An,el=s(),p(S.$$.fragment),tl=s(),p(pe.$$.fragment),ll=s(),Me=i("p"),Me.textContent=Nn,nl=s(),p(de.$$.fragment),sl=s(),ye=i("p"),ye.innerHTML=Sn,ol=s(),p(Z.$$.fragment),al=s(),p(he.$$.fragment),il=s(),me=i("p"),me.innerHTML=Zn,ul=s(),p(Ue.$$.fragment),rl=s(),p(Ce.$$.fragment),cl=s(),fe=i("p"),fe.innerHTML=Qn,pl=s(),p(ge.$$.fragment),Ml=s(),Te=i("p"),Te.innerHTML=Ln,dl=s(),p(we.$$.fragment),yl=s(),p(Je.$$.fragment),hl=s(),$e=i("p"),$e.innerHTML=Gn,ml=s(),p(be.$$.fragment),Ul=s(),p(ve.$$.fragment),Cl=s(),je=i("p"),je.textContent=xn,fl=s(),Ie=i("p"),Ie.innerHTML=kn,gl=s(),p(Ve.$$.fragment),Tl=s(),Re=i("p"),Re.innerHTML=_n,wl=s(),Q=i("ul"),$=i("li"),ft=i("p"),ft.textContent=Wn,sn=s(),gt=i("ul"),gt.innerHTML=Bn,on=s(),p(Ee.$$.fragment),an=s(),Tt=i("ul"),Tt.innerHTML=qn,un=s(),p(Ae.$$.fragment),rn=s(),wt=i("li"),wt.innerHTML=Xn,Jl=s(),Ne=i("p"),Ne.textContent=Fn,$l=s(),j=i("ul"),Se=i("li"),Jt=i("p"),Jt.innerHTML=Hn,cn=s(),p(Ze.$$.fragment),pn=s(),Qe=i("li"),$t=i("p"),$t.textContent=Pn,Mn=s(),p(Le.$$.fragment),dn=s(),Ge=i("li"),bt=i("p"),bt.textContent=Yn,yn=s(),p(xe.$$.fragment),hn=s(),ke=i("li"),vt=i("p"),vt.textContent=Dn,mn=s(),p(_e.$$.fragment),bl=s(),p(L.$$.fragment),vl=s(),We=i("p"),We.innerHTML=zn,jl=s(),p(G.$$.fragment),Il=s(),p(Be.$$.fragment),Vl=s(),qe=i("p"),qe.innerHTML=On,Rl=s(),p(Xe.$$.fragment),El=s(),p(Fe.$$.fragment),Al=s(),He=i("p"),He.innerHTML=Kn,Nl=s(),p(Pe.$$.fragment),Sl=s(),Ye=i("p"),Ye.innerHTML=es,Zl=s(),p(De.$$.fragment),Ql=s(),p(ze.$$.fragment),Ll=s(),Oe=i("p"),Oe.innerHTML=ts,Gl=s(),p(Ke.$$.fragment),xl=s(),p(et.$$.fragment),kl=s(),tt=i("p"),tt.textContent=ls,_l=s(),lt=i("p"),lt.innerHTML=ns,Wl=s(),p(nt.$$.fragment),Bl=s(),st=i("p"),st.innerHTML=ss,ql=s(),ot=i("ul"),ot.innerHTML=os,Xl=s(),p(at.$$.fragment),Fl=s(),it=i("ul"),it.innerHTML=as,Hl=s(),p(ut.$$.fragment),Pl=s(),rt=i("p"),rt.textContent=is,Yl=s(),p(ct.$$.fragment),Dl=s(),p(pt.$$.fragment),zl=s(),Mt=i("ul"),Mt.innerHTML=us,Ol=s(),Kl=i("hr"),en=s(),p(x.$$.fragment),tn=s(),p(dt.$$.fragment),ln=s(),jt=i("p"),this.h()},l(e){const t=hs("svelte-u9bgzb",document.head);a=u(t,"META",{name:!0,content:!0}),t.forEach(l),U=o(e),c=u(e,"P",{}),R(c).forEach(l),C=o(e),M(v.$$.fragment,e),T=o(e),I=u(e,"P",{"data-svelte-h":!0}),r(I)!=="svelte-x0ygba"&&(I.textContent=Un),It=o(e),W=u(e,"P",{"data-svelte-h":!0}),r(W)!=="svelte-ppx71b"&&(W.innerHTML=Cn),Vt=o(e),M(E.$$.fragment,e),Rt=o(e),M(B.$$.fragment,e),Et=o(e),q=u(e,"P",{"data-svelte-h":!0}),r(q)!=="svelte-1o67gfb"&&(q.innerHTML=fn),At=o(e),X=u(e,"P",{"data-svelte-h":!0}),r(X)!=="svelte-1i8n6rq"&&(X.textContent=gn),Nt=o(e),M(F.$$.fragment,e),St=o(e),H=u(e,"P",{"data-svelte-h":!0}),r(H)!=="svelte-pfbeqd"&&(H.textContent=Tn),Zt=o(e),M(P.$$.fragment,e),Qt=o(e),Y=u(e,"P",{"data-svelte-h":!0}),r(Y)!=="svelte-1usj4y5"&&(Y.textContent=wn),Lt=o(e),M(D.$$.fragment,e),Gt=o(e),M(z.$$.fragment,e),xt=o(e),O=u(e,"P",{"data-svelte-h":!0}),r(O)!=="svelte-v1hrv3"&&(O.innerHTML=Jn),kt=o(e),K=u(e,"P",{"data-svelte-h":!0}),r(K)!=="svelte-1edwfmt"&&(K.innerHTML=$n),_t=o(e),ee=u(e,"UL",{"data-svelte-h":!0}),r(ee)!=="svelte-1l6ubvy"&&(ee.innerHTML=bn),Wt=o(e),M(A.$$.fragment,e),Bt=o(e),te=u(e,"P",{"data-svelte-h":!0}),r(te)!=="svelte-xkgc51"&&(te.innerHTML=vn),qt=o(e),M(le.$$.fragment,e),Xt=o(e),ne=u(e,"P",{"data-svelte-h":!0}),r(ne)!=="svelte-vtd23h"&&(ne.textContent=jn),Ft=o(e),M(se.$$.fragment,e),Ht=o(e),oe=u(e,"P",{"data-svelte-h":!0}),r(oe)!=="svelte-1s71btd"&&(oe.innerHTML=In),Pt=o(e),ae=u(e,"P",{"data-svelte-h":!0}),r(ae)!=="svelte-1u0orza"&&(ae.innerHTML=Vn),Yt=o(e),M(N.$$.fragment,e),Dt=o(e),M(ie.$$.fragment,e),zt=o(e),ue=u(e,"P",{"data-svelte-h":!0}),r(ue)!=="svelte-1d9o3zj"&&(ue.innerHTML=Rn),Ot=o(e),re=u(e,"P",{"data-svelte-h":!0}),r(re)!=="svelte-1gzo66i"&&(re.textContent=En),Kt=o(e),ce=u(e,"P",{"data-svelte-h":!0}),r(ce)!=="svelte-1rn2q59"&&(ce.innerHTML=An),el=o(e),M(S.$$.fragment,e),tl=o(e),M(pe.$$.fragment,e),ll=o(e),Me=u(e,"P",{"data-svelte-h":!0}),r(Me)!=="svelte-1n42sxg"&&(Me.textContent=Nn),nl=o(e),M(de.$$.fragment,e),sl=o(e),ye=u(e,"P",{"data-svelte-h":!0}),r(ye)!=="svelte-yd30oz"&&(ye.innerHTML=Sn),ol=o(e),M(Z.$$.fragment,e),al=o(e),M(he.$$.fragment,e),il=o(e),me=u(e,"P",{"data-svelte-h":!0}),r(me)!=="svelte-1ks8djk"&&(me.innerHTML=Zn),ul=o(e),M(Ue.$$.fragment,e),rl=o(e),M(Ce.$$.fragment,e),cl=o(e),fe=u(e,"P",{"data-svelte-h":!0}),r(fe)!=="svelte-wqgbww"&&(fe.innerHTML=Qn),pl=o(e),M(ge.$$.fragment,e),Ml=o(e),Te=u(e,"P",{"data-svelte-h":!0}),r(Te)!=="svelte-u758zp"&&(Te.innerHTML=Ln),dl=o(e),M(we.$$.fragment,e),yl=o(e),M(Je.$$.fragment,e),hl=o(e),$e=u(e,"P",{"data-svelte-h":!0}),r($e)!=="svelte-1bjj57d"&&($e.innerHTML=Gn),ml=o(e),M(be.$$.fragment,e),Ul=o(e),M(ve.$$.fragment,e),Cl=o(e),je=u(e,"P",{"data-svelte-h":!0}),r(je)!=="svelte-1o5h253"&&(je.textContent=xn),fl=o(e),Ie=u(e,"P",{"data-svelte-h":!0}),r(Ie)!=="svelte-7kvofg"&&(Ie.innerHTML=kn),gl=o(e),M(Ve.$$.fragment,e),Tl=o(e),Re=u(e,"P",{"data-svelte-h":!0}),r(Re)!=="svelte-niuemd"&&(Re.innerHTML=_n),wl=o(e),Q=u(e,"UL",{});var yt=R(Q);$=u(yt,"LI",{});var b=R($);ft=u(b,"P",{"data-svelte-h":!0}),r(ft)!=="svelte-4rg0sg"&&(ft.textContent=Wn),sn=o(b),gt=u(b,"UL",{"data-svelte-h":!0}),r(gt)!=="svelte-1vwi62o"&&(gt.innerHTML=Bn),on=o(b),M(Ee.$$.fragment,b),an=o(b),Tt=u(b,"UL",{"data-svelte-h":!0}),r(Tt)!=="svelte-b6nbfv"&&(Tt.innerHTML=qn),un=o(b),M(Ae.$$.fragment,b),b.forEach(l),rn=o(yt),wt=u(yt,"LI",{"data-svelte-h":!0}),r(wt)!=="svelte-1v3f4rq"&&(wt.innerHTML=Xn),yt.forEach(l),Jl=o(e),Ne=u(e,"P",{"data-svelte-h":!0}),r(Ne)!=="svelte-18iwnqm"&&(Ne.textContent=Fn),$l=o(e),j=u(e,"UL",{});var V=R(j);Se=u(V,"LI",{});var ht=R(Se);Jt=u(ht,"P",{"data-svelte-h":!0}),r(Jt)!=="svelte-tbapg4"&&(Jt.innerHTML=Hn),cn=o(ht),M(Ze.$$.fragment,ht),ht.forEach(l),pn=o(V),Qe=u(V,"LI",{});var mt=R(Qe);$t=u(mt,"P",{"data-svelte-h":!0}),r($t)!=="svelte-1nwc9xz"&&($t.textContent=Pn),Mn=o(mt),M(Le.$$.fragment,mt),mt.forEach(l),dn=o(V),Ge=u(V,"LI",{});var Ut=R(Ge);bt=u(Ut,"P",{"data-svelte-h":!0}),r(bt)!=="svelte-19ickcu"&&(bt.textContent=Yn),yn=o(Ut),M(xe.$$.fragment,Ut),Ut.forEach(l),hn=o(V),ke=u(V,"LI",{});var Ct=R(ke);vt=u(Ct,"P",{"data-svelte-h":!0}),r(vt)!=="svelte-v6tnr5"&&(vt.textContent=Dn),mn=o(Ct),M(_e.$$.fragment,Ct),Ct.forEach(l),V.forEach(l),bl=o(e),M(L.$$.fragment,e),vl=o(e),We=u(e,"P",{"data-svelte-h":!0}),r(We)!=="svelte-ur1pan"&&(We.innerHTML=zn),jl=o(e),M(G.$$.fragment,e),Il=o(e),M(Be.$$.fragment,e),Vl=o(e),qe=u(e,"P",{"data-svelte-h":!0}),r(qe)!=="svelte-1ks8djk"&&(qe.innerHTML=On),Rl=o(e),M(Xe.$$.fragment,e),El=o(e),M(Fe.$$.fragment,e),Al=o(e),He=u(e,"P",{"data-svelte-h":!0}),r(He)!=="svelte-wqgbww"&&(He.innerHTML=Kn),Nl=o(e),M(Pe.$$.fragment,e),Sl=o(e),Ye=u(e,"P",{"data-svelte-h":!0}),r(Ye)!=="svelte-u758zp"&&(Ye.innerHTML=es),Zl=o(e),M(De.$$.fragment,e),Ql=o(e),M(ze.$$.fragment,e),Ll=o(e),Oe=u(e,"P",{"data-svelte-h":!0}),r(Oe)!=="svelte-1bjj57d"&&(Oe.innerHTML=ts),Gl=o(e),M(Ke.$$.fragment,e),xl=o(e),M(et.$$.fragment,e),kl=o(e),tt=u(e,"P",{"data-svelte-h":!0}),r(tt)!=="svelte-1c66oa"&&(tt.textContent=ls),_l=o(e),lt=u(e,"P",{"data-svelte-h":!0}),r(lt)!=="svelte-1sjp15c"&&(lt.innerHTML=ns),Wl=o(e),M(nt.$$.fragment,e),Bl=o(e),st=u(e,"P",{"data-svelte-h":!0}),r(st)!=="svelte-l00oox"&&(st.innerHTML=ss),ql=o(e),ot=u(e,"UL",{"data-svelte-h":!0}),r(ot)!=="svelte-uuoqil"&&(ot.innerHTML=os),Xl=o(e),M(at.$$.fragment,e),Fl=o(e),it=u(e,"UL",{"data-svelte-h":!0}),r(it)!=="svelte-1toh59e"&&(it.innerHTML=as),Hl=o(e),M(ut.$$.fragment,e),Pl=o(e),rt=u(e,"P",{"data-svelte-h":!0}),r(rt)!=="svelte-1xv6mwy"&&(rt.textContent=is),Yl=o(e),M(ct.$$.fragment,e),Dl=o(e),M(pt.$$.fragment,e),zl=o(e),Mt=u(e,"UL",{"data-svelte-h":!0}),r(Mt)!=="svelte-1fx9lvo"&&(Mt.innerHTML=us),Ol=o(e),Kl=u(e,"HR",{}),en=o(e),M(x.$$.fragment,e),tn=o(e),M(dt.$$.fragment,e),ln=o(e),jt=u(e,"P",{}),R(jt).forEach(l),this.h()},h(){cs(a,"name","hf:doc:metadata"),cs(a,"content",vs)},m(e,t){f(document.head,a),n(e,U,t),n(e,c,t),n(e,C,t),d(v,e,t),n(e,T,t),n(e,I,t),n(e,It,t),n(e,W,t),n(e,Vt,t),d(E,e,t),n(e,Rt,t),d(B,e,t),n(e,Et,t),n(e,q,t),n(e,At,t),n(e,X,t),n(e,Nt,t),d(F,e,t),n(e,St,t),n(e,H,t),n(e,Zt,t),d(P,e,t),n(e,Qt,t),n(e,Y,t),n(e,Lt,t),d(D,e,t),n(e,Gt,t),d(z,e,t),n(e,xt,t),n(e,O,t),n(e,kt,t),n(e,K,t),n(e,_t,t),n(e,ee,t),n(e,Wt,t),d(A,e,t),n(e,Bt,t),n(e,te,t),n(e,qt,t),d(le,e,t),n(e,Xt,t),n(e,ne,t),n(e,Ft,t),d(se,e,t),n(e,Ht,t),n(e,oe,t),n(e,Pt,t),n(e,ae,t),n(e,Yt,t),d(N,e,t),n(e,Dt,t),d(ie,e,t),n(e,zt,t),n(e,ue,t),n(e,Ot,t),n(e,re,t),n(e,Kt,t),n(e,ce,t),n(e,el,t),d(S,e,t),n(e,tl,t),d(pe,e,t),n(e,ll,t),n(e,Me,t),n(e,nl,t),d(de,e,t),n(e,sl,t),n(e,ye,t),n(e,ol,t),d(Z,e,t),n(e,al,t),d(he,e,t),n(e,il,t),n(e,me,t),n(e,ul,t),d(Ue,e,t),n(e,rl,t),d(Ce,e,t),n(e,cl,t),n(e,fe,t),n(e,pl,t),d(ge,e,t),n(e,Ml,t),n(e,Te,t),n(e,dl,t),d(we,e,t),n(e,yl,t),d(Je,e,t),n(e,hl,t),n(e,$e,t),n(e,ml,t),d(be,e,t),n(e,Ul,t),d(ve,e,t),n(e,Cl,t),n(e,je,t),n(e,fl,t),n(e,Ie,t),n(e,gl,t),d(Ve,e,t),n(e,Tl,t),n(e,Re,t),n(e,wl,t),n(e,Q,t),f(Q,$),f($,ft),f($,sn),f($,gt),f($,on),d(Ee,$,null),f($,an),f($,Tt),f($,un),d(Ae,$,null),f(Q,rn),f(Q,wt),n(e,Jl,t),n(e,Ne,t),n(e,$l,t),n(e,j,t),f(j,Se),f(Se,Jt),f(Se,cn),d(Ze,Se,null),f(j,pn),f(j,Qe),f(Qe,$t),f(Qe,Mn),d(Le,Qe,null),f(j,dn),f(j,Ge),f(Ge,bt),f(Ge,yn),d(xe,Ge,null),f(j,hn),f(j,ke),f(ke,vt),f(ke,mn),d(_e,ke,null),n(e,bl,t),d(L,e,t),n(e,vl,t),n(e,We,t),n(e,jl,t),d(G,e,t),n(e,Il,t),d(Be,e,t),n(e,Vl,t),n(e,qe,t),n(e,Rl,t),d(Xe,e,t),n(e,El,t),d(Fe,e,t),n(e,Al,t),n(e,He,t),n(e,Nl,t),d(Pe,e,t),n(e,Sl,t),n(e,Ye,t),n(e,Zl,t),d(De,e,t),n(e,Ql,t),d(ze,e,t),n(e,Ll,t),n(e,Oe,t),n(e,Gl,t),d(Ke,e,t),n(e,xl,t),d(et,e,t),n(e,kl,t),n(e,tt,t),n(e,_l,t),n(e,lt,t),n(e,Wl,t),d(nt,e,t),n(e,Bl,t),n(e,st,t),n(e,ql,t),n(e,ot,t),n(e,Xl,t),d(at,e,t),n(e,Fl,t),n(e,it,t),n(e,Hl,t),d(ut,e,t),n(e,Pl,t),n(e,rt,t),n(e,Yl,t),d(ct,e,t),n(e,Dl,t),d(pt,e,t),n(e,zl,t),n(e,Mt,t),n(e,Ol,t),n(e,Kl,t),n(e,en,t),d(x,e,t),n(e,tn,t),d(dt,e,t),n(e,ln,t),n(e,jt,t),nn=!0},p(e,[t]){const yt={};t&2&&(yt.$$scope={dirty:t,ctx:e}),E.$set(yt);const b={};t&2&&(b.$$scope={dirty:t,ctx:e}),A.$set(b);const V={};t&2&&(V.$$scope={dirty:t,ctx:e}),N.$set(V);const ht={};t&2&&(ht.$$scope={dirty:t,ctx:e}),S.$set(ht);const mt={};t&2&&(mt.$$scope={dirty:t,ctx:e}),Z.$set(mt);const Ut={};t&2&&(Ut.$$scope={dirty:t,ctx:e}),L.$set(Ut);const Ct={};t&2&&(Ct.$$scope={dirty:t,ctx:e}),G.$set(Ct);const rs={};t&2&&(rs.$$scope={dirty:t,ctx:e}),x.$set(rs)},i(e){nn||(y(v.$$.fragment,e),y(E.$$.fragment,e),y(B.$$.fragment,e),y(F.$$.fragment,e),y(P.$$.fragment,e),y(D.$$.fragment,e),y(z.$$.fragment,e),y(A.$$.fragment,e),y(le.$$.fragment,e),y(se.$$.fragment,e),y(N.$$.fragment,e),y(ie.$$.fragment,e),y(S.$$.fragment,e),y(pe.$$.fragment,e),y(de.$$.fragment,e),y(Z.$$.fragment,e),y(he.$$.fragment,e),y(Ue.$$.fragment,e),y(Ce.$$.fragment,e),y(ge.$$.fragment,e),y(we.$$.fragment,e),y(Je.$$.fragment,e),y(be.$$.fragment,e),y(ve.$$.fragment,e),y(Ve.$$.fragment,e),y(Ee.$$.fragment,e),y(Ae.$$.fragment,e),y(Ze.$$.fragment,e),y(Le.$$.fragment,e),y(xe.$$.fragment,e),y(_e.$$.fragment,e),y(L.$$.fragment,e),y(G.$$.fragment,e),y(Be.$$.fragment,e),y(Xe.$$.fragment,e),y(Fe.$$.fragment,e),y(Pe.$$.fragment,e),y(De.$$.fragment,e),y(ze.$$.fragment,e),y(Ke.$$.fragment,e),y(et.$$.fragment,e),y(nt.$$.fragment,e),y(at.$$.fragment,e),y(ut.$$.fragment,e),y(ct.$$.fragment,e),y(pt.$$.fragment,e),y(x.$$.fragment,e),y(dt.$$.fragment,e),nn=!0)},o(e){h(v.$$.fragment,e),h(E.$$.fragment,e),h(B.$$.fragment,e),h(F.$$.fragment,e),h(P.$$.fragment,e),h(D.$$.fragment,e),h(z.$$.fragment,e),h(A.$$.fragment,e),h(le.$$.fragment,e),h(se.$$.fragment,e),h(N.$$.fragment,e),h(ie.$$.fragment,e),h(S.$$.fragment,e),h(pe.$$.fragment,e),h(de.$$.fragment,e),h(Z.$$.fragment,e),h(he.$$.fragment,e),h(Ue.$$.fragment,e),h(Ce.$$.fragment,e),h(ge.$$.fragment,e),h(we.$$.fragment,e),h(Je.$$.fragment,e),h(be.$$.fragment,e),h(ve.$$.fragment,e),h(Ve.$$.fragment,e),h(Ee.$$.fragment,e),h(Ae.$$.fragment,e),h(Ze.$$.fragment,e),h(Le.$$.fragment,e),h(xe.$$.fragment,e),h(_e.$$.fragment,e),h(L.$$.fragment,e),h(G.$$.fragment,e),h(Be.$$.fragment,e),h(Xe.$$.fragment,e),h(Fe.$$.fragment,e),h(Pe.$$.fragment,e),h(De.$$.fragment,e),h(ze.$$.fragment,e),h(Ke.$$.fragment,e),h(et.$$.fragment,e),h(nt.$$.fragment,e),h(at.$$.fragment,e),h(ut.$$.fragment,e),h(ct.$$.fragment,e),h(pt.$$.fragment,e),h(x.$$.fragment,e),h(dt.$$.fragment,e),nn=!1},d(e){e&&(l(U),l(c),l(C),l(T),l(I),l(It),l(W),l(Vt),l(Rt),l(Et),l(q),l(At),l(X),l(Nt),l(St),l(H),l(Zt),l(Qt),l(Y),l(Lt),l(Gt),l(xt),l(O),l(kt),l(K),l(_t),l(ee),l(Wt),l(Bt),l(te),l(qt),l(Xt),l(ne),l(Ft),l(Ht),l(oe),l(Pt),l(ae),l(Yt),l(Dt),l(zt),l(ue),l(Ot),l(re),l(Kt),l(ce),l(el),l(tl),l(ll),l(Me),l(nl),l(sl),l(ye),l(ol),l(al),l(il),l(me),l(ul),l(rl),l(cl),l(fe),l(pl),l(Ml),l(Te),l(dl),l(yl),l(hl),l($e),l(ml),l(Ul),l(Cl),l(je),l(fl),l(Ie),l(gl),l(Tl),l(Re),l(wl),l(Q),l(Jl),l(Ne),l($l),l(j),l(bl),l(vl),l(We),l(jl),l(Il),l(Vl),l(qe),l(Rl),l(El),l(Al),l(He),l(Nl),l(Sl),l(Ye),l(Zl),l(Ql),l(Ll),l(Oe),l(Gl),l(xl),l(kl),l(tt),l(_l),l(lt),l(Wl),l(Bl),l(st),l(ql),l(ot),l(Xl),l(Fl),l(it),l(Hl),l(Pl),l(rt),l(Yl),l(Dl),l(zl),l(Mt),l(Ol),l(Kl),l(en),l(tn),l(ln),l(jt)),l(a),m(v,e),m(E,e),m(B,e),m(F,e),m(P,e),m(D,e),m(z,e),m(A,e),m(le,e),m(se,e),m(N,e),m(ie,e),m(S,e),m(pe,e),m(de,e),m(Z,e),m(he,e),m(Ue,e),m(Ce,e),m(ge,e),m(we,e),m(Je,e),m(be,e),m(ve,e),m(Ve,e),m(Ee),m(Ae),m(Ze),m(Le),m(xe),m(_e),m(L,e),m(G,e),m(Be,e),m(Xe,e),m(Fe,e),m(Pe,e),m(De,e),m(ze,e),m(Ke,e),m(et,e),m(nt,e),m(at,e),m(ut,e),m(ct,e),m(pt,e),m(x,e),m(dt,e)}}}const vs='{"title":"Deploy Gemma2 9B with TGI DLC on Cloud Run","local":"deploy-gemma2-9b-with-tgi-dlc-on-cloud-run","sections":[{"title":"Setup / Configuration","local":"setup--configuration","sections":[],"depth":2},{"title":"Deploy TGI on Cloud Run","local":"deploy-tgi-on-cloud-run","sections":[],"depth":2},{"title":"Inference on Cloud Run","local":"inference-on-cloud-run","sections":[{"title":"Via Cloud Run Proxy","local":"via-cloud-run-proxy","sections":[{"title":"cURL","local":"curl","sections":[],"depth":4},{"title":"Python","local":"python","sections":[{"title":"huggingface_hub","local":"huggingfacehub","sections":[],"depth":5},{"title":"openai","local":"openai","sections":[],"depth":5}],"depth":4}],"depth":3},{"title":"(recommended) Via Cloud Run Service URL","local":"recommended-via-cloud-run-service-url","sections":[{"title":"cURL","local":"curl","sections":[],"depth":4},{"title":"Python","local":"python","sections":[{"title":"huggingface_hub","local":"huggingfacehub","sections":[],"depth":5},{"title":"openai","local":"openai","sections":[],"depth":5}],"depth":4}],"depth":3}],"depth":2},{"title":"Resource clean up","local":"resource-clean-up","sections":[],"depth":2},{"title":"References","local":"references","sections":[],"depth":2}],"depth":1}';function js(w){return Ms(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ns extends ds{constructor(a){super(),ys(this,a,js,bs,ps,{})}}export{Ns as component};

Xet Storage Details

Size:
54.7 kB
·
Xet hash:
bd8954ca58916e466f09515b5dce67c122d92deddee8f669eb97d57aed536658

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.