Buckets:

rtrm's picture
download
raw
56.9 kB
import{s as Jn,o as gn,n as ze}from"../chunks/scheduler.b108d059.js";import{S as fn,i as wn,g as o,s as a,r as d,A as bn,h as i,f as l,c as s,j as Un,u as c,x as p,k as Tn,y as In,a as n,v as u,d as h,t as y,w as m}from"../chunks/index.008de539.js";import{T as Ye}from"../chunks/Tip.aeb15ab7.js";import{C as U}from"../chunks/CodeBlock.3968c746.js";import{H as g,E as Cn}from"../chunks/EditOnGithub.d1c48e3d.js";function Gn(J){let M,j='Starting from TGI 2.3 DLC i.e. <code>us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311</code>, and onwards, you can set the environment variable value <code>MESSAGES_API_ENABLED=&quot;true&quot;</code> to deploy the <a href="https://huggingface.co/docs/text-generation-inference/main/en/messages_api" rel="nofollow">Messages API</a> on Vertex AI, otherwise, the <a href="https://huggingface.co/docs/text-generation-inference/main/en/quicktour#consuming-tgi" rel="nofollow">Generate API</a> will be deployed instead.';return{c(){M=o("p"),M.innerHTML=j},l(r){M=i(r,"P",{"data-svelte-h":!0}),p(M)!=="svelte-1jcnsbw"&&(M.innerHTML=j)},m(r,T){n(r,M,T)},p:ze,d(r){r&&l(M)}}}function An(J){let M,j="As mentioned before, since Meta Llama 3.1 405B in FP8 takes ~400 GiB of disk space, that means you need at least 400 GiB of GPU VRAM to load the model, and the GPUs within the node need to support the FP8 data type. In this case, an A3 instance with 8 x NVIDIA H100 80GB with a total of ~640 GiB of VRAM will be used to load the model while also leaving some free VRAM for the KV Cache and the CUDA Graphs.";return{c(){M=o("p"),M.textContent=j},l(r){M=i(r,"P",{"data-svelte-h":!0}),p(M)!=="svelte-1gl89sv"&&(M.textContent=j)},m(r,T){n(r,M,T)},p:ze,d(r){r&&l(M)}}}function vn(J){let M,j='<a href="https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct-FP8" rel="nofollow"><code>meta-llama/Meta-Llama-3.1-405B-Instruct-FP8</code></a> deployment on Vertex AI will take ~30 minutes to deploy, as it needs to allocate the resources on Google Cloud, and then download the weights from the Hugging Face Hub (~10 minutes) and load those for inference in TGI (~3 minutes).';return{c(){M=o("p"),M.innerHTML=j},l(r){M=i(r,"P",{"data-svelte-h":!0}),p(M)!=="svelte-1guha2d"&&(M.innerHTML=j)},m(r,T){n(r,M,T)},p:ze,d(r){r&&l(M)}}}function Zn(J){let M,j="You will need to either retrieve the resource name i.e. the <code>projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}</code> URL yourself via the Google Cloud Console, or just replace the <code>ENDPOINT_ID</code> below that can either be found via the previously instantiated <code>endpoint</code> as <code>endpoint.id</code> or via the Google Cloud Console under the Online predictions where the endpoint is listed.";return{c(){M=o("p"),M.innerHTML=j},l(r){M=i(r,"P",{"data-svelte-h":!0}),p(M)!=="svelte-10r1du6"&&(M.innerHTML=j)},m(r,T){n(r,M,T)},p:ze,d(r){r&&l(M)}}}function $n(J){let M,j='📍 Find the complete example on GitHub <a href="https://github.com/huggingface/Google-Cloud-Containers/tree/gemma2-cloud-run/examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai" rel="nofollow">here</a>!';return{c(){M=o("p"),M.innerHTML=j},l(r){M=i(r,"P",{"data-svelte-h":!0}),p(M)!=="svelte-kjl76a"&&(M.innerHTML=j)},m(r,T){n(r,M,T)},p:ze,d(r){r&&l(M)}}}function _n(J){let M,j,r,T,G,qe,A,fl='<a href="https://huggingface.co/blog/llama31" rel="nofollow">Meta Llama 3.1</a> is the latest open LLM from Meta, a follow up iteration of Llama 3, released in July 2024. Meta Llama 3.1 comes in three sizes: 8B for efficient deployment and development on consumer-size GPU, 70B for large-scale AI native applications, and 405B for synthetic data, LLM as a Judge or distillation; among other use cases. Amongst Meta Llama 3.1 new features, the ones to highlight are: a large context length of 128K tokens (vs original 8K), multilingual capabilities, tool usage capabilities, and a more permissive license.',De,v,wl='This example showcases how to deploy <a href="https://hf.co/meta-llama/Meta-Llama-3.1-405B-Instruct-FP8" rel="nofollow"><code>meta-llama/Meta-Llama-3.1-405B-Instruct-FP8</code></a> on Vertex AI with an A3 accelerator-optimized instance with 8 NVIDIA H100s via the Hugging Face purpose-built Deep Learning Container (DLC) for Text Generation Inference (TGI) on Google Cloud.',Pe,Z,bl='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/gemma2-cloud-run/examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai/assets/model-in-hf-hub.png" alt="&#39;meta-llama/Meta-Llama-3.1-405B-Instruct-FP8&#39; in the Hugging Face Hub"/>',Oe,$,Ke,_,Il='First, you need to install <code>gcloud</code> in your local machine, which is the command-line tool for Google Cloud, following the instructions at <a href="https://cloud.google.com/sdk/docs/install" rel="nofollow">Cloud SDK Documentation - Install the gcloud CLI</a>.',et,B,Cl="Then, you also need to install the <code>google-cloud-aiplatform</code> Python SDK, required to programmatically create the Vertex AI model, register it, acreate the endpoint, and deploy it on Vertex AI.",tt,N,lt,x,Gl="Optionally, to ease the usage of the commands within this tutorial, you need to set the following environment variables for GCP:",nt,W,at,V,Al="Then you need to login into your GCP account and set the project ID to the one you want to use to register and deploy the models on Vertex AI.",st,R,ot,k,vl="Once you are logged in, you need to enable the necessary service APIs in GCP, such as the Vertex AI API, the Compute Engine API, and Google Container Registry related APIs.",it,Q,Mt,E,Zl='Once everything is set up, you can already initialize the Vertex AI session via the <a href="https://github.com/googleapis/python-aiplatform" rel="nofollow"><code>google-cloud-aiplatform</code></a> Python SDK as follows:',pt,H,rt,F,dt,X,$l='To serve <a href="https://hf.co/meta-llama/Meta-Llama-3.1-405B-Instruct-FP8" rel="nofollow"><code>meta-llama/Meta-Llama-3.1-405B-Instruct-FP8</code></a> you need an instance with at least 400GiB of GPU VRAM that supports the FP8 data-type, and the A3 accelerator-optimized machines on Google Cloud are the machines you would need to use.',ct,S,_l='Even if the A3 accelerator-optimized machines with 8 x NVIDIA H100 80GB GPUs are available within Google Cloud, you will still need to request a custom quota increase in Google Cloud, as those need a specific approval. Note that the A3 accelerator-optimized machines are only available in some zones, so make sure to check the availability of both A3 High or even A3 Mega per zone at <a href="https://cloud.google.com/compute/docs/gpus/gpu-regions-zones" rel="nofollow">Compute Engine - GPU regions and zones</a>.',ut,L,Bl='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/gemma2-cloud-run/examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai/assets/a3-general-availability.png" alt="A3 availability in Google Cloud"/>',ht,Y,Nl="In this case, to request a quota increase to use the machine with 8 NVIDIA H100s you will need to increase the following quotas:",yt,z,xl="<li><code>Service: Vertex AI API</code> and <code>Name: Custom model serving Nvidia H100 80GB GPUs per region</code> set to <strong>8</strong></li> <li><code>Service: Vertex AI API</code> and <code>Name: Custom model serving A3 CPUs per region</code> set to <strong>208</strong></li>",mt,q,Wl='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/gemma2-cloud-run/examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai/assets/a3-quota-request.png" alt="A3 Quota Request in Google Cloud"/>',jt,D,Vl='Read more on how to request a quota increase at <a href="https://cloud.google.com/docs/quotas/view-manage" rel="nofollow">Google Cloud Documentation - View and manage quotas</a>.',Ut,P,Tt,O,Rl='Since <a href="https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct-FP8" rel="nofollow"><code>meta-llama/Meta-Llama-3.1-405B-Instruct-FP8</code></a> is a gated model, you need to login into your Hugging Face Hub account, accept the gating requirements, and then generate an access token either with fine-grained read access to the gated model only (recommended), or read-access to your account.',Jt,K,kl='Read more about <a href="https://huggingface.co/docs/hub/en/security-tokens" rel="nofollow">access tokens for the Hugging Face Hub</a>.',gt,ee,Ql='To authenticate, you can either use the <a href="https://github.com/huggingface/huggingface_hub" rel="nofollow"><code>huggingface_hub</code></a> Python SDK as shown below (recommended), or just set the environment variable <code>HF_TOKEN</code> instead.',ft,te,wt,le,bt,ne,El="Then you can already “upload” the model i.e. register the model on Vertex AI. It is not an upload per se, since the model will be automatically downloaded from the Hugging Face Hub in the Hugging Face DLC for TGI on startup via the <code>MODEL_ID</code> environment variable, so what is uploaded is only the configuration, not the model weights.",It,ae,Hl="Before going into the code, let’s quickly review the arguments provided to the <code>upload</code> method:",Ct,se,Fl='<li><strong><code>display_name</code></strong> is the name that will be shown in the Vertex AI Model Registry.</li> <li><strong><code>serving_container_image_uri</code></strong> is the location of the Hugging Face DLC for TGI that will be used for serving the model.</li> <li><strong><code>serving_container_environment_variables</code></strong> are the environment variables that will be used during the container runtime, so these are aligned with the environment variables defined by TGI via the <a href="https://huggingface.co/docs/text-generation-inference/en/basic_tutorials/launcher" rel="nofollow"><code>text-generation-launcher</code></a>, which exposes some environment variables such as the following:<ul><li><code>MODEL_ID</code> the model ID on the Hugging Face Hub.</li> <li><code>NUM_SHARD</code> the number of shards to use i.e. the number of GPUs to use, in this case set to 8 as a node with 8 NVIDIA H100s will be used.</li> <li><code>HUGGING_FACE_HUB_TOKEN</code> is the Hugging Face Hub token, required as <a href="https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct-FP8" rel="nofollow"><code>meta-llama/Meta-Llama-3.1-405B-Instruct-FP8</code></a> is a gated model.</li> <li><code>HF_HUB_ENABLE_HF_TRANSFER</code> to enable a faster download speed via the <a href="https://github.com/huggingface/hf_transfer" rel="nofollow"><code>hf_transfer</code></a> library.</li></ul></li>',Gt,oe,Xl='For more information on the supported arguments, check <a href="https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.Model#google_cloud_aiplatform_Model_upload" rel="nofollow"><code>aiplatform.Model.upload</code> Python reference</a>.',At,f,vt,ie,Zt,Me,Sl='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/gemma2-cloud-run/examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai/assets/vertex-ai-model.png" alt="Meta Llama 3.1 405B FP8 registered on Vertex AI"/>',$t,pe,_t,re,Ll="Once Meta Llama 3.1 405B is registered on Vertex AI Model Registry, you can already deploy it on a Vertex AI Endpoint with the Hugging Face DLC for TGI.",Bt,de,Yl="The <code>deploy</code> method will link the previously created endpoint resource with the model that contains the configuration of the serving container, and then, it will deploy the model on Vertex AI in the specified instance.",Nt,ce,zl="Before going into the code, let’s quickly review the arguments provided to the <code>deploy</code> method:",xt,ue,ql='<li><strong><code>endpoint</code></strong> is the endpoint to deploy the model to, which is optional, and by default will be set to the model display name with the <code>_endpoint</code> suffix.</li> <li><strong><code>machine_type</code></strong>, <strong><code>accelerator_type</code></strong> and <strong><code>accelerator_count</code></strong> are arguments that define which instance to use, and additionally, the accelerator to use and the number of accelerators, respectively. The <code>machine_type</code> and the <code>accelerator_type</code> are tied together, so you will need to select an instance that supports the accelerator that you are using and vice-versa. More information about the different instances at <a href="https://cloud.google.com/compute/docs/gpus" rel="nofollow">Compute Engine Documentation - GPU machine types</a>, and about the <code>accelerator_type</code> naming at <a href="https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec" rel="nofollow">Vertex AI Documentation - MachineSpec</a>.</li>',Wt,he,Dl='For more information on the supported arguments you can check <a href="https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.Model#google_cloud_aiplatform_Model_deploy" rel="nofollow"><code>aiplatform.Model.deploy</code> Python reference</a>.',Vt,w,Rt,ye,kt,b,Qt,me,Pl='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/gemma2-cloud-run/examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai/assets/vertex-ai-endpoint.png" alt="Meta Llama 3.1 405B Instruct FP8 deployed on Vertex AI"/>',Et,je,Ht,Ue,Ol="Finally, you can run the online predictions on Vertex AI using the <code>predict</code> method, which will send the requests to the running endpoint in the <code>/predict</code> route specified within the container following Vertex AI I/O payload formatting.",Ft,Te,Kl="As <code>/generate</code> is the endpoint that is being exposed through TGI on Vertex AI, you will need to format the messages with the chat template before sending the request to Vertex AI, so you will need to install 🤗<code>transformers</code> to use the <code>apply_chat_template</code> method from the <code>PreTrainedTokenizerFast</code>.",Xt,Je,St,ge,en="And then apply the chat template to a conversation using the tokenizer as follows:",Lt,fe,Yt,we,tn='Which is what you will be sending within the payload to the deployed Vertex AI Endpoint, as well as the generation parameters as in <a href="https://huggingface.co/docs/huggingface_hub/main/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation" rel="nofollow">Consuming Text Generation Inference (TGI) -&gt; Generate</a>.',zt,be,qt,Ie,Dt,Ce,ln="If you are willing to run the online prediction within the current session, you can send requests programmatically via the <code>aiplatform.Endpoint</code> (returned by the <code>aiplatform.Model.deploy</code> method) as in the following snippet:",Pt,Ge,Ot,Ae,nn="Producing the following <code>output</code>:",Kt,ve,el,Ze,tl,$e,an="If the Vertex AI Endpoint was deployed in a different session and you want to use it but don’t have access to the <code>deployed_model</code> variable returned by the <code>aiplatform.Model.deploy</code> method as in the previous section; you can also run the following snippet to instantiate the deployed <code>aiplatform.Endpoint</code> via its resource name as <code>projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}</code>.",ll,I,nl,_e,al,Be,sn="Producing the following <code>output</code>:",sl,Ne,ol,xe,il,We,on="Alternatively, for testing purposes you can also use the Vertex AI Online Prediction UI, that provides a field that expects the JSON payload formatted according to the Vertex AI specification (as in the examples above) being:",Ml,Ve,pl,Re,Mn='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/gemma2-cloud-run/examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai/assets/vertex-ai-online-prediction.png" alt="Meta Llama 3.1 405B Instruct FP8 online prediction on Vertex AI"/>',rl,ke,dl,Qe,pn="Finally, you can release the resources that you’ve created as follows, to avoid unnecessary costs:",cl,Ee,rn="<li><code>deployed_model.undeploy_all</code> to undeploy the model from all the endpoints.</li> <li><code>deployed_model.delete</code> to delete the endpoint/s where the model was deployed gracefully, after the <code>undeploy_all</code> method.</li> <li><code>model.delete</code> to delete the model from the registry.</li>",ul,He,hl,Fe,dn="Alternatively, you can also remove those from the Google Cloud Console following the steps:",yl,Xe,cn="<li>Go to Vertex AI in Google Cloud</li> <li>Go to Deploy and use -&gt; Online prediction</li> <li>Click on the endpoint and then on the deployed model/s to “Undeploy model from endpoint”</li> <li>Then go back to the endpoint list and remove the endpoint</li> <li>Finally, go to Deploy and use -&gt; Model Registry, and remove the model</li>",ml,jl,Ul,C,Tl,Se,Jl,Le,gl;return G=new g({props:{title:"Deploy Meta Llama 3.1 405B with TGI DLC on Vertex AI",local:"deploy-meta-llama-31-405b-with-tgi-dlc-on-vertex-ai",headingTag:"h1"}}),$=new g({props:{title:"Setup / Configuration",local:"setup--configuration",headingTag:"h2"}}),N=new U({props:{code:"IXBpcCUyMGluc3RhbGwlMjAtLXVwZ3JhZGUlMjAtLXF1aWV0JTIwZ29vZ2xlLWNsb3VkLWFpcGxhdGZvcm0=",highlighted:"!pip install --upgrade --quiet google-cloud-aiplatform",wrap:!1}}),W=new U({props:{code:"JTI1ZW52JTIwUFJPSkVDVF9JRCUzRHlvdXItcHJvamVjdC1pZCUwQSUyNWVudiUyMExPQ0FUSU9OJTNEeW91ci1sb2NhdGlvbiUwQSUyNWVudiUyMENPTlRBSU5FUl9VUkklM0R1cy1kb2NrZXIucGtnLmRldiUyRmRlZXBsZWFybmluZy1wbGF0Zm9ybS1yZWxlYXNlJTJGZ2NyLmlvJTJGaHVnZ2luZ2ZhY2UtdGV4dC1nZW5lcmF0aW9uLWluZmVyZW5jZS1jdTEyNC4yLTMudWJ1bnR1MjIwNC5weTMxMQ==",highlighted:`%env PROJECT_ID=your-project-<span class="hljs-built_in">id</span>
%env LOCATION=your-location
%env CONTAINER_URI=us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124<span class="hljs-number">.2</span>-<span class="hljs-number">3.</span>ubuntu2204.py311`,wrap:!1}}),R=new U({props:{code:"IWdjbG91ZCUyMGF1dGglMjBsb2dpbiUwQSFnY2xvdWQlMjBhdXRoJTIwYXBwbGljYXRpb24tZGVmYXVsdCUyMGxvZ2luJTIwJTIwJTIzJTIwRm9yJTIwbG9jYWwlMjBkZXZlbG9wbWVudCUwQSFnY2xvdWQlMjBjb25maWclMjBzZXQlMjBwcm9qZWN0JTIwJTI0UFJPSkVDVF9JRA==",highlighted:`!gcloud auth login
!gcloud auth application-default login <span class="hljs-comment"># For local development</span>
!gcloud config <span class="hljs-built_in">set</span> project $PROJECT_ID`,wrap:!1}}),Q=new U({props:{code:"IWdjbG91ZCUyMHNlcnZpY2VzJTIwZW5hYmxlJTIwYWlwbGF0Zm9ybS5nb29nbGVhcGlzLmNvbSUwQSFnY2xvdWQlMjBzZXJ2aWNlcyUyMGVuYWJsZSUyMGNvbXB1dGUuZ29vZ2xlYXBpcy5jb20lMEEhZ2Nsb3VkJTIwc2VydmljZXMlMjBlbmFibGUlMjBjb250YWluZXIuZ29vZ2xlYXBpcy5jb20lMEEhZ2Nsb3VkJTIwc2VydmljZXMlMjBlbmFibGUlMjBjb250YWluZXJyZWdpc3RyeS5nb29nbGVhcGlzLmNvbSUwQSFnY2xvdWQlMjBzZXJ2aWNlcyUyMGVuYWJsZSUyMGNvbnRhaW5lcmZpbGVzeXN0ZW0uZ29vZ2xlYXBpcy5jb20=",highlighted:`!gcloud services enable aiplatform.googleapis.com
!gcloud services enable compute.googleapis.com
!gcloud services enable container.googleapis.com
!gcloud services enable containerregistry.googleapis.com
!gcloud services enable containerfilesystem.googleapis.com`,wrap:!1}}),H=new U({props:{code:"aW1wb3J0JTIwb3MlMEFmcm9tJTIwZ29vZ2xlLmNsb3VkJTIwaW1wb3J0JTIwYWlwbGF0Zm9ybSUwQSUwQWFpcGxhdGZvcm0uaW5pdCglMEElMjAlMjAlMjAlMjBwcm9qZWN0JTNEb3MuZ2V0ZW52KCUyMlBST0pFQ1RfSUQlMjIpJTJDJTBBJTIwJTIwJTIwJTIwbG9jYXRpb24lM0Rvcy5nZXRlbnYoJTIyTE9DQVRJT04lMjIpJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">from</span> google.cloud <span class="hljs-keyword">import</span> aiplatform
aiplatform.init(
project=os.getenv(<span class="hljs-string">&quot;PROJECT_ID&quot;</span>),
location=os.getenv(<span class="hljs-string">&quot;LOCATION&quot;</span>),
)`,wrap:!1}}),F=new g({props:{title:"Quotas on Google Cloud",local:"quotas-on-google-cloud",headingTag:"h3"}}),P=new g({props:{title:"Register model on Vertex AI",local:"register-model-on-vertex-ai",headingTag:"h2"}}),te=new U({props:{code:"IXBpcCUyMGluc3RhbGwlMjAtLXVwZ3JhZGUlMjAtLXF1aWV0JTIwaHVnZ2luZ2ZhY2VfaHVi",highlighted:"!pip install --upgrade --quiet huggingface_hub",wrap:!1}}),le=new U({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGludGVycHJldGVyX2xvZ2luJTBBJTBBaW50ZXJwcmV0ZXJfbG9naW4oKQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> interpreter_login
interpreter_login()`,wrap:!1}}),f=new Ye({props:{$$slots:{default:[Gn]},$$scope:{ctx:J}}}),ie=new U({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGdldF90b2tlbiUwQSUwQW1vZGVsJTIwJTNEJTIwYWlwbGF0Zm9ybS5Nb2RlbC51cGxvYWQoJTBBJTIwJTIwJTIwJTIwZGlzcGxheV9uYW1lJTNEJTIybWV0YS1sbGFtYS0tTWV0YS1MbGFtYS0zLjEtNDA1Qi1JbnN0cnVjdC1GUDglMjIlMkMlMEElMjAlMjAlMjAlMjBzZXJ2aW5nX2NvbnRhaW5lcl9pbWFnZV91cmklM0QlMjIlMjIlMkMlMEElMjAlMjAlMjAlMjBzZXJ2aW5nX2NvbnRhaW5lcl9lbnZpcm9ubWVudF92YXJpYWJsZXMlM0QlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJNT0RFTF9JRCUyMiUzQSUyMCUyMm1ldGEtbGxhbWElMkZNZXRhLUxsYW1hLTMuMS00MDVCLUluc3RydWN0LUZQOCUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMkhVR0dJTkdfRkFDRV9IVUJfVE9LRU4lMjIlM0ElMjBnZXRfdG9rZW4oKSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMkhGX0hVQl9FTkFCTEVfSEZfVFJBTlNGRVIlMjIlM0ElMjAlMjIxJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyTlVNX1NIQVJEJTIyJTNBJTIwJTIyOCUyMiUyQyUwQSUyMCUyMCUyMCUyMCU3RCUyQyUwQSklMEFtb2RlbC53YWl0KCk=",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> get_token
model = aiplatform.Model.upload(
display_name=<span class="hljs-string">&quot;meta-llama--Meta-Llama-3.1-405B-Instruct-FP8&quot;</span>,
serving_container_image_uri=<span class="hljs-string">&quot;&quot;</span>,
serving_container_environment_variables={
<span class="hljs-string">&quot;MODEL_ID&quot;</span>: <span class="hljs-string">&quot;meta-llama/Meta-Llama-3.1-405B-Instruct-FP8&quot;</span>,
<span class="hljs-string">&quot;HUGGING_FACE_HUB_TOKEN&quot;</span>: get_token(),
<span class="hljs-string">&quot;HF_HUB_ENABLE_HF_TRANSFER&quot;</span>: <span class="hljs-string">&quot;1&quot;</span>,
<span class="hljs-string">&quot;NUM_SHARD&quot;</span>: <span class="hljs-string">&quot;8&quot;</span>,
},
)
model.wait()`,wrap:!1}}),pe=new g({props:{title:"Deploy model on Vertex AI",local:"deploy-model-on-vertex-ai",headingTag:"h2"}}),w=new Ye({props:{$$slots:{default:[An]},$$scope:{ctx:J}}}),ye=new U({props:{code:"ZGVwbG95ZWRfbW9kZWwlMjAlM0QlMjBtb2RlbC5kZXBsb3koJTBBJTIwJTIwJTIwJTIwZW5kcG9pbnQlM0RhaXBsYXRmb3JtLkVuZHBvaW50LmNyZWF0ZShkaXNwbGF5X25hbWUlM0QlMjJNZXRhLUxsYW1hLTMuMS00MDVCLUZQOC1FbmRwb2ludCUyMiklMkMlMEElMjAlMjAlMjAlMjBtYWNoaW5lX3R5cGUlM0QlMjJhMy1oaWdoZ3B1LThnJTIyJTJDJTBBJTIwJTIwJTIwJTIwYWNjZWxlcmF0b3JfdHlwZSUzRCUyMk5WSURJQV9IMTAwXzgwR0IlMjIlMkMlMEElMjAlMjAlMjAlMjBhY2NlbGVyYXRvcl9jb3VudCUzRDglMkMlMEElMjAlMjAlMjAlMjBlbmFibGVfYWNjZXNzX2xvZ2dpbmclM0RUcnVlJTJDJTBBKQ==",highlighted:`deployed_model = model.deploy(
endpoint=aiplatform.Endpoint.create(display_name=<span class="hljs-string">&quot;Meta-Llama-3.1-405B-FP8-Endpoint&quot;</span>),
machine_type=<span class="hljs-string">&quot;a3-highgpu-8g&quot;</span>,
accelerator_type=<span class="hljs-string">&quot;NVIDIA_H100_80GB&quot;</span>,
accelerator_count=<span class="hljs-number">8</span>,
enable_access_logging=<span class="hljs-literal">True</span>,
)`,wrap:!1}}),b=new Ye({props:{warning:!0,$$slots:{default:[vn]},$$scope:{ctx:J}}}),je=new g({props:{title:"Online predictions on Vertex AI",local:"online-predictions-on-vertex-ai",headingTag:"h2"}}),Je=new U({props:{code:"JTI1JTI1YmFzaCUwQXBpcCUyMGluc3RhbGwlMjAtLXVwZ3JhZGUlMjAtLXF1aWV0JTIwdHJhbnNmb3JtZXJz",highlighted:`%%bash
pip install --upgrade --quiet transformers`,wrap:!1}}),fe=new U({props:{code:"aW1wb3J0JTIwb3MlMEFmcm9tJTIwaHVnZ2luZ2ZhY2VfaHViJTIwaW1wb3J0JTIwZ2V0X3Rva2VuJTBBZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJtZXRhLWxsYW1hJTJGTWV0YS1MbGFtYS0zLjEtNDA1Qi1JbnN0cnVjdC1GUDglMjIlMkMlMEElMjAlMjAlMjAlMjB0b2tlbiUzRGdldF90b2tlbigpJTJDJTBBKSUwQSUwQW1lc3NhZ2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnN5c3RlbSUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJZb3UlMjBhcmUlMjBhbiUyMGFzc2lzdGFudCUyMHRoYXQlMjByZXNwb25kcyUyMGFzJTIwYSUyMHBpcmF0ZS4lMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJXaGF0J3MlMjB0aGUlMjBUaGVvcnklMjBvZiUyMFJlbGF0aXZpdHklM0YlMjIlN0QlMkMlMEElNUQlMEFpbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIuYXBwbHlfY2hhdF90ZW1wbGF0ZSglMEElMjAlMjAlMjAlMjBtZXNzYWdlcyUyQyUwQSUyMCUyMCUyMCUyMHRva2VuaXplJTNERmFsc2UlMkMlMEElMjAlMjAlMjAlMjBhZGRfZ2VuZXJhdGlvbl9wcm9tcHQlM0RUcnVlJTJDJTBBKSUwQSUyMyUyMCUzQyU3Q2JlZ2luX29mX3RleHQlN0MlM0UlM0MlN0NzdGFydF9oZWFkZXJfaWQlN0MlM0VzeXN0ZW0lM0MlN0NlbmRfaGVhZGVyX2lkJTdDJTNFJTVDbiU1Q25Zb3UlMjBhcmUlMjBhbiUyMGFzc2lzdGFudCUyMHRoYXQlMjByZXNwb25kcyUyMGFzJTIwYSUyMHBpcmF0ZS4lM0MlN0Nlb3RfaWQlN0MlM0UlM0MlN0NzdGFydF9oZWFkZXJfaWQlN0MlM0V1c2VyJTNDJTdDZW5kX2hlYWRlcl9pZCU3QyUzRSU1Q24lNUNuV2hhdCdzJTIwdGhlJTIwVGhlb3J5JTIwb2YlMjBSZWxhdGl2aXR5JTNGJTNDJTdDZW90X2lkJTdDJTNFJTNDJTdDc3RhcnRfaGVhZGVyX2lkJTdDJTNFYXNzaXN0YW50JTNDJTdDZW5kX2hlYWRlcl9pZCU3QyUzRSU1Q24lNUNu",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> get_token
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
<span class="hljs-string">&quot;meta-llama/Meta-Llama-3.1-405B-Instruct-FP8&quot;</span>,
token=get_token(),
)
messages = [
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;system&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;You are an assistant that responds as a pirate.&quot;</span>},
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What&#x27;s the Theory of Relativity?&quot;</span>},
]
inputs = tokenizer.apply_chat_template(
messages,
tokenize=<span class="hljs-literal">False</span>,
add_generation_prompt=<span class="hljs-literal">True</span>,
)
<span class="hljs-comment"># &lt;|begin_of_text|&gt;&lt;|start_header_id|&gt;system&lt;|end_header_id|&gt;\\n\\nYou are an assistant that responds as a pirate.&lt;|eot_id|&gt;&lt;|start_header_id|&gt;user&lt;|end_header_id|&gt;\\n\\nWhat&#x27;s the Theory of Relativity?&lt;|eot_id|&gt;&lt;|start_header_id|&gt;assistant&lt;|end_header_id|&gt;\\n\\n</span>`,wrap:!1}}),be=new g({props:{title:"Via Python",local:"via-python",headingTag:"h3"}}),Ie=new g({props:{title:"Within the same session",local:"within-the-same-session",headingTag:"h4"}}),Ge=new U({props:{code:"b3V0cHV0JTIwJTNEJTIwZGVwbG95ZWRfbW9kZWwucHJlZGljdCglMEElMjAlMjAlMjAlMjBpbnN0YW5jZXMlM0QlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJpbnB1dHMlMjIlM0ElMjAlMjIlM0MlN0NiZWdpbl9vZl90ZXh0JTdDJTNFJTNDJTdDc3RhcnRfaGVhZGVyX2lkJTdDJTNFc3lzdGVtJTNDJTdDZW5kX2hlYWRlcl9pZCU3QyUzRSU1Q24lNUNuWW91JTIwYXJlJTIwYW4lMjBhc3Npc3RhbnQlMjB0aGF0JTIwcmVzcG9uZHMlMjBhcyUyMGElMjBwaXJhdGUuJTNDJTdDZW90X2lkJTdDJTNFJTNDJTdDc3RhcnRfaGVhZGVyX2lkJTdDJTNFdXNlciUzQyU3Q2VuZF9oZWFkZXJfaWQlN0MlM0UlNUNuJTVDbldoYXQncyUyMHRoZSUyMFRoZW9yeSUyMG9mJTIwUmVsYXRpdml0eSUzRiUzQyU3Q2VvdF9pZCU3QyUzRSUzQyU3Q3N0YXJ0X2hlYWRlcl9pZCU3QyUzRWFzc2lzdGFudCUzQyU3Q2VuZF9oZWFkZXJfaWQlN0MlM0UlNUNuJTVDbiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnBhcmFtZXRlcnMlMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJtYXhfbmV3X3Rva2VucyUyMiUzQSUyMDEyOCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmRvX3NhbXBsZSUyMiUzQSUyMFRydWUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ0b3BfcCUyMiUzQSUyMDAuOTUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ0ZW1wZXJhdHVyZSUyMiUzQSUyMDEuMCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3RCUyQyUwQSUyMCUyMCUyMCUyMCU1RCUwQSklMEFwcmludChvdXRwdXQucHJlZGljdGlvbnMlNUIwJTVEKQ==",highlighted:`output = deployed_model.predict(
instances=[
{
<span class="hljs-string">&quot;inputs&quot;</span>: <span class="hljs-string">&quot;&lt;|begin_of_text|&gt;&lt;|start_header_id|&gt;system&lt;|end_header_id|&gt;\\n\\nYou are an assistant that responds as a pirate.&lt;|eot_id|&gt;&lt;|start_header_id|&gt;user&lt;|end_header_id|&gt;\\n\\nWhat&#x27;s the Theory of Relativity?&lt;|eot_id|&gt;&lt;|start_header_id|&gt;assistant&lt;|end_header_id|&gt;\\n\\n&quot;</span>,
<span class="hljs-string">&quot;parameters&quot;</span>: {
<span class="hljs-string">&quot;max_new_tokens&quot;</span>: <span class="hljs-number">128</span>,
<span class="hljs-string">&quot;do_sample&quot;</span>: <span class="hljs-literal">True</span>,
<span class="hljs-string">&quot;top_p&quot;</span>: <span class="hljs-number">0.95</span>,
<span class="hljs-string">&quot;temperature&quot;</span>: <span class="hljs-number">1.0</span>,
},
},
]
)
<span class="hljs-built_in">print</span>(output.predictions[<span class="hljs-number">0</span>])`,wrap:!1}}),ve=new U({props:{code:"UHJlZGljdGlvbihwcmVkaWN0aW9ucyUzRCU1QiUyMlllciUyMHdhbnQlMjB0YSUyMGtub3clMjBhYm91dCUyMHRoZW0lMjBmYW5jeSUyMHNjaWVuY2UlMjB0aGluZ3MlMkMlMjBlaCUzRiUyMEFscmlnaHQlMjB0aGVuJTJDJTIwbWF0ZXklMkMlMjBzZXR0bGUlMjB5ZXJzZWxmJTIwZG93biUyMHdpdGglMjBhJTIwcGludCUyMG8nJTIwZ3JvZyUyMGFuZCUyMGxpc3RlbiUyMGNsb3NlLiUyMEklMjBiZSUyMHRlbGxpbiclMjB5ZSUyMGFib3V0JTIwdGhlJTIwVGhlb3J5JTIwbyclMjBSZWxhdGl2aXR5JTJDJTIwYXMlMjBwcm9wb3NlZCUyMGJ5JTIwdGhhdCUyMHN3YXNoYnVja2xpbiclMjBnZW5pdXMlMkMlMjBBbGJlcnQlMjBFaW5zdGVpbi4lNUNuJTVDbk5vdyUyQyUyMHllJTIwc2VlJTJDJTIwRWluc3RlaW4lMjBzYWlkJTIwdGhhdCUyMHRpbWUlMjBhbmQlMjBzcGFjZSUyMGJlJTIwY29ubmVjdGVkJTIwbGlrZSUyMHRoZSUyMHNlYSUyMGFuZCUyMHRoZSUyMHdpbmQuJTIwWWUlMjBjYW4ndCUyMGhhdmUlMjBvbmUlMjB3aXRob3V0JTIwdGhlJTIwb3RoZXIlMkMlMjBzYXZ2eSUzRiUyMEFuZCUyMGhlJTIwcHJvcG9zZWQlMjB0aGF0JTIwaG93JTIweWUlMjBzZWUlMjB0aW1lJTIwYW5kJTIwc3BhY2UlMjBkZXBlbmRzJTIwb24lMjBob3clMjBmYXN0JTIweWUlMjBiZSUyMG1vdmluJyUyMGFuZCUyMHdoZXJlJTIweWUlMjBiZSUyMHN0YW5kaW4nLiUyMFRoYXQlMjBiZSUyMGNhbGxlZCUyMHJlbGF0aXZpdHklMkMlMjBtZSUyMiU1RCUyQyUyMGRlcGxveWVkX21vZGVsX2lkJTNEJyoqKiclMkMlMjBtZXRhZGF0YSUzRE5vbmUlMkMlMjBtb2RlbF92ZXJzaW9uX2lkJTNEJzEnJTJDJTIwbW9kZWxfcmVzb3VyY2VfbmFtZSUzRCdwcm9qZWN0cyUyRioqKiUyRmxvY2F0aW9ucyUyRnVzLWNlbnRyYWwxJTJGbW9kZWxzJTJGKioqJyUyQyUyMGV4cGxhbmF0aW9ucyUzRE5vbmUp",highlighted:'<span class="hljs-type">Prediction</span>(predictions=[<span class="hljs-comment">&quot;Yer want ta know about them fancy science things, eh? Alright then, matey, settle yerself down with a pint o&#x27; grog and listen close. I be tellin&#x27; ye about the Theory o&#x27; Relativity, as proposed by that swashbucklin&#x27; genius, Albert Einstein.\\n\\nNow, ye see, Einstein said that time and space be connected like the sea and the wind. Ye can&#x27;t have one without the other, savvy? And he proposed that how ye see time and space depends on how fast ye be movin&#x27; and where ye be standin&#x27;. That be called relativity, me&quot;</span>], deployed_model_id=<span class="hljs-string">&#x27;***&#x27;</span>, metadata=<span class="hljs-type">None</span>, model_version_id=<span class="hljs-string">&#x27;1&#x27;</span>, model_resource_name=<span class="hljs-string">&#x27;projects/***/locations/us-central1/models/***&#x27;</span>, explanations=<span class="hljs-type">None</span>)',wrap:!1}}),Ze=new g({props:{title:"From a different session",local:"from-a-different-session",headingTag:"h4"}}),I=new Ye({props:{$$slots:{default:[Zn]},$$scope:{ctx:J}}}),_e=new U({props:{code:"aW1wb3J0JTIwb3MlMEFmcm9tJTIwZ29vZ2xlLmNsb3VkJTIwaW1wb3J0JTIwYWlwbGF0Zm9ybSUwQSUwQWFpcGxhdGZvcm0uaW5pdChwcm9qZWN0JTNEb3MuZ2V0ZW52KCUyMlBST0pFQ1RfSUQlMjIpJTJDJTIwbG9jYXRpb24lM0Rvcy5nZXRlbnYoJTIyTE9DQVRJT04lMjIpKSUwQSUwQWVuZHBvaW50X2Rpc3BsYXlfbmFtZSUyMCUzRCUyMCUyMk1ldGEtTGxhbWEtMy4xLTQwNUItRlA4LUVuZHBvaW50JTIyJTIwJTIwJTIzJTIwVE9ETyUzQSUyMGNoYW5nZSUyMHRvJTIweW91ciUyMGVuZHBvaW50JTIwZGlzcGxheSUyMG5hbWUlMEElMEElMjMlMjBJdGVyYXRlcyUyMG92ZXIlMjBhbGwlMjB0aGUlMjBWZXJ0ZXglMjBBSSUyMEVuZHBvaW50cyUyMHdpdGhpbiUyMHRoZSUyMGN1cnJlbnQlMjBwcm9qZWN0JTIwYW5kJTIwa2VlcHMlMjB0aGUlMjBmaXJzdCUyMG1hdGNoJTIwKGlmJTIwYW55KSUyQyUyMG90aGVyd2lzZSUyMHNldCUyMHRvJTIwTm9uZSUwQUVORFBPSU5UX0lEJTIwJTNEJTIwbmV4dCglMEElMjAlMjAlMjAlMjAoZW5kcG9pbnQubmFtZSUyMGZvciUyMGVuZHBvaW50JTIwaW4lMjBhaXBsYXRmb3JtLkVuZHBvaW50Lmxpc3QoKSUyMGlmJTIwZW5kcG9pbnQuZGlzcGxheV9uYW1lJTIwJTNEJTNEJTIwZW5kcG9pbnRfZGlzcGxheV9uYW1lKSUyQyUyME5vbmUlMEEpJTBBYXNzZXJ0JTIwRU5EUE9JTlRfSUQlMkMlMjAoJTBBJTIwJTIwJTIwJTIwJTIyJTYwRU5EUE9JTlRfSUQlNjAlMjBpcyUyMG5vdCUyMHNldCUyQyUyMHBsZWFzZSUyMG1ha2UlMjBzdXJlJTIwdGhhdCUyMHRoZSUyMCU2MGVuZHBvaW50X2Rpc3BsYXlfbmFtZSU2MCUyMGlzJTIwY29ycmVjdCUyMGF0JTIwJTIyJTBBJTIwJTIwJTIwJTIwZiUyMmh0dHBzJTNBJTJGJTJGY29uc29sZS5jbG91ZC5nb29nbGUuY29tJTJGdmVydGV4LWFpJTJGb25saW5lLXByZWRpY3Rpb24lMkZlbmRwb2ludHMlM0Zwcm9qZWN0JTNEJTdCb3MuZ2V0ZW52KCdQUk9KRUNUX0lEJyklN0QlMjIlMEEpJTBBJTBBZW5kcG9pbnQlMjAlM0QlMjBhaXBsYXRmb3JtLkVuZHBvaW50KCUwQSUyMCUyMCUyMCUyMGYlMjJwcm9qZWN0cyUyRiU3Qm9zLmdldGVudignUFJPSkVDVF9JRCcpJTdEJTJGbG9jYXRpb25zJTJGJTdCb3MuZ2V0ZW52KCdMT0NBVElPTicpJTdEJTJGZW5kcG9pbnRzJTJGJTdCRU5EUE9JTlRfSUQlN0QlMjIlMEEpJTBBb3V0cHV0JTIwJTNEJTIwZW5kcG9pbnQucHJlZGljdCglMEElMjAlMjAlMjAlMjBpbnN0YW5jZXMlM0QlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJpbnB1dHMlMjIlM0ElMjAlMjIlM0MlN0NiZWdpbl9vZl90ZXh0JTdDJTNFJTNDJTdDc3RhcnRfaGVhZGVyX2lkJTdDJTNFc3lzdGVtJTNDJTdDZW5kX2hlYWRlcl9pZCU3QyUzRSU1Q24lNUNuWW91JTIwYXJlJTIwYW4lMjBhc3Npc3RhbnQlMjB0aGF0JTIwcmVzcG9uZHMlMjBhcyUyMGElMjBwaXJhdGUuJTNDJTdDZW90X2lkJTdDJTNFJTNDJTdDc3RhcnRfaGVhZGVyX2lkJTdDJTNFdXNlciUzQyU3Q2VuZF9oZWFkZXJfaWQlN0MlM0UlNUNuJTVDbldoYXQncyUyMHRoZSUyMFRoZW9yeSUyMG9mJTIwUmVsYXRpdml0eSUzRiUzQyU3Q2VvdF9pZCU3QyUzRSUzQyU3Q3N0YXJ0X2hlYWRlcl9pZCU3QyUzRWFzc2lzdGFudCUzQyU3Q2VuZF9oZWFkZXJfaWQlN0MlM0UlNUNuJTVDbiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnBhcmFtZXRlcnMlMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJtYXhfbmV3X3Rva2VucyUyMiUzQSUyMDEyOCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmRvX3NhbXBsZSUyMiUzQSUyMFRydWUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ0b3BfcCUyMiUzQSUyMDAuOTUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ0ZW1wZXJhdHVyZSUyMiUzQSUyMDAuNyUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3RCUyQyUwQSUyMCUyMCUyMCUyMCU1RCUyQyUwQSklMEFwcmludChvdXRwdXQucHJlZGljdGlvbnMlNUIwJTVEKQ==",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">from</span> google.cloud <span class="hljs-keyword">import</span> aiplatform
aiplatform.init(project=os.getenv(<span class="hljs-string">&quot;PROJECT_ID&quot;</span>), location=os.getenv(<span class="hljs-string">&quot;LOCATION&quot;</span>))
endpoint_display_name = <span class="hljs-string">&quot;Meta-Llama-3.1-405B-FP8-Endpoint&quot;</span> <span class="hljs-comment"># <span class="hljs-doctag">TODO:</span> change to your endpoint display name</span>
<span class="hljs-comment"># Iterates over all the Vertex AI Endpoints within the current project and keeps the first match (if any), otherwise set to None</span>
ENDPOINT_ID = <span class="hljs-built_in">next</span>(
(endpoint.name <span class="hljs-keyword">for</span> endpoint <span class="hljs-keyword">in</span> aiplatform.Endpoint.<span class="hljs-built_in">list</span>() <span class="hljs-keyword">if</span> endpoint.display_name == endpoint_display_name), <span class="hljs-literal">None</span>
)
<span class="hljs-keyword">assert</span> ENDPOINT_ID, (
<span class="hljs-string">&quot;\`ENDPOINT_ID\` is not set, please make sure that the \`endpoint_display_name\` is correct at &quot;</span>
<span class="hljs-string">f&quot;https://console.cloud.google.com/vertex-ai/online-prediction/endpoints?project=<span class="hljs-subst">{os.getenv(<span class="hljs-string">&#x27;PROJECT_ID&#x27;</span>)}</span>&quot;</span>
)
endpoint = aiplatform.Endpoint(
<span class="hljs-string">f&quot;projects/<span class="hljs-subst">{os.getenv(<span class="hljs-string">&#x27;PROJECT_ID&#x27;</span>)}</span>/locations/<span class="hljs-subst">{os.getenv(<span class="hljs-string">&#x27;LOCATION&#x27;</span>)}</span>/endpoints/<span class="hljs-subst">{ENDPOINT_ID}</span>&quot;</span>
)
output = endpoint.predict(
instances=[
{
<span class="hljs-string">&quot;inputs&quot;</span>: <span class="hljs-string">&quot;&lt;|begin_of_text|&gt;&lt;|start_header_id|&gt;system&lt;|end_header_id|&gt;\\n\\nYou are an assistant that responds as a pirate.&lt;|eot_id|&gt;&lt;|start_header_id|&gt;user&lt;|end_header_id|&gt;\\n\\nWhat&#x27;s the Theory of Relativity?&lt;|eot_id|&gt;&lt;|start_header_id|&gt;assistant&lt;|end_header_id|&gt;\\n\\n&quot;</span>,
<span class="hljs-string">&quot;parameters&quot;</span>: {
<span class="hljs-string">&quot;max_new_tokens&quot;</span>: <span class="hljs-number">128</span>,
<span class="hljs-string">&quot;do_sample&quot;</span>: <span class="hljs-literal">True</span>,
<span class="hljs-string">&quot;top_p&quot;</span>: <span class="hljs-number">0.95</span>,
<span class="hljs-string">&quot;temperature&quot;</span>: <span class="hljs-number">0.7</span>,
},
},
],
)
<span class="hljs-built_in">print</span>(output.predictions[<span class="hljs-number">0</span>])`,wrap:!1}}),Ne=new U({props:{code:"UHJlZGljdGlvbihwcmVkaWN0aW9ucyUzRCU1QiUyMlllciUyMGxvb2tpbiclMjBmZXIlMjBhJTIwdHJlYXN1cmUlMjB0cm92ZSUyMG8nJTIwa25vd2xlZGdlJTIwYWJvdXQlMjB0aGVtJTIwZmFuY3klMjBwaHlzaWNzJTJDJTIwZWglM0YlMjBBbHJpZ2h0JTIwdGhlbiUyQyUyMG1hdGV5JTJDJTIwc2V0dGxlJTIweWVyc2VsZiUyMGRvd24lMjB3aXRoJTIwYSUyMHBpbnQlMjBvJyUyMGdyb2clMjBhbmQlMjBsaXN0ZW4lMjBjbG9zZSUyQyUyMGFzJTIwSSUyMHNwaW4lMjB5ZSUyMHRoZSUyMHlhcm4lMjBvJyUyMEVpbnN0ZWluJ3MlMjBUaGVvcnklMjBvJyUyMFJlbGF0aXZpdHkuJTVDbiU1Q25JdCUyMGJlJTIwYSUyMHRhbGUlMjBvJyUyMHR3byUyMHBhcnRzJTJDJTIwbWUlMjBoZWFydHklM0ElMjBTcGVjaWFsJTIwUmVsYXRpdml0eSUyMGFuZCUyMEdlbmVyYWwlMjBSZWxhdGl2aXR5LiUyME5vdyUyQyUyMEklMjBrbm93JTIwd2hhdCUyMHllJTIwYmUlMjB0aGlua2luJyUzQSUyMHdoYXQlMjBpbiUyMGJsYXplcyUyMGJlJTIwdGhlJTIwZGlmZmVyZW5jZSUzRiUyMFdlbGwlMkMlMjBtYXRleSUyQyUyMGxldCUyMG1lJTIwYnJlYWslMjBpdCUyMGRvd24lMjBmZXIlMjB5ZS4lNUNuJTVDblNwZWNpYWwlMjBSZWxhdGl2aXR5JTIwYmUlMjB0aGUlMjBpZGVhJTIwdGhhdCUyMHRpbWUlMjBhbmQlMjBzcGFjZSUyMGJlJTIwY29ubmVjdGVkJTIwbGlrZSUyMHRoZSUyMHNlYSUyMGFuZCUyMHRoZSUyMHNreS4lMjIlNUQlMkMlMjBkZXBsb3llZF9tb2RlbF9pZCUzRCcqKionJTJDJTIwbWV0YWRhdGElM0ROb25lJTJDJTIwbW9kZWxfdmVyc2lvbl9pZCUzRCcxJyUyQyUyMG1vZGVsX3Jlc291cmNlX25hbWUlM0QncHJvamVjdHMlMkYqKiolMkZsb2NhdGlvbnMlMkZ1cy1jZW50cmFsMSUyRm1vZGVscyUyRioqKiclMkMlMjBleHBsYW5hdGlvbnMlM0ROb25lKQ==",highlighted:'Prediction(predictions=[<span class="hljs-string">&quot;Yer lookin&#x27; fer a treasure trove o&#x27; knowledge about them fancy physics, eh? Alright then, matey, settle yerself down with a pint o&#x27; grog and listen close, as I spin ye the yarn o&#x27; Einstein&#x27;s Theory o&#x27; Relativity.\\n\\nIt be a tale o&#x27; two parts, me hearty: Special Relativity and General Relativity. Now, I know what ye be thinkin&#x27;: what in blazes be the difference? Well, matey, let me break it down fer ye.\\n\\nSpecial Relativity be the idea that time and space be connected like the sea and the sky.&quot;</span>], <span class="hljs-attribute">deployed_model_id</span>=<span class="hljs-string">&#x27;***&#x27;</span>, <span class="hljs-attribute">metadata</span>=None, <span class="hljs-attribute">model_version_id</span>=<span class="hljs-string">&#x27;1&#x27;</span>, <span class="hljs-attribute">model_resource_name</span>=<span class="hljs-string">&#x27;projects/***/locations/us-central1/models/***&#x27;</span>, <span class="hljs-attribute">explanations</span>=None)',wrap:!1}}),xe=new g({props:{title:"Via the Vertex AI Online Prediction UI",local:"via-the-vertex-ai-online-prediction-ui",headingTag:"h3"}}),Ve=new U({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyaW5zdGFuY2VzJTIyJTNBJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyaW5wdXRzJTIyJTNBJTIwJTIyJTNDJTdDYmVnaW5fb2ZfdGV4dCU3QyUzRSUzQyU3Q3N0YXJ0X2hlYWRlcl9pZCU3QyUzRXN5c3RlbSUzQyU3Q2VuZF9oZWFkZXJfaWQlN0MlM0UlNUNuJTVDbllvdSUyMGFyZSUyMGFuJTIwYXNzaXN0YW50JTIwdGhhdCUyMHJlc3BvbmRzJTIwYXMlMjBhJTIwcGlyYXRlLiUzQyU3Q2VvdF9pZCU3QyUzRSUzQyU3Q3N0YXJ0X2hlYWRlcl9pZCU3QyUzRXVzZXIlM0MlN0NlbmRfaGVhZGVyX2lkJTdDJTNFJTVDbiU1Q25XaGF0J3MlMjB0aGUlMjBUaGVvcnklMjBvZiUyMFJlbGF0aXZpdHklM0YlM0MlN0Nlb3RfaWQlN0MlM0UlM0MlN0NzdGFydF9oZWFkZXJfaWQlN0MlM0Vhc3Npc3RhbnQlM0MlN0NlbmRfaGVhZGVyX2lkJTdDJTNFJTVDbiU1Q24lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJwYXJhbWV0ZXJzJTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIybWF4X25ld190b2tlbnMlMjIlM0ElMjAxMjglMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJkb19zYW1wbGUlMjIlM0ElMjB0cnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIydG9wX3AlMjIlM0ElMjAwLjk1JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIydGVtcGVyYXR1cmUlMjIlM0ElMjAwLjclMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMEElMjAlMjAlMjAlMjAlNUQlMEElN0Q=",highlighted:`<span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;instances&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span>
<span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;inputs&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;&lt;|begin_of_text|&gt;&lt;|start_header_id|&gt;system&lt;|end_header_id|&gt;\\n\\nYou are an assistant that responds as a pirate.&lt;|eot_id|&gt;&lt;|start_header_id|&gt;user&lt;|end_header_id|&gt;\\n\\nWhat&#x27;s the Theory of Relativity?&lt;|eot_id|&gt;&lt;|start_header_id|&gt;assistant&lt;|end_header_id|&gt;\\n\\n&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;parameters&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;max_new_tokens&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-number">128</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;do_sample&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;top_p&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0.95</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;temperature&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0.7</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">]</span>
<span class="hljs-punctuation">}</span>`,wrap:!1}}),ke=new g({props:{title:"Resource clean-up",local:"resource-clean-up",headingTag:"h2"}}),He=new U({props:{code:"ZGVwbG95ZWRfbW9kZWwudW5kZXBsb3lfYWxsKCklMEFkZXBsb3llZF9tb2RlbC5kZWxldGUoKSUwQW1vZGVsLmRlbGV0ZSgp",highlighted:`deployed_model.undeploy_all()
deployed_model.delete()
model.delete()`,wrap:!1}}),C=new Ye({props:{$$slots:{default:[$n]},$$scope:{ctx:J}}}),Se=new Cn({props:{source:"https://github.com/huggingface/Google-Cloud-Containers/blob/main/docs/source/examples/vertex-ai-notebooks-deploy-llama-3-1-405b-on-vertex-ai.mdx"}}),{c(){M=o("meta"),j=a(),r=o("p"),T=a(),d(G.$$.fragment),qe=a(),A=o("p"),A.innerHTML=fl,De=a(),v=o("p"),v.innerHTML=wl,Pe=a(),Z=o("p"),Z.innerHTML=bl,Oe=a(),d($.$$.fragment),Ke=a(),_=o("p"),_.innerHTML=Il,et=a(),B=o("p"),B.innerHTML=Cl,tt=a(),d(N.$$.fragment),lt=a(),x=o("p"),x.textContent=Gl,nt=a(),d(W.$$.fragment),at=a(),V=o("p"),V.textContent=Al,st=a(),d(R.$$.fragment),ot=a(),k=o("p"),k.textContent=vl,it=a(),d(Q.$$.fragment),Mt=a(),E=o("p"),E.innerHTML=Zl,pt=a(),d(H.$$.fragment),rt=a(),d(F.$$.fragment),dt=a(),X=o("p"),X.innerHTML=$l,ct=a(),S=o("p"),S.innerHTML=_l,ut=a(),L=o("p"),L.innerHTML=Bl,ht=a(),Y=o("p"),Y.textContent=Nl,yt=a(),z=o("ul"),z.innerHTML=xl,mt=a(),q=o("p"),q.innerHTML=Wl,jt=a(),D=o("p"),D.innerHTML=Vl,Ut=a(),d(P.$$.fragment),Tt=a(),O=o("p"),O.innerHTML=Rl,Jt=a(),K=o("p"),K.innerHTML=kl,gt=a(),ee=o("p"),ee.innerHTML=Ql,ft=a(),d(te.$$.fragment),wt=a(),d(le.$$.fragment),bt=a(),ne=o("p"),ne.innerHTML=El,It=a(),ae=o("p"),ae.innerHTML=Hl,Ct=a(),se=o("ul"),se.innerHTML=Fl,Gt=a(),oe=o("p"),oe.innerHTML=Xl,At=a(),d(f.$$.fragment),vt=a(),d(ie.$$.fragment),Zt=a(),Me=o("p"),Me.innerHTML=Sl,$t=a(),d(pe.$$.fragment),_t=a(),re=o("p"),re.textContent=Ll,Bt=a(),de=o("p"),de.innerHTML=Yl,Nt=a(),ce=o("p"),ce.innerHTML=zl,xt=a(),ue=o("ul"),ue.innerHTML=ql,Wt=a(),he=o("p"),he.innerHTML=Dl,Vt=a(),d(w.$$.fragment),Rt=a(),d(ye.$$.fragment),kt=a(),d(b.$$.fragment),Qt=a(),me=o("p"),me.innerHTML=Pl,Et=a(),d(je.$$.fragment),Ht=a(),Ue=o("p"),Ue.innerHTML=Ol,Ft=a(),Te=o("p"),Te.innerHTML=Kl,Xt=a(),d(Je.$$.fragment),St=a(),ge=o("p"),ge.textContent=en,Lt=a(),d(fe.$$.fragment),Yt=a(),we=o("p"),we.innerHTML=tn,zt=a(),d(be.$$.fragment),qt=a(),d(Ie.$$.fragment),Dt=a(),Ce=o("p"),Ce.innerHTML=ln,Pt=a(),d(Ge.$$.fragment),Ot=a(),Ae=o("p"),Ae.innerHTML=nn,Kt=a(),d(ve.$$.fragment),el=a(),d(Ze.$$.fragment),tl=a(),$e=o("p"),$e.innerHTML=an,ll=a(),d(I.$$.fragment),nl=a(),d(_e.$$.fragment),al=a(),Be=o("p"),Be.innerHTML=sn,sl=a(),d(Ne.$$.fragment),ol=a(),d(xe.$$.fragment),il=a(),We=o("p"),We.textContent=on,Ml=a(),d(Ve.$$.fragment),pl=a(),Re=o("p"),Re.innerHTML=Mn,rl=a(),d(ke.$$.fragment),dl=a(),Qe=o("p"),Qe.textContent=pn,cl=a(),Ee=o("ul"),Ee.innerHTML=rn,ul=a(),d(He.$$.fragment),hl=a(),Fe=o("p"),Fe.textContent=dn,yl=a(),Xe=o("ul"),Xe.innerHTML=cn,ml=a(),jl=o("hr"),Ul=a(),d(C.$$.fragment),Tl=a(),d(Se.$$.fragment),Jl=a(),Le=o("p"),this.h()},l(e){const t=bn("svelte-u9bgzb",document.head);M=i(t,"META",{name:!0,content:!0}),t.forEach(l),j=s(e),r=i(e,"P",{}),Un(r).forEach(l),T=s(e),c(G.$$.fragment,e),qe=s(e),A=i(e,"P",{"data-svelte-h":!0}),p(A)!=="svelte-1n624pr"&&(A.innerHTML=fl),De=s(e),v=i(e,"P",{"data-svelte-h":!0}),p(v)!=="svelte-tsvylf"&&(v.innerHTML=wl),Pe=s(e),Z=i(e,"P",{"data-svelte-h":!0}),p(Z)!=="svelte-np7eth"&&(Z.innerHTML=bl),Oe=s(e),c($.$$.fragment,e),Ke=s(e),_=i(e,"P",{"data-svelte-h":!0}),p(_)!=="svelte-1o67gfb"&&(_.innerHTML=Il),et=s(e),B=i(e,"P",{"data-svelte-h":!0}),p(B)!=="svelte-1ebrquh"&&(B.innerHTML=Cl),tt=s(e),c(N.$$.fragment,e),lt=s(e),x=i(e,"P",{"data-svelte-h":!0}),p(x)!=="svelte-1i8n6rq"&&(x.textContent=Gl),nt=s(e),c(W.$$.fragment,e),at=s(e),V=i(e,"P",{"data-svelte-h":!0}),p(V)!=="svelte-15nvzfz"&&(V.textContent=Al),st=s(e),c(R.$$.fragment,e),ot=s(e),k=i(e,"P",{"data-svelte-h":!0}),p(k)!=="svelte-1nm8o2l"&&(k.textContent=vl),it=s(e),c(Q.$$.fragment,e),Mt=s(e),E=i(e,"P",{"data-svelte-h":!0}),p(E)!=="svelte-1f1rnfu"&&(E.innerHTML=Zl),pt=s(e),c(H.$$.fragment,e),rt=s(e),c(F.$$.fragment,e),dt=s(e),X=i(e,"P",{"data-svelte-h":!0}),p(X)!=="svelte-49fshu"&&(X.innerHTML=$l),ct=s(e),S=i(e,"P",{"data-svelte-h":!0}),p(S)!=="svelte-1ylcy5e"&&(S.innerHTML=_l),ut=s(e),L=i(e,"P",{"data-svelte-h":!0}),p(L)!=="svelte-1c6qedf"&&(L.innerHTML=Bl),ht=s(e),Y=i(e,"P",{"data-svelte-h":!0}),p(Y)!=="svelte-1s5855i"&&(Y.textContent=Nl),yt=s(e),z=i(e,"UL",{"data-svelte-h":!0}),p(z)!=="svelte-caffm5"&&(z.innerHTML=xl),mt=s(e),q=i(e,"P",{"data-svelte-h":!0}),p(q)!=="svelte-t0njbv"&&(q.innerHTML=Wl),jt=s(e),D=i(e,"P",{"data-svelte-h":!0}),p(D)!=="svelte-1sivr44"&&(D.innerHTML=Vl),Ut=s(e),c(P.$$.fragment,e),Tt=s(e),O=i(e,"P",{"data-svelte-h":!0}),p(O)!=="svelte-14ie1f2"&&(O.innerHTML=Rl),Jt=s(e),K=i(e,"P",{"data-svelte-h":!0}),p(K)!=="svelte-1bvo1af"&&(K.innerHTML=kl),gt=s(e),ee=i(e,"P",{"data-svelte-h":!0}),p(ee)!=="svelte-16zhg01"&&(ee.innerHTML=Ql),ft=s(e),c(te.$$.fragment,e),wt=s(e),c(le.$$.fragment,e),bt=s(e),ne=i(e,"P",{"data-svelte-h":!0}),p(ne)!=="svelte-1voap9p"&&(ne.innerHTML=El),It=s(e),ae=i(e,"P",{"data-svelte-h":!0}),p(ae)!=="svelte-zklutt"&&(ae.innerHTML=Hl),Ct=s(e),se=i(e,"UL",{"data-svelte-h":!0}),p(se)!=="svelte-18m17zt"&&(se.innerHTML=Fl),Gt=s(e),oe=i(e,"P",{"data-svelte-h":!0}),p(oe)!=="svelte-1f83l3s"&&(oe.innerHTML=Xl),At=s(e),c(f.$$.fragment,e),vt=s(e),c(ie.$$.fragment,e),Zt=s(e),Me=i(e,"P",{"data-svelte-h":!0}),p(Me)!=="svelte-qaf7of"&&(Me.innerHTML=Sl),$t=s(e),c(pe.$$.fragment,e),_t=s(e),re=i(e,"P",{"data-svelte-h":!0}),p(re)!=="svelte-1cjntg8"&&(re.textContent=Ll),Bt=s(e),de=i(e,"P",{"data-svelte-h":!0}),p(de)!=="svelte-njbdvc"&&(de.innerHTML=Yl),Nt=s(e),ce=i(e,"P",{"data-svelte-h":!0}),p(ce)!=="svelte-1ezlr9j"&&(ce.innerHTML=zl),xt=s(e),ue=i(e,"UL",{"data-svelte-h":!0}),p(ue)!=="svelte-1mv29i5"&&(ue.innerHTML=ql),Wt=s(e),he=i(e,"P",{"data-svelte-h":!0}),p(he)!=="svelte-1d6p8k5"&&(he.innerHTML=Dl),Vt=s(e),c(w.$$.fragment,e),Rt=s(e),c(ye.$$.fragment,e),kt=s(e),c(b.$$.fragment,e),Qt=s(e),me=i(e,"P",{"data-svelte-h":!0}),p(me)!=="svelte-lfolo9"&&(me.innerHTML=Pl),Et=s(e),c(je.$$.fragment,e),Ht=s(e),Ue=i(e,"P",{"data-svelte-h":!0}),p(Ue)!=="svelte-166ya0u"&&(Ue.innerHTML=Ol),Ft=s(e),Te=i(e,"P",{"data-svelte-h":!0}),p(Te)!=="svelte-1i26vhx"&&(Te.innerHTML=Kl),Xt=s(e),c(Je.$$.fragment,e),St=s(e),ge=i(e,"P",{"data-svelte-h":!0}),p(ge)!=="svelte-xmsalg"&&(ge.textContent=en),Lt=s(e),c(fe.$$.fragment,e),Yt=s(e),we=i(e,"P",{"data-svelte-h":!0}),p(we)!=="svelte-rrzyzn"&&(we.innerHTML=tn),zt=s(e),c(be.$$.fragment,e),qt=s(e),c(Ie.$$.fragment,e),Dt=s(e),Ce=i(e,"P",{"data-svelte-h":!0}),p(Ce)!=="svelte-hnmp6i"&&(Ce.innerHTML=ln),Pt=s(e),c(Ge.$$.fragment,e),Ot=s(e),Ae=i(e,"P",{"data-svelte-h":!0}),p(Ae)!=="svelte-d9pmg3"&&(Ae.innerHTML=nn),Kt=s(e),c(ve.$$.fragment,e),el=s(e),c(Ze.$$.fragment,e),tl=s(e),$e=i(e,"P",{"data-svelte-h":!0}),p($e)!=="svelte-gm5hxo"&&($e.innerHTML=an),ll=s(e),c(I.$$.fragment,e),nl=s(e),c(_e.$$.fragment,e),al=s(e),Be=i(e,"P",{"data-svelte-h":!0}),p(Be)!=="svelte-d9pmg3"&&(Be.innerHTML=sn),sl=s(e),c(Ne.$$.fragment,e),ol=s(e),c(xe.$$.fragment,e),il=s(e),We=i(e,"P",{"data-svelte-h":!0}),p(We)!=="svelte-1ajbcx5"&&(We.textContent=on),Ml=s(e),c(Ve.$$.fragment,e),pl=s(e),Re=i(e,"P",{"data-svelte-h":!0}),p(Re)!=="svelte-mxn4cr"&&(Re.innerHTML=Mn),rl=s(e),c(ke.$$.fragment,e),dl=s(e),Qe=i(e,"P",{"data-svelte-h":!0}),p(Qe)!=="svelte-762i25"&&(Qe.textContent=pn),cl=s(e),Ee=i(e,"UL",{"data-svelte-h":!0}),p(Ee)!=="svelte-16ibgml"&&(Ee.innerHTML=rn),ul=s(e),c(He.$$.fragment,e),hl=s(e),Fe=i(e,"P",{"data-svelte-h":!0}),p(Fe)!=="svelte-sgegyq"&&(Fe.textContent=dn),yl=s(e),Xe=i(e,"UL",{"data-svelte-h":!0}),p(Xe)!=="svelte-1p833nz"&&(Xe.innerHTML=cn),ml=s(e),jl=i(e,"HR",{}),Ul=s(e),c(C.$$.fragment,e),Tl=s(e),c(Se.$$.fragment,e),Jl=s(e),Le=i(e,"P",{}),Un(Le).forEach(l),this.h()},h(){Tn(M,"name","hf:doc:metadata"),Tn(M,"content",Bn)},m(e,t){In(document.head,M),n(e,j,t),n(e,r,t),n(e,T,t),u(G,e,t),n(e,qe,t),n(e,A,t),n(e,De,t),n(e,v,t),n(e,Pe,t),n(e,Z,t),n(e,Oe,t),u($,e,t),n(e,Ke,t),n(e,_,t),n(e,et,t),n(e,B,t),n(e,tt,t),u(N,e,t),n(e,lt,t),n(e,x,t),n(e,nt,t),u(W,e,t),n(e,at,t),n(e,V,t),n(e,st,t),u(R,e,t),n(e,ot,t),n(e,k,t),n(e,it,t),u(Q,e,t),n(e,Mt,t),n(e,E,t),n(e,pt,t),u(H,e,t),n(e,rt,t),u(F,e,t),n(e,dt,t),n(e,X,t),n(e,ct,t),n(e,S,t),n(e,ut,t),n(e,L,t),n(e,ht,t),n(e,Y,t),n(e,yt,t),n(e,z,t),n(e,mt,t),n(e,q,t),n(e,jt,t),n(e,D,t),n(e,Ut,t),u(P,e,t),n(e,Tt,t),n(e,O,t),n(e,Jt,t),n(e,K,t),n(e,gt,t),n(e,ee,t),n(e,ft,t),u(te,e,t),n(e,wt,t),u(le,e,t),n(e,bt,t),n(e,ne,t),n(e,It,t),n(e,ae,t),n(e,Ct,t),n(e,se,t),n(e,Gt,t),n(e,oe,t),n(e,At,t),u(f,e,t),n(e,vt,t),u(ie,e,t),n(e,Zt,t),n(e,Me,t),n(e,$t,t),u(pe,e,t),n(e,_t,t),n(e,re,t),n(e,Bt,t),n(e,de,t),n(e,Nt,t),n(e,ce,t),n(e,xt,t),n(e,ue,t),n(e,Wt,t),n(e,he,t),n(e,Vt,t),u(w,e,t),n(e,Rt,t),u(ye,e,t),n(e,kt,t),u(b,e,t),n(e,Qt,t),n(e,me,t),n(e,Et,t),u(je,e,t),n(e,Ht,t),n(e,Ue,t),n(e,Ft,t),n(e,Te,t),n(e,Xt,t),u(Je,e,t),n(e,St,t),n(e,ge,t),n(e,Lt,t),u(fe,e,t),n(e,Yt,t),n(e,we,t),n(e,zt,t),u(be,e,t),n(e,qt,t),u(Ie,e,t),n(e,Dt,t),n(e,Ce,t),n(e,Pt,t),u(Ge,e,t),n(e,Ot,t),n(e,Ae,t),n(e,Kt,t),u(ve,e,t),n(e,el,t),u(Ze,e,t),n(e,tl,t),n(e,$e,t),n(e,ll,t),u(I,e,t),n(e,nl,t),u(_e,e,t),n(e,al,t),n(e,Be,t),n(e,sl,t),u(Ne,e,t),n(e,ol,t),u(xe,e,t),n(e,il,t),n(e,We,t),n(e,Ml,t),u(Ve,e,t),n(e,pl,t),n(e,Re,t),n(e,rl,t),u(ke,e,t),n(e,dl,t),n(e,Qe,t),n(e,cl,t),n(e,Ee,t),n(e,ul,t),u(He,e,t),n(e,hl,t),n(e,Fe,t),n(e,yl,t),n(e,Xe,t),n(e,ml,t),n(e,jl,t),n(e,Ul,t),u(C,e,t),n(e,Tl,t),u(Se,e,t),n(e,Jl,t),n(e,Le,t),gl=!0},p(e,[t]){const un={};t&2&&(un.$$scope={dirty:t,ctx:e}),f.$set(un);const hn={};t&2&&(hn.$$scope={dirty:t,ctx:e}),w.$set(hn);const yn={};t&2&&(yn.$$scope={dirty:t,ctx:e}),b.$set(yn);const mn={};t&2&&(mn.$$scope={dirty:t,ctx:e}),I.$set(mn);const jn={};t&2&&(jn.$$scope={dirty:t,ctx:e}),C.$set(jn)},i(e){gl||(h(G.$$.fragment,e),h($.$$.fragment,e),h(N.$$.fragment,e),h(W.$$.fragment,e),h(R.$$.fragment,e),h(Q.$$.fragment,e),h(H.$$.fragment,e),h(F.$$.fragment,e),h(P.$$.fragment,e),h(te.$$.fragment,e),h(le.$$.fragment,e),h(f.$$.fragment,e),h(ie.$$.fragment,e),h(pe.$$.fragment,e),h(w.$$.fragment,e),h(ye.$$.fragment,e),h(b.$$.fragment,e),h(je.$$.fragment,e),h(Je.$$.fragment,e),h(fe.$$.fragment,e),h(be.$$.fragment,e),h(Ie.$$.fragment,e),h(Ge.$$.fragment,e),h(ve.$$.fragment,e),h(Ze.$$.fragment,e),h(I.$$.fragment,e),h(_e.$$.fragment,e),h(Ne.$$.fragment,e),h(xe.$$.fragment,e),h(Ve.$$.fragment,e),h(ke.$$.fragment,e),h(He.$$.fragment,e),h(C.$$.fragment,e),h(Se.$$.fragment,e),gl=!0)},o(e){y(G.$$.fragment,e),y($.$$.fragment,e),y(N.$$.fragment,e),y(W.$$.fragment,e),y(R.$$.fragment,e),y(Q.$$.fragment,e),y(H.$$.fragment,e),y(F.$$.fragment,e),y(P.$$.fragment,e),y(te.$$.fragment,e),y(le.$$.fragment,e),y(f.$$.fragment,e),y(ie.$$.fragment,e),y(pe.$$.fragment,e),y(w.$$.fragment,e),y(ye.$$.fragment,e),y(b.$$.fragment,e),y(je.$$.fragment,e),y(Je.$$.fragment,e),y(fe.$$.fragment,e),y(be.$$.fragment,e),y(Ie.$$.fragment,e),y(Ge.$$.fragment,e),y(ve.$$.fragment,e),y(Ze.$$.fragment,e),y(I.$$.fragment,e),y(_e.$$.fragment,e),y(Ne.$$.fragment,e),y(xe.$$.fragment,e),y(Ve.$$.fragment,e),y(ke.$$.fragment,e),y(He.$$.fragment,e),y(C.$$.fragment,e),y(Se.$$.fragment,e),gl=!1},d(e){e&&(l(j),l(r),l(T),l(qe),l(A),l(De),l(v),l(Pe),l(Z),l(Oe),l(Ke),l(_),l(et),l(B),l(tt),l(lt),l(x),l(nt),l(at),l(V),l(st),l(ot),l(k),l(it),l(Mt),l(E),l(pt),l(rt),l(dt),l(X),l(ct),l(S),l(ut),l(L),l(ht),l(Y),l(yt),l(z),l(mt),l(q),l(jt),l(D),l(Ut),l(Tt),l(O),l(Jt),l(K),l(gt),l(ee),l(ft),l(wt),l(bt),l(ne),l(It),l(ae),l(Ct),l(se),l(Gt),l(oe),l(At),l(vt),l(Zt),l(Me),l($t),l(_t),l(re),l(Bt),l(de),l(Nt),l(ce),l(xt),l(ue),l(Wt),l(he),l(Vt),l(Rt),l(kt),l(Qt),l(me),l(Et),l(Ht),l(Ue),l(Ft),l(Te),l(Xt),l(St),l(ge),l(Lt),l(Yt),l(we),l(zt),l(qt),l(Dt),l(Ce),l(Pt),l(Ot),l(Ae),l(Kt),l(el),l(tl),l($e),l(ll),l(nl),l(al),l(Be),l(sl),l(ol),l(il),l(We),l(Ml),l(pl),l(Re),l(rl),l(dl),l(Qe),l(cl),l(Ee),l(ul),l(hl),l(Fe),l(yl),l(Xe),l(ml),l(jl),l(Ul),l(Tl),l(Jl),l(Le)),l(M),m(G,e),m($,e),m(N,e),m(W,e),m(R,e),m(Q,e),m(H,e),m(F,e),m(P,e),m(te,e),m(le,e),m(f,e),m(ie,e),m(pe,e),m(w,e),m(ye,e),m(b,e),m(je,e),m(Je,e),m(fe,e),m(be,e),m(Ie,e),m(Ge,e),m(ve,e),m(Ze,e),m(I,e),m(_e,e),m(Ne,e),m(xe,e),m(Ve,e),m(ke,e),m(He,e),m(C,e),m(Se,e)}}}const Bn='{"title":"Deploy Meta Llama 3.1 405B with TGI DLC on Vertex AI","local":"deploy-meta-llama-31-405b-with-tgi-dlc-on-vertex-ai","sections":[{"title":"Setup / Configuration","local":"setup--configuration","sections":[{"title":"Quotas on Google Cloud","local":"quotas-on-google-cloud","sections":[],"depth":3}],"depth":2},{"title":"Register model on Vertex AI","local":"register-model-on-vertex-ai","sections":[],"depth":2},{"title":"Deploy model on Vertex AI","local":"deploy-model-on-vertex-ai","sections":[],"depth":2},{"title":"Online predictions on Vertex AI","local":"online-predictions-on-vertex-ai","sections":[{"title":"Via Python","local":"via-python","sections":[{"title":"Within the same session","local":"within-the-same-session","sections":[],"depth":4},{"title":"From a different session","local":"from-a-different-session","sections":[],"depth":4}],"depth":3},{"title":"Via the Vertex AI Online Prediction UI","local":"via-the-vertex-ai-online-prediction-ui","sections":[],"depth":3}],"depth":2},{"title":"Resource clean-up","local":"resource-clean-up","sections":[],"depth":2}],"depth":1}';function Nn(J){return gn(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Qn extends fn{constructor(M){super(),wn(this,M,Nn,_n,Jn,{})}}export{Qn as component};

Xet Storage Details

Size:
56.9 kB
·
Xet hash:
df5d0e54b1315eca746b6a5f09cdab241a93937d7e026d79bfcd139c574673f3

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.