Buckets:

rtrm's picture
download
raw
57.7 kB
import{s as Hl,o as Rl,n as el}from"../chunks/scheduler.b108d059.js";import{S as Fl,i as Xl,g as o,s,r as y,A as Yl,h as M,f as l,c as a,j as kl,u as T,x as i,k as Wl,y as Pl,a as n,v as J,d as r,t as U,w}from"../chunks/index.008de539.js";import{T as Kt}from"../chunks/Tip.aeb15ab7.js";import{C as j}from"../chunks/CodeBlock.3968c746.js";import{H as A,E as Dl}from"../chunks/EditOnGithub.d1c48e3d.js";function zl(f){let p,m="Regarding the licensing terms, Llama 3.2 comes with a very similar license to Llama 3.1, with one key difference in the acceptable use policy: any individual domiciled in, or a company with a principal place of business in, the European Union (EU) is not being granted the license rights to use multimodal models included in Llama 3.2. This restriction does not apply to end users of a product or service that incorporates any such multimodal models, so people can still build global products with the vision variants.",c,I,h='For full details, please make sure to read <a href="https://huggingface.co/meta-llama/Llama-3.2-1B/blob/main/LICENSE.txt" rel="nofollow">the official license</a> and <a href="https://huggingface.co/meta-llama/Llama-3.2-1B/blob/main/USE_POLICY.md" rel="nofollow">the acceptable use policy</a>.';return{c(){p=o("p"),p.textContent=m,c=s(),I=o("p"),I.innerHTML=h},l(u){p=M(u,"P",{"data-svelte-h":!0}),i(p)!=="svelte-1nnv2wz"&&(p.textContent=m),c=a(u),I=M(u,"P",{"data-svelte-h":!0}),i(I)!=="svelte-1pc67jk"&&(I.innerHTML=h)},m(u,d){n(u,p,d),n(u,c,d),n(u,I,d)},p:el,d(u){u&&(l(p),l(c),l(I))}}}function Ol(f){let p,m="Note that the <code>MESSAGES_API_ENABLED</code> flag will only work from the TGI 2.3 DLC i.e. <code>us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311</code>, onwards.",c,I,h='For the previous releases the <code>MESSAGES_API_ENABLED</code> flag won’t work as it was introduced <a href="https://github.com/huggingface/text-generation-inference/pull/2481" rel="nofollow">in the following TGI PR</a>, the uncompatible releases being:',u,d,qe="<li><code>us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.1-4.ubuntu2204.py310</code></li> <li><code>us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-0.ubuntu2204.py310</code></li> <li><code>us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-1.ubuntu2204.py310</code></li> <li><code>us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310</code></li>";return{c(){p=o("p"),p.innerHTML=m,c=s(),I=o("p"),I.innerHTML=h,u=s(),d=o("ul"),d.innerHTML=qe},l(C){p=M(C,"P",{"data-svelte-h":!0}),i(p)!=="svelte-uqtsy4"&&(p.innerHTML=m),c=a(C),I=M(C,"P",{"data-svelte-h":!0}),i(I)!=="svelte-1v5cpel"&&(I.innerHTML=h),u=a(C),d=M(C,"UL",{"data-svelte-h":!0}),i(d)!=="svelte-kb15s0"&&(d.innerHTML=qe)},m(C,g){n(C,p,g),n(C,c,g),n(C,I,g),n(C,u,g),n(C,d,g)},p:el,d(C){C&&(l(p),l(c),l(I),l(u),l(d))}}}function Kl(f){let p,m='📍 Find the complete example on GitHub <a href="https://github.com/huggingface/Google-Cloud-Containers/tree/merge/examples/vertex-ai/notebooks/deploy-llama-vision-on-vertex-ai" rel="nofollow">here</a>!';return{c(){p=o("p"),p.innerHTML=m},l(c){p=M(c,"P",{"data-svelte-h":!0}),i(p)!=="svelte-111g3na"&&(p.innerHTML=m)},m(c,I){n(c,p,I)},p:el,d(c){c&&l(p)}}}function en(f){let p,m,c,I,h,u,d,qe='<a href="https://huggingface.co/blog/llama32" rel="nofollow">Llama 3.2</a> is the latest release of open LLMs from the Llama family released by Meta (as of October 2024); Llama 3.2 Vision comes in two sizes: 11B for efficient deployment and development on consumer-size GPU, and 90B for large-scale applications. Text Generation Inference (TGI) is a toolkit developed by Hugging Face for deploying and serving LLMs, with high performance text generation. And, Google Vertex AI is a Machine Learning (ML) platform that lets you train and deploy ML models and AI applications, and customize large language models (LLMs) for use in your AI-powered applications.',C,g,tl='This example showcases how to deploy <a href="https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct" rel="nofollow"><code>meta-llama/Llama-3.2-11B-Vision-Instruct</code></a> on Vertex AI via the Hugging Face purpose-built Deep Learning Container (DLC) for Text Generation Inference (TGI) on Google Cloud.',Le,b,Se,G,ll='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/merge/examples/vertex-ai/notebooks/deploy-llama-vision-on-vertex-ai/assets/model-in-hf-hub.png" alt="&#39;google/gemma-7b-it&#39; in the Hugging Face Hub"/>',Qe,$,ke,x,nl='First, you need to install <code>gcloud</code> in your local machine, which is the command-line tool for Google Cloud, following the instructions at <a href="https://cloud.google.com/sdk/docs/install" rel="nofollow">Cloud SDK Documentation - Install the gcloud CLI</a>.',We,B,sl="Then, you also need to install the <code>google-cloud-aiplatform</code> Python SDK, required to programmatically create the Vertex AI model, register it, acreate the endpoint, and deploy it on Vertex AI.",He,_,Re,Z,al="Optionally, to ease the usage of the commands within this tutorial, you need to set the following environment variables for GCP:",Fe,V,Xe,q,ol="Then you need to login into your GCP account and set the project ID to the one you want to use to register and deploy the models on Vertex AI.",Ye,N,Pe,L,Ml="Once you are logged in, you need to enable the necessary service APIs in GCP, such as the Vertex AI API, the Compute Engine API, and Google Container Registry related APIs.",De,S,ze,Q,il="Once everything is set up, you can already initialize the Vertex AI session via the <code>google-cloud-aiplatform</code> Python SDK as follows:",Oe,k,Ke,W,et,H,pl='As <a href="https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct" rel="nofollow"><code>meta-llama/Llama-3.2-11B-Vision-Instruct</code></a> is a gated model with restricted access on the European Union (EU), meaning that you need to accept the license agreement.',tt,R,yl='To generate a token for the Hugging Face Hub, you can follow the instructions in <a href="https://huggingface.co/docs/hub/en/security-tokens" rel="nofollow">Hugging Face Hub - User access tokens</a>; the generated token can either be fine-grained to have access to the model, or just overall read-only access to your account.',lt,F,nt,X,st,Y,Tl="Then you can already “upload” the model i.e. register the model on Vertex AI. It is not an upload per se, since the model will be automatically downloaded from the Hugging Face Hub in the Hugging Face DLC for TGI on startup via the <code>MODEL_ID</code> environment variable, so what is uploaded is only the configuration, not the model weights.",at,P,Jl="Before going into the code, let’s quickly review the arguments provided to the <code>upload</code> method:",ot,D,rl='<li><p><strong><code>display_name</code></strong> is the name that will be shown in the Vertex AI Model Registry.</p></li> <li><p><strong><code>serving_container_image_uri</code></strong> is the location of the Hugging Face DLC for TGI that will be used for serving the model.</p></li> <li><p><strong><code>serving_container_environment_variables</code></strong> are the environment variables that will be used during the container runtime, so these are aligned with the environment variables defined by <code>text-generation-inference</code>, which are analog to the <a href="https://huggingface.co/docs/text-generation-inference/en/basic_tutorials/launcher" rel="nofollow"><code>text-generation-launcher</code> arguments</a>. Additionally, the Hugging Face DLCs for TGI also capture the <code>AIP_</code> environment variables from Vertex AI as in <a href="https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements" rel="nofollow">Vertex AI Documentation - Custom container requirements for prediction</a>.</p> <ul><li><p><code>MODEL_ID</code> is the identifier of the model in the Hugging Face Hub. To explore all the supported models you can check <a href="https://huggingface.co/models?sort=trending&amp;other=text-generation-inference" rel="nofollow">the models tagged with <code>text-generation-inference</code> in the Hugging Face Hub</a>.</p></li> <li><p><code>NUM_SHARD</code> is the number of shards to use if you don’t want to use all GPUs on a given machine e.g. if you have two GPUs but you just want to use one for TGI then <code>NUM_SHARD=1</code>, otherwise it matches the <code>CUDA_VISIBLE_DEVICES</code>.</p></li> <li><p><code>MAX_INPUT_TOKENS</code> is the maximum allowed input length (expressed in number of tokens), the larger it is, the larger the prompt can be, but also more memory will be consumed.</p></li> <li><p><code>MAX_TOTAL_TOKENS</code> is the most important value to set as it defines the “memory budget” of running clients requests, the larger this value, the larger amount each request will be in your RAM and the less effective batching can be.</p></li> <li><p><code>MAX_BATCH_PREFILL_TOKENS</code> limits the number of tokens for the prefill operation, as it takes the most memory and is compute bound, it is interesting to limit the number of requests that can be sent.</p></li> <li><p><code>HF_HUB_ENABLE_HF_TRANSFER</code> to enable a faster download speed via the hf_transfer library.</p></li> <li><p><code>HUGGING_FACE_HUB_TOKEN</code> is the Hugging Face Hub token, required as <a href="https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct" rel="nofollow"><code>meta-llama/Llama-3.2-11B-Vision-Instruct</code></a> is a gated model with restricted access in the European Union (EU).</p> <p>Additionally, you need to specify the <code>MESSAGES_API_ENABLED</code> environment variable that was introduced in the TGI 2.3.0 Release, since the Messages API is required to process both the text and the images within the input payload.</p></li> <li><p><code>MESSAGES_API_ENABLED</code> set to “true” to use the Messages API i.e. <code>/v1/chat/completions</code>, instead of the Generation API i.e. <code>/generation</code> (default).</p></li></ul></li> <li><p>(optional) <strong><code>serving_container_ports</code></strong> is the port where the Vertex AI endpoint will be exposed, by default 8080.</p></li>',Mt,z,Ul='For more information on the supported arguments you can check <a href="https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.Model#google_cloud_aiplatform_Model_upload" rel="nofollow"><code>aiplatform.Model.upload</code> Python reference</a>.',it,v,pt,O,yt,K,wl='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/merge/examples/vertex-ai/notebooks/deploy-llama-vision-on-vertex-ai/assets/vertex-ai-model.png" alt="Model on Vertex AI Model Registry"/>',Tt,ee,Jt,te,Il="After the model is registered on Vertex AI, you need to define the endpoint that you want to deploy the model to, and then link the model deployment to that endpoint resource.",rt,le,cl="To do so, you need to call the method <code>aiplatform.Endpoint.create</code> to create a new Vertex AI endpoint resource (which is not linked to a model or anything usable yet).",Ut,ne,wt,se,ul='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/merge/examples/vertex-ai/notebooks/deploy-llama-vision-on-vertex-ai/assets/vertex-ai-endpoint.png" alt="Vertex AI Endpoint created"/>',It,ae,dl="Now you can deploy the registered model in an endpoint on Vertex AI.",ct,oe,Cl="The <code>deploy</code> method will link the previously created endpoint resource with the model that contains the configuration of the serving container, and then, it will deploy the model on Vertex AI in the specified instance.",ut,Me,jl="Before going into the code, let’s quickly review the arguments provided to the <code>deploy</code> method:",dt,ie,ml='<li><p><strong><code>endpoint</code></strong> is the endpoint to deploy the model to, which is optional, and by default will be set to the model display name with the <code>_endpoint</code> suffix.</p></li> <li><p><strong><code>machine_type</code></strong>, <strong><code>accelerator_type</code></strong> and <strong><code>accelerator_count</code></strong> are arguments that define which instance to use, and additionally, the accelerator to use and the number of accelerators, respectively. The <code>machine_type</code> and the <code>accelerator_type</code> are tied together, so you will need to select an instance that supports the accelerator that you are using and vice-versa. More information about the different instances at <a href="https://cloud.google.com/compute/docs/gpus" rel="nofollow">Compute Engine Documentation - GPU machine types</a>, and about the <code>accelerator_type</code> naming at <a href="https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec" rel="nofollow">Vertex AI Documentation - MachineSpec</a>.</p></li>',Ct,pe,hl='For more information on the supported arguments you can check <a href="https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.Model#google_cloud_aiplatform_Model_deploy" rel="nofollow"><code>aiplatform.Model.deploy</code> Python reference</a>.',jt,ye,mt,Te,gl="<strong>WARNING</strong>: <em>The Vertex AI endpoint deployment via the <code>deploy</code> method may take from 15 to 25 minutes.</em>",ht,Je,Al='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/merge/examples/vertex-ai/notebooks/deploy-llama-vision-on-vertex-ai/assets/vertex-ai-endpoint-run.png" alt="Vertex AI Endpoint running the model"/>',gt,re,At,Ue,fl="Finally, you can run the online predictions on Vertex AI using the <code>predict</code> method, which will send the requests to the running endpoint in the <code>/predict</code> route specified within the container following Vertex AI I/O payload formatting.",ft,we,bl='Note that the input payload differs a bit from the standard Text Generation Inference (TGI), as <a href="https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct" rel="nofollow"><code>meta-llama/Llama-3.2-11B-Vision-Instruct</code></a> is a Visual Language Model (VLM), as those models consume both text and images. More information in <a href="https://huggingface.co/docs/text-generation-inference/en/basic_tutorials/visual_language_models" rel="nofollow">Vision Language Model Inference in TGI</a>.',bt,Ie,vt,ce,Et,ue,vl="If you are willing to run the online prediction within the current session, you can send requests programmatically via the <code>aiplatform.Endpoint</code> (returned by the <code>aiplatform.Model.deploy</code> method) as in the following snippet:",Gt,de,$t,Ce,El='<tbody><tr><td><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png" alt=""/></td> <td>The image depicts a stylized illustration of an anthropomorphic rabbit dressed in a space suit, standing on a rocky, alien-like planet.</td></tr></tbody>',xt,je,Bt,me,Gl="If the Vertex AI Endpoint was deployed in a different session and you want to use it but don’t have access to the <code>deployed_model</code> variable returned by the <code>aiplatform.Model.deploy</code> method as in the previous section; you can also run the following snippet to instantiate the deployed <code>aiplatform.Endpoint</code> via its resource name as <code>projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}</code>.",_t,he,$l="Note that you will need to either retrieve the resource name i.e. the <code>projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}</code> URL yourself via the Google Cloud Console, or just replace the <code>ENDPOINT_ID</code> below that can either be found via the previously instantiated endpoint as endpoint.id or via the Google Cloud Console under the Online predictions where the endpoint is listed.",Zt,ge,Vt,Ae,xl=`<tbody><tr><td><img src="https://huggingface.co/datasets/huggingface/release-assets/resolve/main/invoice.png" alt="Invoice Image"/></td> <td>To calculate the time difference between the invoice date and the due date, we need to subtract the invoice date from the due date.<br/><br/>
Invoice Date: 11/02/2019<br/>
Due Date: 26/02/2019<br/><br/>
Time Difference = Due Date - Invoice Date<br/>
Time Difference = 26/02/2019 - 11/02/2019<br/>
Time Difference = 15 days<br/><br/>
Therefore, it takes <strong>15 days</strong> from the invoice date to the due date.</td></tr></tbody>`,qt,fe,Nt,be,Bl="Alternatively, for testing purposes you can also use the Vertex AI Online Prediction UI, that provides a field that expects the JSON payload formatted according to the Vertex AI specification (as in the examples above) being:",Lt,ve,St,Ee,_l='<img src="https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/merge/examples/vertex-ai/notebooks/deploy-llama-vision-on-vertex-ai/assets/vertex-ai-online-prediction.png" alt="Vertex AI Endpoint online inference"/>',Qt,Ge,kt,$e,Zl="Finally, you can already release the resources that you’ve created as follows, to avoid unnecessary costs:",Wt,xe,Vl="<li><code>deployed_model.undeploy_all</code> to undeploy the model from all the endpoints.</li> <li><code>deployed_model.delete</code> to delete the endpoint/s where the model was deployed gracefully, after the <code>undeploy_all</code> method.</li> <li><code>model.delete</code> to delete the model from the registry.</li>",Ht,Be,Rt,_e,ql="Alternatively, you can also remove those from the Google Cloud Console following the steps:",Ft,Ze,Nl="<li>Go to Vertex AI in Google Cloud</li> <li>Go to Deploy and use -&gt; Online prediction</li> <li>Click on the endpoint and then on the deployed model/s to “Undeploy model from endpoint”</li> <li>Then go back to the endpoint list and remove the endpoint</li> <li>Finally, go to Deploy and use -&gt; Model Registry, and remove the model</li>",Xt,Yt,Pt,E,Dt,Ve,zt,Ne,Ot;return h=new A({props:{title:"Deploy Llama 3.2 11B Vision with TGI DLC on Vertex AI",local:"deploy-llama-32-11b-vision-with-tgi-dlc-on-vertex-ai",headingTag:"h1"}}),b=new Kt({props:{warning:!0,$$slots:{default:[zl]},$$scope:{ctx:f}}}),$=new A({props:{title:"Setup / Configuration",local:"setup--configuration",headingTag:"h2"}}),_=new j({props:{code:"IXBpcCUyMGluc3RhbGwlMjAtLXVwZ3JhZGUlMjAtLXF1aWV0JTIwZ29vZ2xlLWNsb3VkLWFpcGxhdGZvcm0=",highlighted:"!pip install --upgrade --quiet google-cloud-aiplatform",wrap:!1}}),V=new j({props:{code:"JTI1ZW52JTIwUFJPSkVDVF9JRCUzRHlvdXItcHJvamVjdC1pZCUwQSUyNWVudiUyMExPQ0FUSU9OJTNEeW91ci1sb2NhdGlvbiUwQSUyNWVudiUyMENPTlRBSU5FUl9VUkklM0R1cy1kb2NrZXIucGtnLmRldiUyRmRlZXBsZWFybmluZy1wbGF0Zm9ybS1yZWxlYXNlJTJGZ2NyLmlvJTJGaHVnZ2luZ2ZhY2UtdGV4dC1nZW5lcmF0aW9uLWluZmVyZW5jZS1jdTEyNC4yLTMudWJ1bnR1MjIwNC5weTMxMQ==",highlighted:`%env PROJECT_ID=your-project-<span class="hljs-built_in">id</span>
%env LOCATION=your-location
%env CONTAINER_URI=us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124<span class="hljs-number">.2</span>-<span class="hljs-number">3.</span>ubuntu2204.py311`,wrap:!1}}),N=new j({props:{code:"IWdjbG91ZCUyMGF1dGglMjBsb2dpbiUwQSFnY2xvdWQlMjBhdXRoJTIwYXBwbGljYXRpb24tZGVmYXVsdCUyMGxvZ2luJTIwJTIwJTIzJTIwRm9yJTIwbG9jYWwlMjBkZXZlbG9wbWVudCUwQSFnY2xvdWQlMjBjb25maWclMjBzZXQlMjBwcm9qZWN0JTIwJTI0UFJPSkVDVF9JRA==",highlighted:`!gcloud auth login
!gcloud auth application-default login <span class="hljs-comment"># For local development</span>
!gcloud config <span class="hljs-built_in">set</span> project $PROJECT_ID`,wrap:!1}}),S=new j({props:{code:"IWdjbG91ZCUyMHNlcnZpY2VzJTIwZW5hYmxlJTIwYWlwbGF0Zm9ybS5nb29nbGVhcGlzLmNvbSUwQSFnY2xvdWQlMjBzZXJ2aWNlcyUyMGVuYWJsZSUyMGNvbXB1dGUuZ29vZ2xlYXBpcy5jb20lMEEhZ2Nsb3VkJTIwc2VydmljZXMlMjBlbmFibGUlMjBjb250YWluZXIuZ29vZ2xlYXBpcy5jb20lMEEhZ2Nsb3VkJTIwc2VydmljZXMlMjBlbmFibGUlMjBjb250YWluZXJyZWdpc3RyeS5nb29nbGVhcGlzLmNvbSUwQSFnY2xvdWQlMjBzZXJ2aWNlcyUyMGVuYWJsZSUyMGNvbnRhaW5lcmZpbGVzeXN0ZW0uZ29vZ2xlYXBpcy5jb20=",highlighted:`!gcloud services enable aiplatform.googleapis.com
!gcloud services enable compute.googleapis.com
!gcloud services enable container.googleapis.com
!gcloud services enable containerregistry.googleapis.com
!gcloud services enable containerfilesystem.googleapis.com`,wrap:!1}}),k=new j({props:{code:"aW1wb3J0JTIwb3MlMEFmcm9tJTIwZ29vZ2xlLmNsb3VkJTIwaW1wb3J0JTIwYWlwbGF0Zm9ybSUwQSUwQWFpcGxhdGZvcm0uaW5pdCglMEElMjAlMjAlMjAlMjBwcm9qZWN0JTNEb3MuZ2V0ZW52KCUyMlBST0pFQ1RfSUQlMjIpJTJDJTBBJTIwJTIwJTIwJTIwbG9jYXRpb24lM0Rvcy5nZXRlbnYoJTIyTE9DQVRJT04lMjIpJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">from</span> google.cloud <span class="hljs-keyword">import</span> aiplatform
aiplatform.init(
project=os.getenv(<span class="hljs-string">&quot;PROJECT_ID&quot;</span>),
location=os.getenv(<span class="hljs-string">&quot;LOCATION&quot;</span>),
)`,wrap:!1}}),W=new A({props:{title:"Register model on Vertex AI",local:"register-model-on-vertex-ai",headingTag:"h2"}}),F=new j({props:{code:"IXBpcCUyMGluc3RhbGwlMjAtLXVwZ3JhZGUlMjAtLXF1aWV0JTIwaHVnZ2luZ2ZhY2VfaHVi",highlighted:"!pip install --upgrade --quiet huggingface_hub",wrap:!1}}),X=new j({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGludGVycHJldGVyX2xvZ2luJTBBJTBBaW50ZXJwcmV0ZXJfbG9naW4oKQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> interpreter_login
interpreter_login()`,wrap:!1}}),v=new Kt({props:{warning:!0,$$slots:{default:[Ol]},$$scope:{ctx:f}}}),O=new j({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGdldF90b2tlbiUwQSUwQW1vZGVsJTIwJTNEJTIwYWlwbGF0Zm9ybS5Nb2RlbC51cGxvYWQoJTBBJTIwJTIwJTIwJTIwZGlzcGxheV9uYW1lJTNEJTIyTGxhbWEtVmlzaW9uLTExQiUyMiUyQyUwQSUyMCUyMCUyMCUyMHNlcnZpbmdfY29udGFpbmVyX2ltYWdlX3VyaSUzRG9zLmdldGVudiglMjJDT05UQUlORVJfVVJJJTIyKSUyQyUwQSUyMCUyMCUyMCUyMHNlcnZpbmdfY29udGFpbmVyX2Vudmlyb25tZW50X3ZhcmlhYmxlcyUzRCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMk1PREVMX0lEJTIyJTNBJTIwJTIybWV0YS1sbGFtYSUyRkxsYW1hLTMuMi0xMUItVmlzaW9uLUluc3RydWN0JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyTlVNX1NIQVJEJTIyJTNBJTIwJTIyMiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMk1BWF9JTlBVVF9UT0tFTlMlMjIlM0ElMjAlMjI1MTIlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJNQVhfVE9UQUxfVE9LRU5TJTIyJTNBJTIwJTIyMTAyNCUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMk1BWF9CQVRDSF9QUkVGSUxMX1RPS0VOUyUyMiUzQSUyMCUyMjE1MTIlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJIRl9IVUJfRU5BQkxFX0hGX1RSQU5TRkVSJTIyJTNBJTIwJTIyMSUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMkhVR0dJTkdfRkFDRV9IVUJfVE9LRU4lMjIlM0ElMjBnZXRfdG9rZW4oKSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMk1FU1NBR0VTX0FQSV9FTkFCTEVEJTIyJTNBJTIwJTIydHJ1ZSUyMiUyQyUwQSUyMCUyMCUyMCUyMCU3RCUyQyUwQSUyMCUyMCUyMCUyMHNlcnZpbmdfY29udGFpbmVyX3BvcnRzJTNEJTVCODA4MCU1RCUyQyUwQSklMEFtb2RlbC53YWl0KCk=",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> get_token
model = aiplatform.Model.upload(
display_name=<span class="hljs-string">&quot;Llama-Vision-11B&quot;</span>,
serving_container_image_uri=os.getenv(<span class="hljs-string">&quot;CONTAINER_URI&quot;</span>),
serving_container_environment_variables={
<span class="hljs-string">&quot;MODEL_ID&quot;</span>: <span class="hljs-string">&quot;meta-llama/Llama-3.2-11B-Vision-Instruct&quot;</span>,
<span class="hljs-string">&quot;NUM_SHARD&quot;</span>: <span class="hljs-string">&quot;2&quot;</span>,
<span class="hljs-string">&quot;MAX_INPUT_TOKENS&quot;</span>: <span class="hljs-string">&quot;512&quot;</span>,
<span class="hljs-string">&quot;MAX_TOTAL_TOKENS&quot;</span>: <span class="hljs-string">&quot;1024&quot;</span>,
<span class="hljs-string">&quot;MAX_BATCH_PREFILL_TOKENS&quot;</span>: <span class="hljs-string">&quot;1512&quot;</span>,
<span class="hljs-string">&quot;HF_HUB_ENABLE_HF_TRANSFER&quot;</span>: <span class="hljs-string">&quot;1&quot;</span>,
<span class="hljs-string">&quot;HUGGING_FACE_HUB_TOKEN&quot;</span>: get_token(),
<span class="hljs-string">&quot;MESSAGES_API_ENABLED&quot;</span>: <span class="hljs-string">&quot;true&quot;</span>,
},
serving_container_ports=[<span class="hljs-number">8080</span>],
)
model.wait()`,wrap:!1}}),ee=new A({props:{title:"Deploy model on Vertex AI",local:"deploy-model-on-vertex-ai",headingTag:"h2"}}),ne=new j({props:{code:"ZW5kcG9pbnQlMjAlM0QlMjBhaXBsYXRmb3JtLkVuZHBvaW50LmNyZWF0ZShkaXNwbGF5X25hbWUlM0QlMjJMbGFtYS1WaXNpb24tMTFCLUFQSSUyMik=",highlighted:'endpoint = aiplatform.Endpoint.create(display_name=<span class="hljs-string">&quot;Llama-Vision-11B-API&quot;</span>)',wrap:!1}}),ye=new j({props:{code:"ZGVwbG95ZWRfbW9kZWwlMjAlM0QlMjBtb2RlbC5kZXBsb3koJTBBJTIwJTIwJTIwJTIwZW5kcG9pbnQlM0RlbmRwb2ludCUyQyUwQSUyMCUyMCUyMCUyMG1hY2hpbmVfdHlwZSUzRCUyMmcyLXN0YW5kYXJkLTI0JTIyJTJDJTBBJTIwJTIwJTIwJTIwYWNjZWxlcmF0b3JfdHlwZSUzRCUyMk5WSURJQV9MNCUyMiUyQyUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yX2NvdW50JTNEMiUyQyUwQSk=",highlighted:`deployed_model = model.deploy(
endpoint=endpoint,
machine_type=<span class="hljs-string">&quot;g2-standard-24&quot;</span>,
accelerator_type=<span class="hljs-string">&quot;NVIDIA_L4&quot;</span>,
accelerator_count=<span class="hljs-number">2</span>,
)`,wrap:!1}}),re=new A({props:{title:"Online predictions on Vertex AI",local:"online-predictions-on-vertex-ai",headingTag:"h2"}}),Ie=new A({props:{title:"Via Python",local:"via-python",headingTag:"h3"}}),ce=new A({props:{title:"Within the same session",local:"within-the-same-session",headingTag:"h4"}}),de=new j({props:{code:"b3V0cHV0JTIwJTNEJTIwZGVwbG95ZWRfbW9kZWwucHJlZGljdCglMEElMjAlMjAlMjAlMjBpbnN0YW5jZXMlM0QlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJtZXNzYWdlcyUyMiUzQSUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyY29udGVudCUyMiUzQSUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnR5cGUlMjIlM0ElMjAlMjJ0ZXh0JTIyJTJDJTIwJTIydGV4dCUyMiUzQSUyMCUyMldoYXQncyUyMGluJTIwdGhpcyUyMGltYWdlJTNGJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIydHlwZSUyMiUzQSUyMCUyMmltYWdlX3VybCUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmltYWdlX3VybCUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnVybCUyMiUzQSUyMCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmh1Z2dpbmdmYWNlJTJGZG9jdW1lbnRhdGlvbi1pbWFnZXMlMkZyZXNvbHZlJTJGbWFpbiUyRnRyYW5zZm9ybWVycyUyRnJhYmJpdC5wbmclMjIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJwYXJhbWV0ZXJzJTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIybWF4X25ld190b2tlbnMlMjIlM0ElMjAyNTYlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJkb19zYW1wbGUlMjIlM0ElMjBUcnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIydG9wX3AlMjIlM0ElMjAwLjk1JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIydGVtcGVyYXR1cmUlMjIlM0ElMjAxLjAlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdHJlYW0lMjIlM0ElMjBGYWxzZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3RCUyQyUwQSUyMCUyMCUyMCUyMCU1RCUyQyUwQSklMEFwcmludChvdXRwdXQucHJlZGljdGlvbnMlNUIwJTVEKQ==",highlighted:`output = deployed_model.predict(
instances=[
{
<span class="hljs-string">&quot;messages&quot;</span>: [
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: [
{<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;text&quot;</span>, <span class="hljs-string">&quot;text&quot;</span>: <span class="hljs-string">&quot;What&#x27;s in this image?&quot;</span>},
{
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;image_url&quot;</span>,
<span class="hljs-string">&quot;image_url&quot;</span>: {
<span class="hljs-string">&quot;url&quot;</span>: <span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png&quot;</span>
},
},
],
},
],
<span class="hljs-string">&quot;parameters&quot;</span>: {
<span class="hljs-string">&quot;max_new_tokens&quot;</span>: <span class="hljs-number">256</span>,
<span class="hljs-string">&quot;do_sample&quot;</span>: <span class="hljs-literal">True</span>,
<span class="hljs-string">&quot;top_p&quot;</span>: <span class="hljs-number">0.95</span>,
<span class="hljs-string">&quot;temperature&quot;</span>: <span class="hljs-number">1.0</span>,
<span class="hljs-string">&quot;stream&quot;</span>: <span class="hljs-literal">False</span>,
},
},
],
)
<span class="hljs-built_in">print</span>(output.predictions[<span class="hljs-number">0</span>])`,wrap:!1}}),je=new A({props:{title:"From a different session",local:"from-a-different-session",headingTag:"h4"}}),ge=new j({props:{code:"aW1wb3J0JTIwb3MlMEFmcm9tJTIwZ29vZ2xlLmNsb3VkJTIwaW1wb3J0JTIwYWlwbGF0Zm9ybSUwQSUwQWFpcGxhdGZvcm0uaW5pdChwcm9qZWN0JTNEb3MuZ2V0ZW52KCUyMlBST0pFQ1RfSUQlMjIpJTJDJTIwbG9jYXRpb24lM0Rvcy5nZXRlbnYoJTIyTE9DQVRJT04lMjIpKSUwQSUwQWVuZHBvaW50X2Rpc3BsYXlfbmFtZSUyMCUzRCUyMCUyMkxsYW1hLVZpc2lvbi0xMUItQVBJJTIyJTIwJTIwJTIzJTIwVE9ETyUzQSUyMGNoYW5nZSUyMHRvJTIweW91ciUyMGVuZHBvaW50JTIwZGlzcGxheSUyMG5hbWUlMEElMEElMjMlMjBJdGVyYXRlcyUyMG92ZXIlMjBhbGwlMjB0aGUlMjBWZXJ0ZXglMjBBSSUyMEVuZHBvaW50cyUyMHdpdGhpbiUyMHRoZSUyMGN1cnJlbnQlMjBwcm9qZWN0JTIwYW5kJTIwa2VlcHMlMjB0aGUlMjBmaXJzdCUyMG1hdGNoJTIwKGlmJTIwYW55KSUyQyUyMG90aGVyd2lzZSUyMHNldCUyMHRvJTIwTm9uZSUwQUVORFBPSU5UX0lEJTIwJTNEJTIwbmV4dCglMEElMjAlMjAlMjAlMjAoZW5kcG9pbnQubmFtZSUyMGZvciUyMGVuZHBvaW50JTIwaW4lMjBhaXBsYXRmb3JtLkVuZHBvaW50Lmxpc3QoKSUyMGlmJTIwZW5kcG9pbnQuZGlzcGxheV9uYW1lJTIwJTNEJTNEJTIwZW5kcG9pbnRfZGlzcGxheV9uYW1lKSUyQyUyME5vbmUlMEEpJTBBYXNzZXJ0JTIwRU5EUE9JTlRfSUQlMkMlMjAoJTBBJTIwJTIwJTIwJTIwJTIyJTYwRU5EUE9JTlRfSUQlNjAlMjBpcyUyMG5vdCUyMHNldCUyQyUyMHBsZWFzZSUyMG1ha2UlMjBzdXJlJTIwdGhhdCUyMHRoZSUyMCU2MGVuZHBvaW50X2Rpc3BsYXlfbmFtZSU2MCUyMGlzJTIwY29ycmVjdCUyMGF0JTIwJTIyJTBBJTIwJTIwJTIwJTIwZiUyMmh0dHBzJTNBJTJGJTJGY29uc29sZS5jbG91ZC5nb29nbGUuY29tJTJGdmVydGV4LWFpJTJGb25saW5lLXByZWRpY3Rpb24lMkZlbmRwb2ludHMlM0Zwcm9qZWN0JTNEJTdCb3MuZ2V0ZW52KCdQUk9KRUNUX0lEJyklN0QlMjIlMEEpJTBBJTBBZW5kcG9pbnQlMjAlM0QlMjBhaXBsYXRmb3JtLkVuZHBvaW50KCUwQSUyMCUyMCUyMCUyMGYlMjJwcm9qZWN0cyUyRiU3Qm9zLmdldGVudignUFJPSkVDVF9JRCcpJTdEJTJGbG9jYXRpb25zJTJGJTdCb3MuZ2V0ZW52KCdMT0NBVElPTicpJTdEJTJGZW5kcG9pbnRzJTJGJTdCRU5EUE9JTlRfSUQlN0QlMjIlMEEpJTBBb3V0cHV0JTIwJTNEJTIwZW5kcG9pbnQucHJlZGljdCglMEElMjAlMjAlMjAlMjBpbnN0YW5jZXMlM0QlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJtZXNzYWdlcyUyMiUzQSUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyY29udGVudCUyMiUzQSUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnR5cGUlMjIlM0ElMjAlMjJ0ZXh0JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIydGV4dCUyMiUzQSUyMCUyMkhvdyUyMGxvbmclMjBkb2VzJTIwaXQlMjB0YWtlJTIwZnJvbSUyMGludm9pY2UlMjBkYXRlJTIwdG8lMjBkdWUlMjBkYXRlJTNGJTIwQmUlMjBzaG9ydCUyMGFuZCUyMGNvbmNpc2UuJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIydHlwZSUyMiUzQSUyMCUyMmltYWdlX3VybCUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmltYWdlX3VybCUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnVybCUyMiUzQSUyMCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmh1Z2dpbmdmYWNlJTJGcmVsZWFzZS1hc3NldHMlMkZyZXNvbHZlJTJGbWFpbiUyRmludm9pY2UucG5nJTIyJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycGFyYW1ldGVycyUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm1heF9uZXdfdG9rZW5zJTIyJTNBJTIwMjU2JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZG9fc2FtcGxlJTIyJTNBJTIwVHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnRvcF9wJTIyJTNBJTIwMC45NSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnRlbXBlcmF0dXJlJTIyJTNBJTIwMS4wJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyc3RyZWFtJTIyJTNBJTIwRmFsc2UlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEEpJTBBcHJpbnQob3V0cHV0LnByZWRpY3Rpb25zJTVCMCU1RCk=",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">from</span> google.cloud <span class="hljs-keyword">import</span> aiplatform
aiplatform.init(project=os.getenv(<span class="hljs-string">&quot;PROJECT_ID&quot;</span>), location=os.getenv(<span class="hljs-string">&quot;LOCATION&quot;</span>))
endpoint_display_name = <span class="hljs-string">&quot;Llama-Vision-11B-API&quot;</span> <span class="hljs-comment"># <span class="hljs-doctag">TODO:</span> change to your endpoint display name</span>
<span class="hljs-comment"># Iterates over all the Vertex AI Endpoints within the current project and keeps the first match (if any), otherwise set to None</span>
ENDPOINT_ID = <span class="hljs-built_in">next</span>(
(endpoint.name <span class="hljs-keyword">for</span> endpoint <span class="hljs-keyword">in</span> aiplatform.Endpoint.<span class="hljs-built_in">list</span>() <span class="hljs-keyword">if</span> endpoint.display_name == endpoint_display_name), <span class="hljs-literal">None</span>
)
<span class="hljs-keyword">assert</span> ENDPOINT_ID, (
<span class="hljs-string">&quot;\`ENDPOINT_ID\` is not set, please make sure that the \`endpoint_display_name\` is correct at &quot;</span>
<span class="hljs-string">f&quot;https://console.cloud.google.com/vertex-ai/online-prediction/endpoints?project=<span class="hljs-subst">{os.getenv(<span class="hljs-string">&#x27;PROJECT_ID&#x27;</span>)}</span>&quot;</span>
)
endpoint = aiplatform.Endpoint(
<span class="hljs-string">f&quot;projects/<span class="hljs-subst">{os.getenv(<span class="hljs-string">&#x27;PROJECT_ID&#x27;</span>)}</span>/locations/<span class="hljs-subst">{os.getenv(<span class="hljs-string">&#x27;LOCATION&#x27;</span>)}</span>/endpoints/<span class="hljs-subst">{ENDPOINT_ID}</span>&quot;</span>
)
output = endpoint.predict(
instances=[
{
<span class="hljs-string">&quot;messages&quot;</span>: [
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: [
{
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;text&quot;</span>,
<span class="hljs-string">&quot;text&quot;</span>: <span class="hljs-string">&quot;How long does it take from invoice date to due date? Be short and concise.&quot;</span>,
},
{
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;image_url&quot;</span>,
<span class="hljs-string">&quot;image_url&quot;</span>: {
<span class="hljs-string">&quot;url&quot;</span>: <span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/release-assets/resolve/main/invoice.png&quot;</span>
},
},
],
},
],
<span class="hljs-string">&quot;parameters&quot;</span>: {
<span class="hljs-string">&quot;max_new_tokens&quot;</span>: <span class="hljs-number">256</span>,
<span class="hljs-string">&quot;do_sample&quot;</span>: <span class="hljs-literal">True</span>,
<span class="hljs-string">&quot;top_p&quot;</span>: <span class="hljs-number">0.95</span>,
<span class="hljs-string">&quot;temperature&quot;</span>: <span class="hljs-number">1.0</span>,
<span class="hljs-string">&quot;stream&quot;</span>: <span class="hljs-literal">False</span>,
},
},
],
)
<span class="hljs-built_in">print</span>(output.predictions[<span class="hljs-number">0</span>])`,wrap:!1}}),fe=new A({props:{title:"Via the Vertex AI Online Prediction UI",local:"via-the-vertex-ai-online-prediction-ui",headingTag:"h3"}}),ve=new j({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyaW5zdGFuY2VzJTIyJTNBJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIybWVzc2FnZXMlMjIlM0ElMjAlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ0eXBlJTIyJTNBJTIwJTIydGV4dCUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnRleHQlMjIlM0ElMjAlMjJXaGF0J3MlMjBpbiUyMHRoaXMlMjBpbWFnZSUzRiUyMiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnR5cGUlMjIlM0ElMjAlMjJpbWFnZV91cmwlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJpbWFnZV91cmwlMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ1cmwlMjIlM0ElMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZodWdnaW5nZmFjZSUyRmRvY3VtZW50YXRpb24taW1hZ2VzJTJGcmVzb2x2ZSUyRm1haW4lMkZ0cmFuc2Zvcm1lcnMlMkZyYWJiaXQucG5nJTIyJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTVEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycGFyYW1ldGVycyUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm1heF9uZXdfdG9rZW5zJTIyJTNBJTIwMjU2JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZG9fc2FtcGxlJTIyJTNBJTIwdHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnRvcF9wJTIyJTNBJTIwMC45NSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnRlbXBlcmF0dXJlJTIyJTNBJTIwMS4wJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyc3RyZWFtJTIyJTNBJTIwZmFsc2UlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMEElMjAlMjAlMjAlMjAlNUQlMEElN0Q=",highlighted:`<span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;instances&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span>
<span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;messages&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span>
<span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;role&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;user&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;content&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span>
<span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;type&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;text&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;text&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;What&#x27;s in this image?&quot;</span>
<span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span>
<span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;type&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;image_url&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;image_url&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;url&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png&quot;</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">]</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;parameters&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;max_new_tokens&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-number">256</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;do_sample&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;top_p&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0.95</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;temperature&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1.0</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;stream&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">false</span></span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">]</span>
<span class="hljs-punctuation">}</span>`,wrap:!1}}),Ge=new A({props:{title:"Resource clean-up",local:"resource-clean-up",headingTag:"h2"}}),Be=new j({props:{code:"ZGVwbG95ZWRfbW9kZWwudW5kZXBsb3lfYWxsKCklMEFkZXBsb3llZF9tb2RlbC5kZWxldGUoKSUwQW1vZGVsLmRlbGV0ZSgp",highlighted:`deployed_model.undeploy_all()
deployed_model.delete()
model.delete()`,wrap:!1}}),E=new Kt({props:{$$slots:{default:[Kl]},$$scope:{ctx:f}}}),Ve=new Dl({props:{source:"https://github.com/huggingface/Google-Cloud-Containers/blob/main/docs/source/examples/vertex-ai-notebooks-deploy-llama-vision-on-vertex-ai.mdx"}}),{c(){p=o("meta"),m=s(),c=o("p"),I=s(),y(h.$$.fragment),u=s(),d=o("p"),d.innerHTML=qe,C=s(),g=o("p"),g.innerHTML=tl,Le=s(),y(b.$$.fragment),Se=s(),G=o("p"),G.innerHTML=ll,Qe=s(),y($.$$.fragment),ke=s(),x=o("p"),x.innerHTML=nl,We=s(),B=o("p"),B.innerHTML=sl,He=s(),y(_.$$.fragment),Re=s(),Z=o("p"),Z.textContent=al,Fe=s(),y(V.$$.fragment),Xe=s(),q=o("p"),q.textContent=ol,Ye=s(),y(N.$$.fragment),Pe=s(),L=o("p"),L.textContent=Ml,De=s(),y(S.$$.fragment),ze=s(),Q=o("p"),Q.innerHTML=il,Oe=s(),y(k.$$.fragment),Ke=s(),y(W.$$.fragment),et=s(),H=o("p"),H.innerHTML=pl,tt=s(),R=o("p"),R.innerHTML=yl,lt=s(),y(F.$$.fragment),nt=s(),y(X.$$.fragment),st=s(),Y=o("p"),Y.innerHTML=Tl,at=s(),P=o("p"),P.innerHTML=Jl,ot=s(),D=o("ul"),D.innerHTML=rl,Mt=s(),z=o("p"),z.innerHTML=Ul,it=s(),y(v.$$.fragment),pt=s(),y(O.$$.fragment),yt=s(),K=o("p"),K.innerHTML=wl,Tt=s(),y(ee.$$.fragment),Jt=s(),te=o("p"),te.textContent=Il,rt=s(),le=o("p"),le.innerHTML=cl,Ut=s(),y(ne.$$.fragment),wt=s(),se=o("p"),se.innerHTML=ul,It=s(),ae=o("p"),ae.textContent=dl,ct=s(),oe=o("p"),oe.innerHTML=Cl,ut=s(),Me=o("p"),Me.innerHTML=jl,dt=s(),ie=o("ul"),ie.innerHTML=ml,Ct=s(),pe=o("p"),pe.innerHTML=hl,jt=s(),y(ye.$$.fragment),mt=s(),Te=o("p"),Te.innerHTML=gl,ht=s(),Je=o("p"),Je.innerHTML=Al,gt=s(),y(re.$$.fragment),At=s(),Ue=o("p"),Ue.innerHTML=fl,ft=s(),we=o("p"),we.innerHTML=bl,bt=s(),y(Ie.$$.fragment),vt=s(),y(ce.$$.fragment),Et=s(),ue=o("p"),ue.innerHTML=vl,Gt=s(),y(de.$$.fragment),$t=s(),Ce=o("table"),Ce.innerHTML=El,xt=s(),y(je.$$.fragment),Bt=s(),me=o("p"),me.innerHTML=Gl,_t=s(),he=o("p"),he.innerHTML=$l,Zt=s(),y(ge.$$.fragment),Vt=s(),Ae=o("table"),Ae.innerHTML=xl,qt=s(),y(fe.$$.fragment),Nt=s(),be=o("p"),be.textContent=Bl,Lt=s(),y(ve.$$.fragment),St=s(),Ee=o("p"),Ee.innerHTML=_l,Qt=s(),y(Ge.$$.fragment),kt=s(),$e=o("p"),$e.textContent=Zl,Wt=s(),xe=o("ul"),xe.innerHTML=Vl,Ht=s(),y(Be.$$.fragment),Rt=s(),_e=o("p"),_e.textContent=ql,Ft=s(),Ze=o("ul"),Ze.innerHTML=Nl,Xt=s(),Yt=o("hr"),Pt=s(),y(E.$$.fragment),Dt=s(),y(Ve.$$.fragment),zt=s(),Ne=o("p"),this.h()},l(e){const t=Yl("svelte-u9bgzb",document.head);p=M(t,"META",{name:!0,content:!0}),t.forEach(l),m=a(e),c=M(e,"P",{}),kl(c).forEach(l),I=a(e),T(h.$$.fragment,e),u=a(e),d=M(e,"P",{"data-svelte-h":!0}),i(d)!=="svelte-1alulrj"&&(d.innerHTML=qe),C=a(e),g=M(e,"P",{"data-svelte-h":!0}),i(g)!=="svelte-1905465"&&(g.innerHTML=tl),Le=a(e),T(b.$$.fragment,e),Se=a(e),G=M(e,"P",{"data-svelte-h":!0}),i(G)!=="svelte-o74xqu"&&(G.innerHTML=ll),Qe=a(e),T($.$$.fragment,e),ke=a(e),x=M(e,"P",{"data-svelte-h":!0}),i(x)!=="svelte-1o67gfb"&&(x.innerHTML=nl),We=a(e),B=M(e,"P",{"data-svelte-h":!0}),i(B)!=="svelte-1ebrquh"&&(B.innerHTML=sl),He=a(e),T(_.$$.fragment,e),Re=a(e),Z=M(e,"P",{"data-svelte-h":!0}),i(Z)!=="svelte-1i8n6rq"&&(Z.textContent=al),Fe=a(e),T(V.$$.fragment,e),Xe=a(e),q=M(e,"P",{"data-svelte-h":!0}),i(q)!=="svelte-15nvzfz"&&(q.textContent=ol),Ye=a(e),T(N.$$.fragment,e),Pe=a(e),L=M(e,"P",{"data-svelte-h":!0}),i(L)!=="svelte-1nm8o2l"&&(L.textContent=Ml),De=a(e),T(S.$$.fragment,e),ze=a(e),Q=M(e,"P",{"data-svelte-h":!0}),i(Q)!=="svelte-14aynbe"&&(Q.innerHTML=il),Oe=a(e),T(k.$$.fragment,e),Ke=a(e),T(W.$$.fragment,e),et=a(e),H=M(e,"P",{"data-svelte-h":!0}),i(H)!=="svelte-1tq8rb1"&&(H.innerHTML=pl),tt=a(e),R=M(e,"P",{"data-svelte-h":!0}),i(R)!=="svelte-pgxbss"&&(R.innerHTML=yl),lt=a(e),T(F.$$.fragment,e),nt=a(e),T(X.$$.fragment,e),st=a(e),Y=M(e,"P",{"data-svelte-h":!0}),i(Y)!=="svelte-1voap9p"&&(Y.innerHTML=Tl),at=a(e),P=M(e,"P",{"data-svelte-h":!0}),i(P)!=="svelte-zklutt"&&(P.innerHTML=Jl),ot=a(e),D=M(e,"UL",{"data-svelte-h":!0}),i(D)!=="svelte-1hjc6dt"&&(D.innerHTML=rl),Mt=a(e),z=M(e,"P",{"data-svelte-h":!0}),i(z)!=="svelte-1gbrlfd"&&(z.innerHTML=Ul),it=a(e),T(v.$$.fragment,e),pt=a(e),T(O.$$.fragment,e),yt=a(e),K=M(e,"P",{"data-svelte-h":!0}),i(K)!=="svelte-le5c8x"&&(K.innerHTML=wl),Tt=a(e),T(ee.$$.fragment,e),Jt=a(e),te=M(e,"P",{"data-svelte-h":!0}),i(te)!=="svelte-2nwexf"&&(te.textContent=Il),rt=a(e),le=M(e,"P",{"data-svelte-h":!0}),i(le)!=="svelte-1lovlvw"&&(le.innerHTML=cl),Ut=a(e),T(ne.$$.fragment,e),wt=a(e),se=M(e,"P",{"data-svelte-h":!0}),i(se)!=="svelte-13pvwn4"&&(se.innerHTML=ul),It=a(e),ae=M(e,"P",{"data-svelte-h":!0}),i(ae)!=="svelte-it668a"&&(ae.textContent=dl),ct=a(e),oe=M(e,"P",{"data-svelte-h":!0}),i(oe)!=="svelte-njbdvc"&&(oe.innerHTML=Cl),ut=a(e),Me=M(e,"P",{"data-svelte-h":!0}),i(Me)!=="svelte-1ezlr9j"&&(Me.innerHTML=jl),dt=a(e),ie=M(e,"UL",{"data-svelte-h":!0}),i(ie)!=="svelte-1ibxwcd"&&(ie.innerHTML=ml),Ct=a(e),pe=M(e,"P",{"data-svelte-h":!0}),i(pe)!=="svelte-1d6p8k5"&&(pe.innerHTML=hl),jt=a(e),T(ye.$$.fragment,e),mt=a(e),Te=M(e,"P",{"data-svelte-h":!0}),i(Te)!=="svelte-mraq68"&&(Te.innerHTML=gl),ht=a(e),Je=M(e,"P",{"data-svelte-h":!0}),i(Je)!=="svelte-1s0utoz"&&(Je.innerHTML=Al),gt=a(e),T(re.$$.fragment,e),At=a(e),Ue=M(e,"P",{"data-svelte-h":!0}),i(Ue)!=="svelte-166ya0u"&&(Ue.innerHTML=fl),ft=a(e),we=M(e,"P",{"data-svelte-h":!0}),i(we)!=="svelte-znwdqe"&&(we.innerHTML=bl),bt=a(e),T(Ie.$$.fragment,e),vt=a(e),T(ce.$$.fragment,e),Et=a(e),ue=M(e,"P",{"data-svelte-h":!0}),i(ue)!=="svelte-hnmp6i"&&(ue.innerHTML=vl),Gt=a(e),T(de.$$.fragment,e),$t=a(e),Ce=M(e,"TABLE",{"data-svelte-h":!0}),i(Ce)!=="svelte-ta69sk"&&(Ce.innerHTML=El),xt=a(e),T(je.$$.fragment,e),Bt=a(e),me=M(e,"P",{"data-svelte-h":!0}),i(me)!=="svelte-gm5hxo"&&(me.innerHTML=Gl),_t=a(e),he=M(e,"P",{"data-svelte-h":!0}),i(he)!=="svelte-31hjql"&&(he.innerHTML=$l),Zt=a(e),T(ge.$$.fragment,e),Vt=a(e),Ae=M(e,"TABLE",{"data-svelte-h":!0}),i(Ae)!=="svelte-hsy8it"&&(Ae.innerHTML=xl),qt=a(e),T(fe.$$.fragment,e),Nt=a(e),be=M(e,"P",{"data-svelte-h":!0}),i(be)!=="svelte-1ajbcx5"&&(be.textContent=Bl),Lt=a(e),T(ve.$$.fragment,e),St=a(e),Ee=M(e,"P",{"data-svelte-h":!0}),i(Ee)!=="svelte-19i1uxg"&&(Ee.innerHTML=_l),Qt=a(e),T(Ge.$$.fragment,e),kt=a(e),$e=M(e,"P",{"data-svelte-h":!0}),i($e)!=="svelte-l6tvv1"&&($e.textContent=Zl),Wt=a(e),xe=M(e,"UL",{"data-svelte-h":!0}),i(xe)!=="svelte-16ibgml"&&(xe.innerHTML=Vl),Ht=a(e),T(Be.$$.fragment,e),Rt=a(e),_e=M(e,"P",{"data-svelte-h":!0}),i(_e)!=="svelte-sgegyq"&&(_e.textContent=ql),Ft=a(e),Ze=M(e,"UL",{"data-svelte-h":!0}),i(Ze)!=="svelte-1p833nz"&&(Ze.innerHTML=Nl),Xt=a(e),Yt=M(e,"HR",{}),Pt=a(e),T(E.$$.fragment,e),Dt=a(e),T(Ve.$$.fragment,e),zt=a(e),Ne=M(e,"P",{}),kl(Ne).forEach(l),this.h()},h(){Wl(p,"name","hf:doc:metadata"),Wl(p,"content",tn)},m(e,t){Pl(document.head,p),n(e,m,t),n(e,c,t),n(e,I,t),J(h,e,t),n(e,u,t),n(e,d,t),n(e,C,t),n(e,g,t),n(e,Le,t),J(b,e,t),n(e,Se,t),n(e,G,t),n(e,Qe,t),J($,e,t),n(e,ke,t),n(e,x,t),n(e,We,t),n(e,B,t),n(e,He,t),J(_,e,t),n(e,Re,t),n(e,Z,t),n(e,Fe,t),J(V,e,t),n(e,Xe,t),n(e,q,t),n(e,Ye,t),J(N,e,t),n(e,Pe,t),n(e,L,t),n(e,De,t),J(S,e,t),n(e,ze,t),n(e,Q,t),n(e,Oe,t),J(k,e,t),n(e,Ke,t),J(W,e,t),n(e,et,t),n(e,H,t),n(e,tt,t),n(e,R,t),n(e,lt,t),J(F,e,t),n(e,nt,t),J(X,e,t),n(e,st,t),n(e,Y,t),n(e,at,t),n(e,P,t),n(e,ot,t),n(e,D,t),n(e,Mt,t),n(e,z,t),n(e,it,t),J(v,e,t),n(e,pt,t),J(O,e,t),n(e,yt,t),n(e,K,t),n(e,Tt,t),J(ee,e,t),n(e,Jt,t),n(e,te,t),n(e,rt,t),n(e,le,t),n(e,Ut,t),J(ne,e,t),n(e,wt,t),n(e,se,t),n(e,It,t),n(e,ae,t),n(e,ct,t),n(e,oe,t),n(e,ut,t),n(e,Me,t),n(e,dt,t),n(e,ie,t),n(e,Ct,t),n(e,pe,t),n(e,jt,t),J(ye,e,t),n(e,mt,t),n(e,Te,t),n(e,ht,t),n(e,Je,t),n(e,gt,t),J(re,e,t),n(e,At,t),n(e,Ue,t),n(e,ft,t),n(e,we,t),n(e,bt,t),J(Ie,e,t),n(e,vt,t),J(ce,e,t),n(e,Et,t),n(e,ue,t),n(e,Gt,t),J(de,e,t),n(e,$t,t),n(e,Ce,t),n(e,xt,t),J(je,e,t),n(e,Bt,t),n(e,me,t),n(e,_t,t),n(e,he,t),n(e,Zt,t),J(ge,e,t),n(e,Vt,t),n(e,Ae,t),n(e,qt,t),J(fe,e,t),n(e,Nt,t),n(e,be,t),n(e,Lt,t),J(ve,e,t),n(e,St,t),n(e,Ee,t),n(e,Qt,t),J(Ge,e,t),n(e,kt,t),n(e,$e,t),n(e,Wt,t),n(e,xe,t),n(e,Ht,t),J(Be,e,t),n(e,Rt,t),n(e,_e,t),n(e,Ft,t),n(e,Ze,t),n(e,Xt,t),n(e,Yt,t),n(e,Pt,t),J(E,e,t),n(e,Dt,t),J(Ve,e,t),n(e,zt,t),n(e,Ne,t),Ot=!0},p(e,[t]){const Ll={};t&2&&(Ll.$$scope={dirty:t,ctx:e}),b.$set(Ll);const Sl={};t&2&&(Sl.$$scope={dirty:t,ctx:e}),v.$set(Sl);const Ql={};t&2&&(Ql.$$scope={dirty:t,ctx:e}),E.$set(Ql)},i(e){Ot||(r(h.$$.fragment,e),r(b.$$.fragment,e),r($.$$.fragment,e),r(_.$$.fragment,e),r(V.$$.fragment,e),r(N.$$.fragment,e),r(S.$$.fragment,e),r(k.$$.fragment,e),r(W.$$.fragment,e),r(F.$$.fragment,e),r(X.$$.fragment,e),r(v.$$.fragment,e),r(O.$$.fragment,e),r(ee.$$.fragment,e),r(ne.$$.fragment,e),r(ye.$$.fragment,e),r(re.$$.fragment,e),r(Ie.$$.fragment,e),r(ce.$$.fragment,e),r(de.$$.fragment,e),r(je.$$.fragment,e),r(ge.$$.fragment,e),r(fe.$$.fragment,e),r(ve.$$.fragment,e),r(Ge.$$.fragment,e),r(Be.$$.fragment,e),r(E.$$.fragment,e),r(Ve.$$.fragment,e),Ot=!0)},o(e){U(h.$$.fragment,e),U(b.$$.fragment,e),U($.$$.fragment,e),U(_.$$.fragment,e),U(V.$$.fragment,e),U(N.$$.fragment,e),U(S.$$.fragment,e),U(k.$$.fragment,e),U(W.$$.fragment,e),U(F.$$.fragment,e),U(X.$$.fragment,e),U(v.$$.fragment,e),U(O.$$.fragment,e),U(ee.$$.fragment,e),U(ne.$$.fragment,e),U(ye.$$.fragment,e),U(re.$$.fragment,e),U(Ie.$$.fragment,e),U(ce.$$.fragment,e),U(de.$$.fragment,e),U(je.$$.fragment,e),U(ge.$$.fragment,e),U(fe.$$.fragment,e),U(ve.$$.fragment,e),U(Ge.$$.fragment,e),U(Be.$$.fragment,e),U(E.$$.fragment,e),U(Ve.$$.fragment,e),Ot=!1},d(e){e&&(l(m),l(c),l(I),l(u),l(d),l(C),l(g),l(Le),l(Se),l(G),l(Qe),l(ke),l(x),l(We),l(B),l(He),l(Re),l(Z),l(Fe),l(Xe),l(q),l(Ye),l(Pe),l(L),l(De),l(ze),l(Q),l(Oe),l(Ke),l(et),l(H),l(tt),l(R),l(lt),l(nt),l(st),l(Y),l(at),l(P),l(ot),l(D),l(Mt),l(z),l(it),l(pt),l(yt),l(K),l(Tt),l(Jt),l(te),l(rt),l(le),l(Ut),l(wt),l(se),l(It),l(ae),l(ct),l(oe),l(ut),l(Me),l(dt),l(ie),l(Ct),l(pe),l(jt),l(mt),l(Te),l(ht),l(Je),l(gt),l(At),l(Ue),l(ft),l(we),l(bt),l(vt),l(Et),l(ue),l(Gt),l($t),l(Ce),l(xt),l(Bt),l(me),l(_t),l(he),l(Zt),l(Vt),l(Ae),l(qt),l(Nt),l(be),l(Lt),l(St),l(Ee),l(Qt),l(kt),l($e),l(Wt),l(xe),l(Ht),l(Rt),l(_e),l(Ft),l(Ze),l(Xt),l(Yt),l(Pt),l(Dt),l(zt),l(Ne)),l(p),w(h,e),w(b,e),w($,e),w(_,e),w(V,e),w(N,e),w(S,e),w(k,e),w(W,e),w(F,e),w(X,e),w(v,e),w(O,e),w(ee,e),w(ne,e),w(ye,e),w(re,e),w(Ie,e),w(ce,e),w(de,e),w(je,e),w(ge,e),w(fe,e),w(ve,e),w(Ge,e),w(Be,e),w(E,e),w(Ve,e)}}}const tn='{"title":"Deploy Llama 3.2 11B Vision with TGI DLC on Vertex AI","local":"deploy-llama-32-11b-vision-with-tgi-dlc-on-vertex-ai","sections":[{"title":"Setup / Configuration","local":"setup--configuration","sections":[],"depth":2},{"title":"Register model on Vertex AI","local":"register-model-on-vertex-ai","sections":[],"depth":2},{"title":"Deploy model on Vertex AI","local":"deploy-model-on-vertex-ai","sections":[],"depth":2},{"title":"Online predictions on Vertex AI","local":"online-predictions-on-vertex-ai","sections":[{"title":"Via Python","local":"via-python","sections":[{"title":"Within the same session","local":"within-the-same-session","sections":[],"depth":4},{"title":"From a different session","local":"from-a-different-session","sections":[],"depth":4}],"depth":3},{"title":"Via the Vertex AI Online Prediction UI","local":"via-the-vertex-ai-online-prediction-ui","sections":[],"depth":3}],"depth":2},{"title":"Resource clean-up","local":"resource-clean-up","sections":[],"depth":2}],"depth":1}';function ln(f){return Rl(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class pn extends Fl{constructor(p){super(),Xl(this,p,ln,en,Hl,{})}}export{pn as component};

Xet Storage Details

Size:
57.7 kB
·
Xet hash:
95dbb27e2118b7405d0f57e2a5343ead615b06881fbc1059deb9d33e340504b5

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.