Buckets:

rtrm's picture
download
raw
76.9 kB
/*
 * Machine-generated, minified module. The data-svelte-h attributes and svelte-prefixed
 * hashes used below suggest compiled Svelte output (NOTE(review): confirm against the build pipeline).
 * Helper names such as r, u, p, l, n and b are single-letter aliases imported from the
 * scheduler and index chunks; U, T and J are imported from the Tip, CodeBlock and
 * getInferenceSnippets chunks. Edit the documentation source rather than this file
 * if it is indeed a build artifact.
 */
import{s as Es,o as _s,n as b}from"../chunks/scheduler.b108d059.js";import{S as Zs,i as Rs,g as r,s,r as c,A as Hs,h as u,f as n,c as o,j as Ls,u as m,x as p,k as ks,y as Ws,a as l,v as d,d as M,t as h,w as y}from"../chunks/index.008de539.js";import{T as U}from"../chunks/Tip.aeb15ab7.js";import{C as T}from"../chunks/CodeBlock.7b00c886.js";import{H as J,E as Bs}from"../chunks/getInferenceSnippets.e921e30e.js";/* qs(w): returns a fragment object for a single paragraph. c() stores the result of r called with the p tag name in a and assigns the static markup f as innerHTML; l(i) takes the node from the imported u and reassigns f only when the imported p(a) differs from the recorded svelte hash; m(i, g) hands the node to the imported l; the p slot is the imported b; d(i) calls n(a) when i is truthy. The parameter w is not read. Text: pointer to the one-click deployment guide. */function qs(w){let a,f='Note that this example will go through the Python SDK / Azure CLI programmatic deployment, if you’d rather prefer using the one-click deployment experience, please check <a href="https://huggingface.co/docs/microsoft-azure/guides/one-click-deployment-azure-ml" rel="nofollow">One-click deployments from the Hugging Face Hub on Azure ML</a>. But note that when deploying from the Hugging Face Hub, the endpoint + deployment will be created within Azure ML instead of within Azure AI Foundry, whereas this example focuses on Azure AI Foundry Hub deployments (also made available on Azure ML, but not the other way around).';return{c(){a=r("p"),a.innerHTML=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-147duvx"&&(a.innerHTML=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* Qs(w): same single-paragraph fragment shape (c, l, m, p, d), but f holds plain text and is assigned via textContent. Text: which LLMs can be selected for deployment. */function Qs(w){let a,f="Note that you can select any LLM available on the Hugging Face Hub with the “Deploy to AzureML” option enabled, or directly select any of the LLMs available in either the Azure ML or Azure AI Foundry Hub Model Catalog under the “HuggingFace” collection (note that for Azure AI Foundry the Hugging Face Collection will only be available for Hub-based projects).";return{c(){a=r("p"),a.textContent=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-1a5zyti"&&(a.textContent=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* Vs(w): single-paragraph fragment object (c, l, m, p, d); f contains anchors and is assigned via innerHTML. Text: alternative ways to create the Azure Resource Group. */function Vs(w){let a,f='You can also create the Azure Resource Group <a href="https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/manage-resource-groups-portal" rel="nofollow">via the Azure Portal</a>, or <a 
href="https://learn.microsoft.com/en-us/azure/developer/python/sdk/examples/azure-sdk-example-resource-group?tabs=bash" rel="nofollow">via the Azure Resource Management Python SDK</a> (requires it to be installed as <code>pip install azure-mgmt-resource</code> in advance).';return{c(){a=r("p"),a.innerHTML=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-9y4bdx"&&(a.innerHTML=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* Fs(w): single-paragraph fragment object (c, l, m, p, d); f contains code markup and is assigned via innerHTML. Text: the --kind hub flag is what distinguishes an Azure AI Foundry Hub from a standard workspace. NOTE(review): the text reads an standard, likely a typo for a standard; correct it in the upstream documentation source. */function Fs(w){let a,f="Note that the main difference with an standard Azure ML Workspace is that the Azure AI Foundry Hub command requires you to specify the <code>--kind hub</code>, removing it would create a standard Azure ML Workspace instead, so you wouldn’t benefit from the features that the Azure AI Foundry brings. But, when you create an Azure AI Foundry Hub, you can still benefit from all the features that Azure ML brings, since the Azure AI Foundry Hub will still rely on Azure ML, but not the other way around.";return{c(){a=r("p"),a.innerHTML=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-md1zaj"&&(a.innerHTML=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* xs(w): single-paragraph fragment object (c, l, m, p, d); f is assigned via innerHTML. Text: the --filtered-kinds argument needs Azure ML CLI 2.37.0 or an extension update. */function xs(w){let a,f='The <code>--filtered-kinds</code> argument has been recently included as of <a href="https://learn.microsoft.com/en-us/azure/machine-learning/azure-machine-learning-release-notes-cli-v2?view=azureml-api-2#azure-machine-learning-cli-v2-v-2370" rel="nofollow">Azure ML CLI 2.37.0</a>, meaning that you may need to upgrade <code>az ml</code> as <code>az extension update --name ml</code>.';return{c(){a=r("p"),a.innerHTML=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-13uwmnt"&&(a.innerHTML=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* Gs(w): single-paragraph fragment object (c, l, m, p, d); f is assigned via innerHTML. Text: alternative ways to create the Azure AI Foundry Hub. */function Gs(w){let a,f='You can also create the Azure AI Foundry Hub <a href="https://learn.microsoft.com/en-us/azure/ai-foundry/how-to/create-secure-ai-hub" rel="nofollow">via the Azure Portal</a>, or <a href="https://learn.microsoft.com/en-us/azure/ai-foundry/how-to/develop/create-hub-project-sdk?tabs=python" rel="nofollow">via the 
Azure ML Python SDK</a>, among other options listed in <a href="https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/ai-resources" rel="nofollow">Manage AI Hub Resources</a>.';return{c(){a=r("p"),a.innerHTML=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-1dqchg3"&&(a.innerHTML=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* Ns(w): single-paragraph fragment object (c, l, m, p, d); f is plain text assigned via textContent. Text: endpoint names are unique per region and limited to 3-32 characters. NOTE(review): the text reads must to be, likely a typo for must be; correct it in the upstream documentation source. */function Ns(w){let a,f="Note that endpoint names must to be globally unique per region i.e., even if you don’t have any endpoint named that way running under your subscription, if the name is reserved by another Azure customer, then you won’t be able to use the same name. Adding a timestamp or a custom identifier is recommended to prevent running into HTTP 400 validation issues when trying to deploy an endpoint with an already locked / reserved name. Also the endpoint name must be between 3 and 32 characters long.";return{c(){a=r("p"),a.textContent=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-zrbum5"&&(a.textContent=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* Ss(w): single-paragraph fragment object (c, l, m, p, d); f contains code markup and is assigned via innerHTML. Text: which name to pass as workspace_name when building the MLClient. */function Ss(w){let a,f="On standard Azure ML deployments you’d need to create the <code>MLClient</code> using the Azure ML Workspace as the <code>workspace_name</code> whereas for Azure AI Foundry, you need to provide the Azure AI Foundry Hub name as the <code>workspace_name</code> instead, and that will deploy the endpoint under the Azure AI Foundry too.";return{c(){a=r("p"),a.innerHTML=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-fg1top"&&(a.innerHTML=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* Xs(w): single-paragraph fragment object (c, l, m, p, d); f is assigned via innerHTML. Text: GPU quota increases and where to read about requesting them. */function Xs(w){let a,f='Since for some models and inference engines you need to run those on a GPU-accelerated instance, you may need to request a quota increase for some of the supported instances as per the model you want to deploy. Also, keep into consideration that each model comes with a list of all the supported instances, being the recommended one for each tier the lower instance in terms of available VRAM. 
Read more about quota increase requests for Azure ML at <a href="https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-quotas?view=azureml-api-2" rel="nofollow">Manage and increase quotas and limits for resources with Azure Machine Learning</a>.';return{c(){a=r("p"),a.innerHTML=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-3styqc"&&(a.innerHTML=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* Ps(w): single-paragraph fragment object (c, l, m, p, d); f is plain text assigned via textContent. Text: when the endpoint becomes visible in Azure AI Foundry compared with Azure ML. */function Ps(w){let a,f="In Azure AI Foundry the endpoint will only be listed within the “My assets -> Models + endpoints” tab once the deployment is created, not before as in Azure ML where the endpoint is shown even if it doesn’t contain any active or in-progress deployments.";return{c(){a=r("p"),a.textContent=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-13fiegq"&&(a.textContent=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* Ys(w): single-paragraph fragment object (c, l, m, p, d); f is plain text assigned via textContent. Text: expected deployment duration. */function Ys(w){let a,f="Note that whilst the Azure AI Endpoint creation is relatively fast, the deployment will take longer since it needs to allocate the resources on Azure so expect it to take ~10-15 minutes, but it could as well take longer depending on the instance provisioning and availability.";return{c(){a=r("p"),a.textContent=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-1q2pvid"&&(a.textContent=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* Ds(w): single-paragraph fragment object (c, l, m, p, d); f contains code markup and is assigned via innerHTML. Text: the azureml-model-deployment header and token needed to reach any route of the endpoint. */function Ds(w){let a,f="Note that below only some of the options are listed, but you can send requests to the deployed endpoint as long as you send the HTTP requests with the <code>azureml-model-deployment</code> header set to the name of the Azure AI Deployment (not the Endpoint), and have the necessary authentication token / key to send requests to the given endpoint; then you can send HTTP request to all the routes that the backend engine is exposing, not only to the scoring route.";return{c(){a=r("p"),a.innerHTML=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-1fmd54g"&&(a.innerHTML=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* Os(w): single-paragraph fragment object (c, l, m, p, d); f is assigned via innerHTML. Text: status of azure-ai-inference SDK support for Hugging Face models. */function Os(w){let a,f='Support for Hugging Face models 
via <a href="https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/ai/azure-ai-inference" rel="nofollow"><code>azure-ai-inference</code> Python SDK</a> is still a work in progress, but that will be included soon and set as the recommended inference method, stay tuned!';return{c(){a=r("p"),a.innerHTML=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-1c2r502"&&(a.innerHTML=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* Ks(w): single-paragraph fragment object (c, l, m, p, d); f is plain text assigned via textContent. Text: the Azure ML Python SDK needs the request payload as a JSON file. */function Ks(w){let a,f="Note that the Azure ML Python SDK requires a path to a JSON file when invoking the endpoints, meaning that whatever payload you want to send to the endpoint will need to be first converted into a JSON file, whilst that only applies to the requests sent via the Azure ML Python SDK.";return{c(){a=r("p"),a.textContent=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-1tlguht"&&(a.textContent=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* eo(w): larger fragment object. It builds paragraph a (text f), an instance g of the imported T with code, highlighted and wrap props, and paragraph A (text C), with i and j holding the results of s() between them. Besides c, l, m, p and d it exposes i and o: i calls M on g.$$.fragment only while the flag G is false and then sets G, o calls h on it and clears G. d removes a, i, j and A via n when its argument is truthy and always calls y on g. */function eo(w){let a,f="Alternatively, you can also build the API URL manually as it follows, since the URIs are globally unique per region, meaning that there will only be one endpoint named the same way within the same region:",i,g,j,A,C="Or just retrieve it from either the Azure AI Foundry or the Azure ML Studio.",G;return g=new T({props:{code:"YXBpX3VybCUyMCUzRCUyMGYlMjJodHRwcyUzQSUyRiUyRiU3Qm9zLmdldGVudignRU5EUE9JTlRfTkFNRScpJTdELiU3Qm9zLmdldGVudignTE9DQVRJT04nKSU3RC5pbmZlcmVuY2UubWwuYXp1cmUuY29tJTJGdjElMjI=",highlighted:'api_url = <span class="hljs-string">f&quot;https://<span class="hljs-subst">{os.getenv(<span class="hljs-string">&#x27;ENDPOINT_NAME&#x27;</span>)}</span>.<span class="hljs-subst">{os.getenv(<span 
class="hljs-string">&#x27;LOCATION&#x27;</span>)}</span>.inference.ml.azure.com/v1&quot;</span>',wrap:!1}}),{c(){a=r("p"),a.textContent=f,i=s(),c(g.$$.fragment),j=s(),A=r("p"),A.textContent=C},l($){a=u($,"P",{"data-svelte-h":!0}),p(a)!=="svelte-ufs9gh"&&(a.textContent=f),i=o($),m(g.$$.fragment,$),j=o($),A=u($,"P",{"data-svelte-h":!0}),p(A)!=="svelte-vz1qru"&&(A.textContent=C)},m($,I){l($,a,I),l($,i,I),d(g,$,I),l($,j,I),l($,A,I),G=!0},p:b,i($){G||(M(g.$$.fragment,$),G=!0)},o($){h(g.$$.fragment,$),G=!1},d($){$&&(n(a),n(i),n(j),n(A)),y(g,$)}}}/* to(w): single-paragraph fragment object (c, l, m, p, d); f contains anchors and is assigned via innerHTML. Text: suggestion to host the Gradio chat interface as an Azure Container App. */function to(w){let a,f='Ideally you could deploy the Gradio Chat Interface connected to your Azure ML Managed Online Endpoint as an Azure Container App as described in <a href="https://learn.microsoft.com/en-us/azure/container-apps/tutorial-deploy-from-code?tabs=python" rel="nofollow">Tutorial: Build and deploy from source code to Azure Container Apps</a>. If you’d like us to show you how to do it for Gradio in particular, feel free to <a href="https://github.com/huggingface/Microsoft-Azure/issues/new" rel="nofollow">open an issue requesting it</a>.';return{c(){a=r("p"),a.innerHTML=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-ncfpy5"&&(a.innerHTML=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}/* no(w): single-paragraph fragment object (c, l, m, p, d); f is assigned via innerHTML. Text: link to the complete example notebook on GitHub. */function no(w){let a,f='📍 Find the complete example on GitHub <a href="https://github.com/huggingface/Microsoft-Azure/tree/main/examples/azure-ai/deploy-large-language-models/azure-notebook.ipynb" rel="nofollow">here</a>!';return{c(){a=r("p"),a.innerHTML=f},l(i){a=u(i,"P",{"data-svelte-h":!0}),p(a)!=="svelte-1dt9wzm"&&(a.innerHTML=f)},m(i,g){l(i,a,g)},p:b,d(i){i&&n(a)}}}function lo(w){let a,f,i,g,j,A,C,G="This example showcases how to deploy a Large Language Model (LLM) from the Hugging Face Collection in Azure AI Foundry Hub as an Azure ML Managed Online Endpoint, powered by Hugging Face’s Text Generation Inference (TGI). 
Additionally, this example also showcases how to run inference with both the Azure ML Python SDK, the OpenAI Python SDK, and even how to locally run a Gradio application for chat completion.",$,I,xt,N,pa="TL;DR Text Generation Inference (TGI) is a solution developed by Hugging Face for deploying and serving LLMs and VLMs with high performance text generation. Azure AI Foundry provides a unified platform for enterprise AI operations, model builders, and application development. Azure Machine Learning is a cloud service for accelerating and managing the machine learning (ML) project lifecycle.",Gt,Nt,St,S,ca='This example will specifically deploy <a href="https://huggingface.co/Qwen/Qwen2.5-32B-Instruct" rel="nofollow"><code>Qwen/Qwen2.5-32B-Instruct</code></a> from the Hugging Face Hub (or see it on <a href="https://ml.azure.com/models/qwen-qwen2.5-32b-instruct/version/1/catalog/registry/HuggingFace" rel="nofollow">AzureML</a> or on <a href="https://ai.azure.com/explore/models/qwen-qwen2.5-32b-instruct/version/1/registry/HuggingFace" rel="nofollow">Azure AI Foundry</a>) as an Azure ML Managed Online Endpoint on Azure AI Foundry Hub.',Xt,X,ma="Qwen2.5 is one of the latest series of Qwen large language models, bringing the following improvements upon Qwen2 such as:",Pt,P,da="<li>Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.</li> <li>Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. 
More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.</li> <li>Long-context Support up to 128K tokens and can generate up to 8K tokens.</li> <li>Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.</li>",Yt,Y,Ma='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/microsoft-azure/azure-ai/deploy-large-language-models/qwen2.5-hub.png" alt="Qwen2.5 32B Instruct on the Hugging Face Hub"/>',Dt,D,ha='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/microsoft-azure/azure-ai/deploy-large-language-models/qwen2.5-azure-ml.png" alt="Qwen2.5 32B Instruct on Azure ML"/>',Ot,O,ya='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/microsoft-azure/azure-ai/deploy-large-language-models/qwen2.5-azure-ai-foundry.png" alt="Qwen2.5 32B Instruct on Azure AI Foundry"/>',Kt,K,fa='For more information, make sure to check <a href="https://huggingface.co/Qwen/Qwen2.5-32B-Instruct/blob/main/README.md" rel="nofollow">their model card on the Hugging Face Hub</a>.',en,z,tn,ee,nn,te,ga='To run the following example, you will need to comply with the following pre-requisites, alternatively, you can also read more about those in the <a href="https://learn.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources?view=azureml-api-2" rel="nofollow">Azure Machine Learning Tutorial: Create resources you need to get started</a>.',ln,ne,an,le,wa='A Microsoft Azure account with an active subscription. 
If you don’t have a Microsoft Azure account, you can now <a href="https://azure.microsoft.com/en-us/pricing/purchase-options/azure-account" rel="nofollow">create one for free</a>, including 200 USD worth of credits to use within the next 30 days after the account creation.',sn,ae,on,se,Ta='The Azure CLI (<code>az</code>) installed on the instance that you’re running this example on, see <a href="https://learn.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest" rel="nofollow">the installation steps</a>, and follow the steps of the preferred method based on your instance. Then log in into your subscription as follows:',rn,oe,un,ie,$a='More information at <a href="https://learn.microsoft.com/en-us/cli/azure/authenticate-azure-cli?view=azure-cli-latest" rel="nofollow">Sign in with Azure CLI - Login and Authentication</a>.',pn,re,cn,ue,Ua="Besides the Azure CLI (<code>az</code>), you also need to install the Azure ML CLI extension (<code>az ml</code>) which will be used to create the Azure ML and Azure AI Foundry required resources.",mn,pe,Ja="First you will need to list the current extensions and remove any <code>ml</code>-related extension before installing the latest one i.e., v2.",dn,ce,Mn,me,ba="Then you can install the <code>az ml</code> v2 extension as follows:",hn,de,yn,Me,Ia='More information at <a href="https://learn.microsoft.com/en-us/azure/machine-learning/how-to-configure-cli?view=azureml-api-2&amp;tabs=public" rel="nofollow">Azure Machine Learning (ML) - Install and setup the CLI (v2)</a>.',fn,he,gn,ye,ja="An Azure Resource Group under the one you will create the Azure AI Foundry Hub-based project (note it will create an Azure AI Foundry resource as an Azure L Workspace, but not the other way around, meaning that the Azure AI Foundry Hub will be listed as an Azure ML workspace, but leveraging the Azure AI Foundry capabilities for Gen AI), and the rest of the required resources. 
If you don’t have one, you can create it as follow:",wn,fe,Tn,ge,Aa="Then, you can ensure that the resource group was created successfully by e.g. listing all the available resource groups that you have access to on your subscription:",$n,we,Un,Te,Ca='More information at <a href="https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/manage-resource-groups-cli" rel="nofollow">Manage Azure resource groups by using Azure CLI</a>.',Jn,v,bn,$e,In,Ue,za="An Azure AI Foundry Hub under the subscription and resource group aforementioned. If you don’t have one, you can create it as follows:",jn,Je,An,L,Cn,be,va="Then, you can ensure that the workspace was created successfully by e.g. listing all the available workspaces that you have access to on your subscription:",zn,Ie,vn,k,Ln,je,La="Once the Azure AI Foundry Hub is created, you need to create an Azure AI Foundry Project linked to that Hub, to do so you first need to obtain the Azure AI Foundry Hub ID of the recently created Hub as follows (replace the resource names with yours):",kn,Ae,En,Ce,ka="That command will provide the ID as follows <code>/subscriptions/&lt;SUBSCRIPTION_ID&gt;/resourceGroups/&lt;RESOURCE_GROUP&gt;/providers/Microsoft.MachineLearningServices/workspaces/huggingface-azure-hub</code>, meaning that you can also format it manually yourself with the appropriate replacements. 
Then you need to run the following command to create the Azure AI Foundry Project for that Hub as:",_n,ze,Zn,ve,Ea="Finally, you can verify that it was correctly created with the following command:",Rn,Le,Hn,ke,_a='More information at <a href="https://learn.microsoft.com/en-us/azure/ai-foundry/how-to/create-azure-ai-resource?tabs=portal" rel="nofollow">How to create and manage an Azure AI Foundry Hub</a> and at <a href="https://learn.microsoft.com/en-us/azure/ai-foundry/how-to/develop/create-hub-project-sdk?tabs=azurecli" rel="nofollow">How to create a Hub using the Azure CLI</a>.',Wn,E,Bn,Ee,qn,_e,Za='In this example, the <a href="https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/ml/azure-ai-ml" rel="nofollow">Azure Machine Learning SDK for Python</a> will be used to create the endpoint and the deployment, as well as to invoke the deployed API. Along with it, you will also need to install <code>azure-identity</code> to authenticate with your Azure credentials via Python.',Qn,Ze,Vn,Re,Ra='More information at <a href="https://learn.microsoft.com/en-us/python/api/overview/azure/ai-ml-readme?view=azure-python" rel="nofollow">Azure Machine Learning SDK for Python</a>.',Fn,He,Ha="Then, for convenience setting the following environment variables is recommended as those will be used along the example for the Azure ML Client, so make sure to update and set those values accordingly as per your Microsoft Azure account and resources.",xn,We,Gn,Be,Wa="Finally, you also need to define both the endpoint and deployment names, as those will be used throughout the example too:",Nn,_,Sn,qe,Xn,Qe,Pn,Ve,Yn,Fe,Ba="Initially, you need to authenticate into the Azure AI Foundry Hub via Azure ML with the Azure ML Python SDK, which will be later used to deploy <code>Qwen/Qwen2.5-32B-Instruct</code> as an Azure ML Managed Online Endpoint in your Azure AI Foundry Hub.",Dn,Z,On,xe,Kn,Ge,el,Ne,qa="Before creating the Managed Online Endpoint, you need to build the model URI, which is 
formatted as it follows <code>azureml://registries/&lt;REGISTRY_NAME&gt;/models/&lt;MODEL_ID&gt;/labels/latest</code> (even if the URI contains <code>azureml</code> it’s the same as in Azure AI Foundry, since the model catalog is shared), that means that the <code>REGISTRY_NAME</code> should be set to “HuggingFace” as you intend to deploy a model from the Hugging Face Collection, and the <code>MODEL_ID</code> won’t be the Hugging Face Hub ID, but rather the ID with hyphen replacements for both backslash (/) and underscores (_) with hyphens (-), and then into lower case, as follows:",tl,Se,nl,Xe,Qa='Note that you will need to verify in advance that the URI is valid, and that the given Hugging Face Hub Model ID exists on Azure, since Hugging Face is publishing those models into their collection, meaning that some models may be available on the Hugging Face Hub but not yet on the Azure Model Catalog (you can request adding a model following the guide <a href="https://huggingface.co/docs/microsoft-azure/guides/request-model-addition" rel="nofollow">Request a model addition</a>).',ll,Pe,Va="Alternatively, you can use the following snippet to verify if a model is available on the Azure Model Catalog programmatically:",al,Ye,sl,De,Fa='Then you can create the Managed Online Endpoint specifying its name (note that the name must be unique per entire region, not only within a single subscription, resource group, workspace, etc., so it’s a nice practice to add some sort of unique name to it in case multi-region deployments are intended) via the <a href="https://learn.microsoft.com/en-us/python/api/azure-ai-ml/azure.ai.ml.entities.managedonlineendpoint?view=azure-python" rel="nofollow">ManagedOnlineEndpoint Python class</a>.',ol,Oe,xa='Also note that by default the <code>ManagedOnlineEndpoint</code> will use the <code>key</code> authentication method, meaning that there will be a primary and secondary key that should be sent within the Authentication headers as a Bearer token; 
but also the <code>aml_token</code> authentication method can be used, read more about it at <a href="https://learn.microsoft.com/en-us/azure/machine-learning/how-to-authenticate-online-endpoint" rel="nofollow">Authenticate clients for online endpoints</a>.',il,Ke,Ga='The deployment, created via the <a href="https://learn.microsoft.com/en-us/python/api/azure-ai-ml/azure.ai.ml.entities.managedonlinedeployment?view=azure-python" rel="nofollow">ManagedOnlineDeployment Python class</a>, will define the actual model deployment that will be exposed via the previously created endpoint. The <code>ManagedOnlineDeployment</code> will expect: the <code>model</code> i.e., the previously created URI <code>azureml://registries/HuggingFace/models/qwen-qwen2.5-32b-instruct/labels/latest</code>, the <code>endpoint_name</code>, and the instance requirements being the <code>instance_type</code> and the <code>instance_count</code>.',rl,et,Na='Every model in the Hugging Face Collection is powered by an efficient inference backend, and each of those can run on a wide variety of instance types (as listed in <a href="https://huggingface.co/docs/microsoft-azure/azure-ai/supported-hardware" rel="nofollow">Supported Hardware</a>); in this case, a NVIDIA H100 GPU will be used i.e., <code>Standard_NC40ads_H100_v5</code>.',ul,R,pl,tt,cl,nt,ml,lt,Sa='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/microsoft-azure/azure-ai/deploy-large-language-models/azure-ml-endpoint.png" alt="Azure AI Endpoint from Azure ML Studio"/>',dl,at,Xa='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/microsoft-azure/azure-ai/deploy-large-language-models/azure-ai-endpoint.png" alt="Azure AI Endpoint from Azure AI Foundry"/>',Ml,H,hl,st,yl,ot,Pa='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/microsoft-azure/azure-ai/deploy-large-language-models/azure-ml-deployment.png" alt="Azure AI Deployment from 
Azure ML Studio"/>',fl,it,Ya='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/microsoft-azure/azure-ai/deploy-large-language-models/azure-ai-deployment.png" alt="Azure AI Deployment from Azure AI Foundry"/>',gl,W,wl,rt,Da='Once deployed, via either the Azure AI Foundry or the Azure ML Studio you’ll be able to inspect the endpoint details, the real-time logs, how to consume the endpoint, and even use the, still on preview, <a href="https://learn.microsoft.com/en-us/azure/machine-learning/concept-model-monitoring?view=azureml-api-2" rel="nofollow">monitoring feature</a>.',Tl,ut,Oa='Find more information about it at <a href="https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints-online?view=azureml-api-2#managed-online-endpoints" rel="nofollow">Azure ML Managed Online Endpoints</a>',$l,pt,Ul,ct,Ka="Finally, now that the Azure AI Endpoint is deployed, you can send requests to it. In this case, since the task of the model is <code>text-generation</code> (also known as <code>chat-completion</code>) you can either use the default scoring endpoint, being <code>/generate</code> which is the standard text generation endpoint without chat capabilities (as leveraging the chat template or having an OpenAI-compatible OpenAPI interface), or alternatively just benefit from the fact that Text Generation Inference (TGI) i.e., the inference engine in which the model is running on top, exposes OpenAI-compatible routes as <code>/v1/chat/completions</code>.",Jl,B,bl,q,Il,mt,jl,dt,es='You can invoke the Azure AI Endpoint on the scoring route, in this case <code>/generate</code> (more information about it in the <code>Qwen/Qwen2.5-32B-Instruct</code> page in either <a href="https://ml.azure.com/models/qwen-qwen2.5-32b-instruct/version/1/catalog/registry/HuggingFace" rel="nofollow">AzureML</a> or <a href="https://ai.azure.com/explore/models/qwen-qwen2.5-32b-instruct/version/1/registry/HuggingFace" rel="nofollow">Azure AI 
Foundry</a> catalogs), via the Azure Python SDK with the previously instantiated <code>azure.ai.ml.MLClient</code> (or instantiate a new one if working from a different session).',Al,Mt,Cl,Q,zl,ht,vl,yt,ts="Since Text Generation Inference (TGI) also exposes OpenAI-compatible routes, you can also leverage the OpenAI Python SDK to send requests to the deployed Azure AI Endpoint.",Ll,ft,kl,gt,ns="To use the OpenAI Python SDK with Azure ML Managed Online Endpoints, you need to first retrieve:",El,wt,ls="<li><code>api_url</code> with the <code>/v1</code> route (that contains the <code>v1/chat/completions</code> endpoint that the OpenAI Python SDK will send requests to)</li> <li><code>api_key</code> which is the API Key in Azure AI or the primary key in Azure ML (unless a dedicated Azure ML Token is used instead)</li>",_l,Tt,Zl,V,Rl,$t,as="Then you can use the OpenAI Python SDK normally, making sure to include the extra header <code>azureml-model-deployment</code> header that contains the Azure AI / ML Deployment name.",Hl,Ut,ss="Via the OpenAI Python SDK it can either be set within each call to <code>chat.completions.create</code> via the <code>extra_headers</code> parameter as commented below, or via the <code>default_headers</code> parameter when instantiating the <code>OpenAI</code> client (which is the recommended approach since the header needs to be present on each request, so setting it just once is preferred).",Wl,Jt,Bl,bt,ql,It,os="Alternatively, you can also just use <code>cURL</code> to send requests to the deployed endpoint, with the <code>api_url</code> and <code>api_key</code> values programmatically retrieved in the OpenAI snippet and now set as environment variables so that <code>cURL</code> can use those, as it follows:",Ql,jt,Vl,At,Fl,Ct,is="You can also just go to the Azure AI Endpoint in either the Azure AI Foundry under “My assets -&gt; Models + endpoints” or in the Azure ML Studio via “Endpoints”, and retrieve both the URL (note that it will 
default to the <code>/generate</code> endpoint, but to use the OpenAI-compatible layer you need to use the <code>/v1/chat/completions</code> endpoint instead) and the API Key values, as well as the Azure AI Deployment name for the given model, and then send the request as follows after replacing the values from Azure ML:",xl,zt,Gl,vt,Nl,Lt,rs='<a href="https://www.gradio.app/" rel="nofollow">Gradio</a> is the fastest way to demo your machine learning model with a friendly web interface so that anyone can use it. You can also leverage the OpenAI Python SDK to build a simple <code>ChatInterface</code> that you can use within the Jupyter Notebook cell where you are running it.',Sl,F,Xl,kt,Pl,Et,us='See below an example on how to leverage Gradio’s <code>ChatInterface</code>, or find more information about it at <a href="https://www.gradio.app/docs/gradio/chatinterface" rel="nofollow">Gradio ChatInterface Docs</a>.',Yl,_t,Dl,Zt,ps='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/microsoft-azure/azure-ai/deploy-large-language-models/azure-ml-gradio.png" alt="Gradio Chat Interface with Azure AI Endpoint"/>',Ol,Rt,Kl,Ht,cs="Once you are done using the Azure AI Endpoint / Deployment, you can delete the resources as it follows, meaning that you will stop paying for the instance on which the model is running and all the attached costs will be stopped.",ea,Wt,ta,Bt,na,qt,ms="Throughout this example you learnt how to create and configure your Azure account for Azure ML and Azure AI Foundry, how to then create a Managed Online Endpoint running an open model from the Hugging Face Collection in the Azure ML / Azure AI Foundry model catalog, how to send inference requests to it afterwards with different alternatives, how to build a simple Gradio chat interface around it, and finally, how to stop and release the resources.",la,Qt,ds='If you have any doubt, issue or question about this example, feel free to <a 
href="https://github.com/huggingface/Microsoft-Azure/issues/new" rel="nofollow">open an issue</a> and we’ll do our best to help!',aa,sa,oa,x,ia,Vt,ra,Ft,ua;return j=new J({props:{title:"Deploy Large Language Models (LLMs) on Azure AI",local:"deploy-large-language-models-llms-on-azure-ai",headingTag:"h1"}}),I=new U({props:{$$slots:{default:[qs]},$$scope:{ctx:w}}}),z=new U({props:{$$slots:{default:[Qs]},$$scope:{ctx:w}}}),ee=new J({props:{title:"Pre-requisites",local:"pre-requisites",headingTag:"h2"}}),ne=new J({props:{title:"Azure Account",local:"azure-account",headingTag:"h3"}}),ae=new J({props:{title:"Azure CLI",local:"azure-cli",headingTag:"h3"}}),oe=new T({props:{code:"YXolMjBsb2dpbg==",highlighted:"az login",wrap:!1}}),re=new J({props:{title:"Azure CLI extension for Azure ML",local:"azure-cli-extension-for-azure-ml",headingTag:"h3"}}),ce=new T({props:{code:"YXolMjBleHRlbnNpb24lMjBsaXN0JTBBYXolMjBleHRlbnNpb24lMjByZW1vdmUlMjAtLW5hbWUlMjBhenVyZS1jbGktbWwlMEFheiUyMGV4dGVuc2lvbiUyMHJlbW92ZSUyMC0tbmFtZSUyMG1s",highlighted:`az extension list
az extension remove --name azure-cli-ml
az extension remove --name ml`,wrap:!1}}),de=new T({props:{code:"YXolMjBleHRlbnNpb24lMjBhZGQlMjAtLW5hbWUlMjBtbA==",highlighted:"az extension add --name ml",wrap:!1}}),he=new J({props:{title:"Azure Resource Group",local:"azure-resource-group",headingTag:"h3"}}),fe=new T({props:{code:"YXolMjBncm91cCUyMGNyZWF0ZSUyMC0tbmFtZSUyMGh1Z2dpbmdmYWNlLWF6dXJlLXJnJTIwLS1sb2NhdGlvbiUyMGVhc3R1cw==",highlighted:"az group create --name huggingface-azure-rg --location eastus",wrap:!1}}),we=new T({props:{code:"YXolMjBncm91cCUyMGxpc3QlMjAtLW91dHB1dCUyMHRhYmxl",highlighted:"az group list --output table",wrap:!1}}),v=new U({props:{$$slots:{default:[Vs]},$$scope:{ctx:w}}}),$e=new J({props:{title:"Azure AI Foundry Hub-based project",local:"azure-ai-foundry-hub-based-project",headingTag:"h3"}}),Je=new T({props:{code:"YXolMjBtbCUyMHdvcmtzcGFjZSUyMGNyZWF0ZSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0ta2luZCUyMGh1YiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbmFtZSUyMGh1Z2dpbmdmYWNlLWF6dXJlLWh1YiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tcmVzb3VyY2UtZ3JvdXAlMjBodWdnaW5nZmFjZS1henVyZS1yZyUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbG9jYXRpb24lMjBlYXN0dXM=",highlighted:`az ml workspace create \\
--kind hub \\
--name huggingface-azure-hub \\
--resource-group huggingface-azure-rg \\
--location eastus`,wrap:!1}}),L=new U({props:{$$slots:{default:[Fs]},$$scope:{ctx:w}}}),Ie=new T({props:{code:"YXolMjBtbCUyMHdvcmtzcGFjZSUyMGxpc3QlMjAtLWZpbHRlcmVkLWtpbmRzJTIwaHViJTIwLS1xdWVyeSUyMCUyMiU1QiU1RC4lN0JOYW1lJTNBbmFtZSUyQyUyMEtpbmQlM0FraW5kJTdEJTIyJTIwLS1yZXNvdXJjZS1ncm91cCUyMGh1Z2dpbmdmYWNlLWF6dXJlLXJnJTIwLS1vdXRwdXQlMjB0YWJsZQ==",highlighted:'az ml workspace list --filtered-kinds hub --query <span class="hljs-string">&quot;[].{Name:name, Kind:kind}&quot;</span> --resource-group huggingface-azure-rg --output table',wrap:!1}}),k=new U({props:{warning:!0,$$slots:{default:[xs]},$$scope:{ctx:w}}}),Ae=new T({props:{code:"YXolMjBtbCUyMHdvcmtzcGFjZSUyMHNob3clMjAlNUMlMEElMjAlMjAlMjAlMjAtLW5hbWUlMjBodWdnaW5nZmFjZS1henVyZS1odWIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLXJlc291cmNlLWdyb3VwJTIwaHVnZ2luZ2ZhY2UtYXp1cmUtcmclMjAlNUMlMEElMjAlMjAlMjAlMjAtLXF1ZXJ5JTIwJTIyaWQlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAtbyUyMHRzdg==",highlighted:`az ml workspace show \\
--name huggingface-azure-hub \\
--resource-group huggingface-azure-rg \\
--query <span class="hljs-string">&quot;id&quot;</span> \\
-o tsv`,wrap:!1}}),ze=new T({props:{code:"YXolMjBtbCUyMHdvcmtzcGFjZSUyMGNyZWF0ZSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0ta2luZCUyMHByb2plY3QlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWh1Yi1pZCUyMCUyNChheiUyMG1sJTIwd29ya3NwYWNlJTIwc2hvdyUyMC0tbmFtZSUyMGh1Z2dpbmdmYWNlLWF6dXJlLWh1YiUyMC0tcmVzb3VyY2UtZ3JvdXAlMjBodWdnaW5nZmFjZS1henVyZS1yZyUyMC0tcXVlcnklMjAlMjJpZCUyMiUyMC1vJTIwdHN2KSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbmFtZSUyMGh1Z2dpbmdmYWNlLWF6dXJlLXByb2plY3QlMjAlNUMlMEElMjAlMjAlMjAlMjAtLXJlc291cmNlLWdyb3VwJTIwaHVnZ2luZ2ZhY2UtYXp1cmUtcmclMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxvY2F0aW9uJTIwZWFzdHVz",highlighted:`az ml workspace create \\
--kind project \\
--hub-id $(az ml workspace show --name huggingface-azure-hub --resource-group huggingface-azure-rg --query <span class="hljs-string">&quot;id&quot;</span> -o tsv) \\
--name huggingface-azure-project \\
--resource-group huggingface-azure-rg \\
--location eastus`,wrap:!1}}),Le=new T({props:{code:"YXolMjBtbCUyMHdvcmtzcGFjZSUyMGxpc3QlMjAtLWZpbHRlcmVkLWtpbmRzJTIwcHJvamVjdCUyMC0tcXVlcnklMjAlMjIlNUIlNUQuJTdCTmFtZSUzQW5hbWUlMkMlMjBLaW5kJTNBa2luZCU3RCUyMiUyMC0tcmVzb3VyY2UtZ3JvdXAlMjBodWdnaW5nZmFjZS1henVyZS1yZyUyMC0tb3V0cHV0JTIwdGFibGU=",highlighted:'az ml workspace list --filtered-kinds project --query <span class="hljs-string">&quot;[].{Name:name, Kind:kind}&quot;</span> --resource-group huggingface-azure-rg --output table',wrap:!1}}),E=new U({props:{$$slots:{default:[Gs]},$$scope:{ctx:w}}}),Ee=new J({props:{title:"Setup and installation",local:"setup-and-installation",headingTag:"h2"}}),Ze=new T({props:{code:"JTI1cGlwJTIwaW5zdGFsbCUyMGF6dXJlLWFpLW1sJTIwYXp1cmUtaWRlbnRpdHklMjAtLXVwZ3JhZGUlMjAtLXF1aWV0",highlighted:"%pip install azure-ai-ml azure-identity --upgrade --quiet",wrap:!1}}),We=new T({props:{code:"JTI1ZW52JTIwTE9DQVRJT04lMjBlYXN0dXMlMEElMjVlbnYlMjBTVUJTQ1JJUFRJT05fSUQlMjAlM0NZT1VSX1NVQlNDUklQVElPTl9JRCUzRSUwQSUyNWVudiUyMFJFU09VUkNFX0dST1VQJTIwJTNDWU9VUl9SRVNPVVJDRV9HUk9VUCUzRSUwQSUyNWVudiUyMEFJX0ZPVU5EUllfSFVCX1BST0pFQ1QlMjAlM0NZT1VSX0FJX0ZPVU5EUllfSFVCX1BST0pFQ1QlM0U=",highlighted:`%env LOCATION eastus
%env SUBSCRIPTION_ID &lt;YOUR_SUBSCRIPTION_ID&gt;
%env RESOURCE_GROUP &lt;YOUR_RESOURCE_GROUP&gt;
%env AI_FOUNDRY_HUB_PROJECT &lt;YOUR_AI_FOUNDRY_HUB_PROJECT&gt;`,wrap:!1}}),_=new U({props:{$$slots:{default:[Ns]},$$scope:{ctx:w}}}),qe=new T({props:{code:"aW1wb3J0JTIwb3MlMEFmcm9tJTIwdXVpZCUyMGltcG9ydCUyMHV1aWQ0JTBBJTBBb3MuZW52aXJvbiU1QiUyMkVORFBPSU5UX05BTUUlMjIlNUQlMjAlM0QlMjBmJTIycXdlbi1lbmRwb2ludC0lN0JzdHIodXVpZDQoKSklNUIlM0E4JTVEJTdEJTIyJTBBb3MuZW52aXJvbiU1QiUyMkRFUExPWU1FTlRfTkFNRSUyMiU1RCUyMCUzRCUyMGYlMjJxd2VuLWRlcGxveW1lbnQtJTdCc3RyKHV1aWQ0KCkpJTVCJTNBOCU1RCU3RCUyMg==",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">from</span> uuid <span class="hljs-keyword">import</span> uuid4
os.environ[<span class="hljs-string">&quot;ENDPOINT_NAME&quot;</span>] = <span class="hljs-string">f&quot;qwen-endpoint-<span class="hljs-subst">{<span class="hljs-built_in">str</span>(uuid4())[:<span class="hljs-number">8</span>]}</span>&quot;</span>
os.environ[<span class="hljs-string">&quot;DEPLOYMENT_NAME&quot;</span>] = <span class="hljs-string">f&quot;qwen-deployment-<span class="hljs-subst">{<span class="hljs-built_in">str</span>(uuid4())[:<span class="hljs-number">8</span>]}</span>&quot;</span>`,wrap:!1}}),Qe=new T({props:{code:"IWVjaG8lMjAlMjRFTkRQT0lOVF9OQU1FJTBBIWVjaG8lMjAlMjRERVBMT1lNRU5UX05BTUU=",highlighted:`!echo $ENDPOINT_NAME
!echo $DEPLOYMENT_NAME`,wrap:!1}}),Ve=new J({props:{title:"Authenticate to Azure ML",local:"authenticate-to-azure-ml",headingTag:"h2"}}),Z=new U({props:{$$slots:{default:[Ss]},$$scope:{ctx:w}}}),xe=new T({props:{code:"aW1wb3J0JTIwb3MlMEFmcm9tJTIwYXp1cmUuYWkubWwlMjBpbXBvcnQlMjBNTENsaWVudCUwQWZyb20lMjBhenVyZS5pZGVudGl0eSUyMGltcG9ydCUyMERlZmF1bHRBenVyZUNyZWRlbnRpYWwlMEElMEFjbGllbnQlMjAlM0QlMjBNTENsaWVudCglMEElMjAlMjAlMjAlMjBjcmVkZW50aWFsJTNERGVmYXVsdEF6dXJlQ3JlZGVudGlhbCgpJTJDJTBBJTIwJTIwJTIwJTIwc3Vic2NyaXB0aW9uX2lkJTNEb3MuZ2V0ZW52KCUyMlNVQlNDUklQVElPTl9JRCUyMiklMkMlMEElMjAlMjAlMjAlMjByZXNvdXJjZV9ncm91cF9uYW1lJTNEb3MuZ2V0ZW52KCUyMlJFU09VUkNFX0dST1VQJTIyKSUyQyUwQSUyMCUyMCUyMCUyMHdvcmtzcGFjZV9uYW1lJTNEb3MuZ2V0ZW52KCUyMkFJX0ZPVU5EUllfSFVCX1BST0pFQ1QlMjIpJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">from</span> azure.ai.ml <span class="hljs-keyword">import</span> MLClient
<span class="hljs-keyword">from</span> azure.identity <span class="hljs-keyword">import</span> DefaultAzureCredential
client = MLClient(
credential=DefaultAzureCredential(),
subscription_id=os.getenv(<span class="hljs-string">&quot;SUBSCRIPTION_ID&quot;</span>),
resource_group_name=os.getenv(<span class="hljs-string">&quot;RESOURCE_GROUP&quot;</span>),
workspace_name=os.getenv(<span class="hljs-string">&quot;AI_FOUNDRY_HUB_PROJECT&quot;</span>),
)`,wrap:!1}}),Ge=new J({props:{title:"Create and Deploy Azure AI Endpoint",local:"create-and-deploy-azure-ai-endpoint",headingTag:"h2"}}),Se=new T({props:{code:"bW9kZWxfaWQlMjAlM0QlMjAlMjJRd2VuJTJGUXdlbjIuNS0zMkItSW5zdHJ1Y3QlMjIlMEElMEFtb2RlbF91cmklMjAlM0QlMjAoJTBBJTIwJTIwJTIwJTIwZiUyMmF6dXJlbWwlM0ElMkYlMkZyZWdpc3RyaWVzJTJGSHVnZ2luZ0ZhY2UlMkZtb2RlbHMlMkYlN0Jtb2RlbF9pZC5yZXBsYWNlKCclMkYnJTJDJTIwJy0nKS5yZXBsYWNlKCdfJyUyQyUyMCctJykubG93ZXIoKSU3RCUyRmxhYmVscyUyRmxhdGVzdCUyMiUwQSklMEFtb2RlbF91cmk=",highlighted:`model_id = <span class="hljs-string">&quot;Qwen/Qwen2.5-32B-Instruct&quot;</span>
model_uri = (
<span class="hljs-string">f&quot;azureml://registries/HuggingFace/models/<span class="hljs-subst">{model_id.replace(<span class="hljs-string">&#x27;/&#x27;</span>, <span class="hljs-string">&#x27;-&#x27;</span>).replace(<span class="hljs-string">&#x27;_&#x27;</span>, <span class="hljs-string">&#x27;-&#x27;</span>).lower()}</span>/labels/latest&quot;</span>
)
model_uri`,wrap:!1}}),Ye=new T({props:{code:"aW1wb3J0JTIwcmVxdWVzdHMlMEElMEFyZXNwb25zZSUyMCUzRCUyMHJlcXVlc3RzLmdldChmJTIyaHR0cHMlM0ElMkYlMkZnZW5lcmF0ZS1henVyZW1sLXVybHMuYXp1cmV3ZWJzaXRlcy5uZXQlMkZhcGklMkZnZW5lcmF0ZSUzRm1vZGVsSWQlM0QlN0Jtb2RlbF9pZCU3RCUyMiklMEFpZiUyMHJlc3BvbnNlLnN0YXR1c19jb2RlJTIwISUzRCUyMDIwMCUzQSUwQSUyMCUyMCUyMCUyMHByaW50KCUyMiU1QiU3QnJlc3BvbnNlLnN0YXR1c19jb2RlJTNEJTdEJTVEJTIwJTdCbW9kZWxfaWQlM0QlN0QlMjBub3QlMjBhdmFpbGFibGUlMjBvbiUyMHRoZSUyMEh1Z2dpbmclMjBGYWNlJTIwQ29sbGVjdGlvbiUyMGluJTIwQXp1cmUlMjBNb2RlbCUyMENhdGFsb2clMjIp",highlighted:`<span class="hljs-keyword">import</span> requests
response = requests.get(<span class="hljs-string">f&quot;https://generate-azureml-urls.azurewebsites.net/api/generate?modelId=<span class="hljs-subst">{model_id}</span>&quot;</span>)
<span class="hljs-keyword">if</span> response.status_code != <span class="hljs-number">200</span>:
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;[{response.status_code=}] {model_id=} not available on the Hugging Face Collection in Azure Model Catalog&quot;</span>)`,wrap:!1}}),R=new U({props:{warning:!0,$$slots:{default:[Xs]},$$scope:{ctx:w}}}),tt=new T({props:{code:"ZnJvbSUyMGF6dXJlLmFpLm1sLmVudGl0aWVzJTIwaW1wb3J0JTIwTWFuYWdlZE9ubGluZUVuZHBvaW50JTJDJTIwTWFuYWdlZE9ubGluZURlcGxveW1lbnQlMEElMEFlbmRwb2ludCUyMCUzRCUyME1hbmFnZWRPbmxpbmVFbmRwb2ludChuYW1lJTNEb3MuZ2V0ZW52KCUyMkVORFBPSU5UX05BTUUlMjIpKSUwQSUwQWRlcGxveW1lbnQlMjAlM0QlMjBNYW5hZ2VkT25saW5lRGVwbG95bWVudCglMEElMjAlMjAlMjAlMjBuYW1lJTNEb3MuZ2V0ZW52KCUyMkRFUExPWU1FTlRfTkFNRSUyMiklMkMlMEElMjAlMjAlMjAlMjBlbmRwb2ludF9uYW1lJTNEb3MuZ2V0ZW52KCUyMkVORFBPSU5UX05BTUUlMjIpJTJDJTBBJTIwJTIwJTIwJTIwbW9kZWwlM0Rtb2RlbF91cmklMkMlMEElMjAlMjAlMjAlMjBpbnN0YW5jZV90eXBlJTNEJTIyU3RhbmRhcmRfTkM0MGFkc19IMTAwX3Y1JTIyJTJDJTBBJTIwJTIwJTIwJTIwaW5zdGFuY2VfY291bnQlM0QxJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">from</span> azure.ai.ml.entities <span class="hljs-keyword">import</span> ManagedOnlineEndpoint, ManagedOnlineDeployment
endpoint = ManagedOnlineEndpoint(name=os.getenv(<span class="hljs-string">&quot;ENDPOINT_NAME&quot;</span>))
deployment = ManagedOnlineDeployment(
name=os.getenv(<span class="hljs-string">&quot;DEPLOYMENT_NAME&quot;</span>),
endpoint_name=os.getenv(<span class="hljs-string">&quot;ENDPOINT_NAME&quot;</span>),
model=model_uri,
instance_type=<span class="hljs-string">&quot;Standard_NC40ads_H100_v5&quot;</span>,
instance_count=<span class="hljs-number">1</span>,
)`,wrap:!1}}),nt=new T({props:{code:"Y2xpZW50LmJlZ2luX2NyZWF0ZV9vcl91cGRhdGUoZW5kcG9pbnQpLndhaXQoKQ==",highlighted:"client.begin_create_or_update(endpoint).wait()",wrap:!1}}),H=new U({props:{$$slots:{default:[Ps]},$$scope:{ctx:w}}}),st=new T({props:{code:"Y2xpZW50Lm9ubGluZV9kZXBsb3ltZW50cy5iZWdpbl9jcmVhdGVfb3JfdXBkYXRlKGRlcGxveW1lbnQpLndhaXQoKQ==",highlighted:"client.online_deployments.begin_create_or_update(deployment).wait()",wrap:!1}}),W=new U({props:{$$slots:{default:[Ys]},$$scope:{ctx:w}}}),pt=new J({props:{title:"Send requests to the Azure AI Endpoint",local:"send-requests-to-the-azure-ai-endpoint",headingTag:"h2"}}),B=new U({props:{$$slots:{default:[Ds]},$$scope:{ctx:w}}}),q=new U({props:{warning:!0,$$slots:{default:[Os]},$$scope:{ctx:w}}}),mt=new J({props:{title:"Azure Python SDK",local:"azure-python-sdk",headingTag:"h3"}}),Mt=new T({props:{code:"aW1wb3J0JTIwanNvbiUwQWltcG9ydCUyMG9zJTBBaW1wb3J0JTIwdGVtcGZpbGUlMEElMEF3aXRoJTIwdGVtcGZpbGUuTmFtZWRUZW1wb3JhcnlGaWxlKG1vZGUlM0QlMjJ3JTJCJTIyJTJDJTIwZGVsZXRlJTNEVHJ1ZSUyQyUyMHN1ZmZpeCUzRCUyMi5qc29uJTIyKSUyMGFzJTIwdG1wJTNBJTBBJTIwJTIwJTIwJTIwanNvbi5kdW1wKCU3QiUyMmlucHV0cyUyMiUzQSUyMCUyMldoYXQlMjBpcyUyMERlZXAlMjBMZWFybmluZyUzRiUyMiUyQyUyMCUyMnBhcmFtZXRlcnMlMjIlM0ElMjAlN0IlMjJtYXhfbmV3X3Rva2VucyUyMiUzQSUyMDEyOCU3RCU3RCUyQyUyMHRtcCklMEElMjAlMjAlMjAlMjB0bXAuZmx1c2goKSUwQSUwQSUyMCUyMCUyMCUyMHJlc3BvbnNlJTIwJTNEJTIwY2xpZW50Lm9ubGluZV9lbmRwb2ludHMuaW52b2tlKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGVuZHBvaW50X25hbWUlM0Rvcy5nZXRlbnYoJTIyRU5EUE9JTlRfTkFNRSUyMiklMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBkZXBsb3ltZW50X25hbWUlM0Rvcy5nZXRlbnYoJTIyREVQTE9ZTUVOVF9OQU1FJTIyKSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHJlcXVlc3RfZmlsZSUzRHRtcC5uYW1lJTJDJTBBJTIwJTIwJTIwJTIwKSUwQSUwQXByaW50KGpzb24ubG9hZHMocmVzcG9uc2UpKQ==",highlighted:`<span class="hljs-keyword">import</span> json
<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">import</span> tempfile
<span class="hljs-keyword">with</span> tempfile.NamedTemporaryFile(mode=<span class="hljs-string">&quot;w+&quot;</span>, delete=<span class="hljs-literal">True</span>, suffix=<span class="hljs-string">&quot;.json&quot;</span>) <span class="hljs-keyword">as</span> tmp:
json.dump({<span class="hljs-string">&quot;inputs&quot;</span>: <span class="hljs-string">&quot;What is Deep Learning?&quot;</span>, <span class="hljs-string">&quot;parameters&quot;</span>: {<span class="hljs-string">&quot;max_new_tokens&quot;</span>: <span class="hljs-number">128</span>}}, tmp)
tmp.flush()
response = client.online_endpoints.invoke(
endpoint_name=os.getenv(<span class="hljs-string">&quot;ENDPOINT_NAME&quot;</span>),
deployment_name=os.getenv(<span class="hljs-string">&quot;DEPLOYMENT_NAME&quot;</span>),
request_file=tmp.name,
)
<span class="hljs-built_in">print</span>(json.loads(response))`,wrap:!1}}),Q=new U({props:{$$slots:{default:[Ks]},$$scope:{ctx:w}}}),ht=new J({props:{title:"OpenAI Python SDK",local:"openai-python-sdk",headingTag:"h3"}}),ft=new T({props:{code:"JTI1cGlwJTIwaW5zdGFsbCUyMG9wZW5haSUyMC0tdXBncmFkZSUyMC0tcXVpZXQ=",highlighted:"%pip install openai --upgrade --quiet",wrap:!1}}),Tt=new T({props:{code:"YXBpX2tleSUyMCUzRCUyMGNsaWVudC5vbmxpbmVfZW5kcG9pbnRzLmdldF9rZXlzKG9zLmdldGVudiglMjJFTkRQT0lOVF9OQU1FJTIyKSkucHJpbWFyeV9rZXklMEFhcGlfdXJsJTIwJTNEJTIwY2xpZW50Lm9ubGluZV9lbmRwb2ludHMuZ2V0KG9zLmdldGVudiglMjJFTkRQT0lOVF9OQU1FJTIyKSkuc2NvcmluZ191cmkucmVwbGFjZSglMjIlMkZnZW5lcmF0ZSUyMiUyQyUyMCUyMiUyRnYxJTIyKQ==",highlighted:`api_key = client.online_endpoints.get_keys(os.getenv(<span class="hljs-string">&quot;ENDPOINT_NAME&quot;</span>)).primary_key
api_url = client.online_endpoints.get(os.getenv(<span class="hljs-string">&quot;ENDPOINT_NAME&quot;</span>)).scoring_uri.replace(<span class="hljs-string">&quot;/generate&quot;</span>, <span class="hljs-string">&quot;/v1&quot;</span>)`,wrap:!1}}),V=new U({props:{$$slots:{default:[eo]},$$scope:{ctx:w}}}),Jt=new T({props:{code:"aW1wb3J0JTIwb3MlMEFmcm9tJTIwb3BlbmFpJTIwaW1wb3J0JTIwT3BlbkFJJTBBJTBBb3BlbmFpX2NsaWVudCUyMCUzRCUyME9wZW5BSSglMEElMjAlMjAlMjAlMjBiYXNlX3VybCUzRGFwaV91cmwlMkMlMEElMjAlMjAlMjAlMjBhcGlfa2V5JTNEYXBpX2tleSUyQyUwQSUyMCUyMCUyMCUyMGRlZmF1bHRfaGVhZGVycyUzRCU3QiUyMmF6dXJlbWwtbW9kZWwtZGVwbG95bWVudCUyMiUzQSUyMG9zLmdldGVudiglMjJERVBMT1lNRU5UX05BTUUlMjIpJTdEJTJDJTBBKSUwQSUwQWNvbXBsZXRpb24lMjAlM0QlMjBvcGVuYWlfY2xpZW50LmNoYXQuY29tcGxldGlvbnMuY3JlYXRlKCUwQSUyMCUyMCUyMCUyMG1vZGVsJTNEJTIyUXdlbiUyRlF3ZW4yLjUtMzJCLUluc3RydWN0JTIyJTJDJTBBJTIwJTIwJTIwJTIwbWVzc2FnZXMlM0QlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIyc3lzdGVtJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMllvdSUyMGFyZSUyMGFuJTIwYXNzaXN0YW50JTIwdGhhdCUyMHJlc3BvbmRzJTIwbGlrZSUyMGElMjBwaXJhdGUuJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycm9sZSUyMiUzQSUyMCUyMnVzZXIlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIyV2hhdCUyMGlzJTIwRGVlcCUyMExlYXJuaW5nJTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwbWF4X3Rva2VucyUzRDEyOCUyQyUwQSUyMCUyMCUyMCUyMCUyMyUyMGV4dHJhX2hlYWRlcnMlM0QlN0IlMjJhenVyZW1sLW1vZGVsLWRlcGxveW1lbnQlMjIlM0ElMjBvcy5nZXRlbnYoJTIyREVQTE9ZTUVOVF9OQU1FJTIyKSU3RCUyQyUwQSklMEFwcmludChjb21wbGV0aW9uKQ==",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI
openai_client = OpenAI(
base_url=api_url,
api_key=api_key,
default_headers={<span class="hljs-string">&quot;azureml-model-deployment&quot;</span>: os.getenv(<span class="hljs-string">&quot;DEPLOYMENT_NAME&quot;</span>)},
)
completion = openai_client.chat.completions.create(
model=<span class="hljs-string">&quot;Qwen/Qwen2.5-32B-Instruct&quot;</span>,
messages=[
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;system&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;You are an assistant that responds like a pirate.&quot;</span>},
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What is Deep Learning?&quot;</span>,
},
],
max_tokens=<span class="hljs-number">128</span>,
<span class="hljs-comment"># extra_headers={&quot;azureml-model-deployment&quot;: os.getenv(&quot;DEPLOYMENT_NAME&quot;)},</span>
)
<span class="hljs-built_in">print</span>(completion)`,wrap:!1}}),bt=new J({props:{title:"cURL",local:"curl",headingTag:"h3"}}),jt=new T({props:{code:"b3MuZW52aXJvbiU1QiUyMkFQSV9VUkwlMjIlNUQlMjAlM0QlMjBhcGlfdXJsJTBBb3MuZW52aXJvbiU1QiUyMkFQSV9LRVklMjIlNUQlMjAlM0QlMjBhcGlfa2V5",highlighted:`os.environ[<span class="hljs-string">&quot;API_URL&quot;</span>] = api_url
os.environ[<span class="hljs-string">&quot;API_KEY&quot;</span>] = api_key`,wrap:!1}}),At=new T({props:{code:"IWN1cmwlMjAtc1MlMjAlMjRBUElfVVJMJTJGY2hhdCUyRmNvbXBsZXRpb25zJTIwJTVDJTBBJTIwJTIwJTIwJTIwLUglMjAlMjJBdXRob3JpemF0aW9uJTNBJTIwQmVhcmVyJTIwJTI0QVBJX0tFWSUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC1IJTIwJTIyQ29udGVudC1UeXBlJTNBJTIwYXBwbGljYXRpb24lMkZqc29uJTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwLUglMjAlMjJhenVyZW1sLW1vZGVsLWRlcGxveW1lbnQlM0ElMjAlMjRERVBMT1lNRU5UX05BTUUlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAtZCUyMCclN0IlMjAlNUMlMEElMjJtZXNzYWdlcyUyMiUzQSU1QiUyMCU1QyUwQSUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjJzeXN0ZW0lMjIlMkMlMjJjb250ZW50JTIyJTNBJTIyWW91JTIwYXJlJTIwYW4lMjBhc3Npc3RhbnQlMjB0aGF0JTIwcmVwbGllcyUyMGxpa2UlMjBhJTIwcGlyYXRlLiUyMiU3RCUyQyUyMCU1QyUwQSUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjJ1c2VyJTIyJTJDJTIyY29udGVudCUyMiUzQSUyMldoYXQlMjBpcyUyMERlZXAlMjBMZWFybmluZyUzRiUyMiU3RCUyMCU1QyUwQSU1RCUyQyUyMCU1QyUwQSUyMm1heF90b2tlbnMlMjIlM0ExMjglMjAlNUMlMEElN0QnJTIwJTdDJTIwanE=",highlighted:`!curl -sS $API_URL/chat/completions \\
-H <span class="hljs-string">&quot;Authorization: Bearer $API_KEY&quot;</span> \\
-H <span class="hljs-string">&quot;Content-Type: application/json&quot;</span> \\
-H <span class="hljs-string">&quot;azureml-model-deployment: $DEPLOYMENT_NAME&quot;</span> \\
-d <span class="hljs-string">&#x27;{ \\
&quot;messages&quot;:[ \\
{&quot;role&quot;:&quot;system&quot;,&quot;content&quot;:&quot;You are an assistant that replies like a pirate.&quot;}, \\
{&quot;role&quot;:&quot;user&quot;,&quot;content&quot;:&quot;What is Deep Learning?&quot;} \\
], \\
&quot;max_tokens&quot;:128 \\
}&#x27;</span> | jq`,wrap:!1}}),zt=new T({props:{code:"Y3VybCUyMC1zUyUyMCUzQ0FQSV9VUkwlM0UlMkZ2MSUyRmNoYXQlMkZjb21wbGV0aW9ucyUyMCU1QyUwQSUyMCUyMCUyMCUyMC1IJTIwJTIyQXV0aG9yaXphdGlvbiUzQSUyMEJlYXJlciUyMCUzQ1BSSU1BUllfS0VZJTNFJTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwLUglMjAlMjJDb250ZW50LVR5cGUlM0ElMjBhcHBsaWNhdGlvbiUyRmpzb24lMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAtSCUyMCUyMmF6dXJlbWwtbW9kZWwtZGVwbG95bWVudCUzQSUyMCUyNERFUExPWU1FTlRfTkFNRSUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC1kJTIwJyU3QiUyMCU1QyUwQSUyMm1lc3NhZ2VzJTIyJTNBJTVCJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMnN5c3RlbSUyMiUyQyUyMmNvbnRlbnQlMjIlM0ElMjJZb3UlMjBhcmUlMjBhbiUyMGFzc2lzdGFudCUyMHRoYXQlMjByZXBsaWVzJTIwbGlrZSUyMGElMjBwaXJhdGUuJTIyJTdEJTJDJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMnVzZXIlMjIlMkMlMjJjb250ZW50JTIyJTNBJTIyV2hhdCUyMGlzJTIwRGVlcCUyMExlYXJuaW5nJTNGJTIyJTdEJTIwJTVDJTBBJTVEJTJDJTIwJTVDJTBBJTIybWF4X3Rva2VucyUyMiUzQTEyOCUyMCU1QyUwQSU3RCclMjAlN0MlMjBqcQ==",highlighted:`curl -sS &lt;API_URL&gt;/v1/chat/completions \\
-H <span class="hljs-string">&quot;Authorization: Bearer &lt;PRIMARY_KEY&gt;&quot;</span> \\
-H <span class="hljs-string">&quot;Content-Type: application/json&quot;</span> \\
-H <span class="hljs-string">&quot;azureml-model-deployment: <span class="hljs-variable">$DEPLOYMENT_NAME</span>&quot;</span> \\
-d <span class="hljs-string">&#x27;{ \\
&quot;messages&quot;:[ \\
{&quot;role&quot;:&quot;system&quot;,&quot;content&quot;:&quot;You are an assistant that replies like a pirate.&quot;}, \\
{&quot;role&quot;:&quot;user&quot;,&quot;content&quot;:&quot;What is Deep Learning?&quot;} \\
], \\
&quot;max_tokens&quot;:128 \\
}&#x27;</span> | jq`,wrap:!1}}),vt=new J({props:{title:"Gradio",local:"gradio",headingTag:"h3"}}),F=new U({props:{$$slots:{default:[to]},$$scope:{ctx:w}}}),kt=new T({props:{code:"JTI1cGlwJTIwaW5zdGFsbCUyMGdyYWRpbyUyMC0tdXBncmFkZSUyMC0tcXVpZXQ=",highlighted:"%pip install gradio --upgrade --quiet",wrap:!1}}),_t=new T({props:{code:"aW1wb3J0JTIwb3MlMEFmcm9tJTIwdHlwaW5nJTIwaW1wb3J0JTIwRGljdCUyQyUyMEl0ZXJhdG9yJTJDJTIwTGlzdCUyQyUyMExpdGVyYWwlMEElMEFpbXBvcnQlMjBncmFkaW8lMjBhcyUyMGdyJTBBZnJvbSUyMG9wZW5haSUyMGltcG9ydCUyME9wZW5BSSUwQSUwQW9wZW5haV9jbGllbnQlMjAlM0QlMjBPcGVuQUkoJTBBJTIwJTIwJTIwJTIwYmFzZV91cmwlM0RhcGlfdXJsJTJDJTBBJTIwJTIwJTIwJTIwYXBpX2tleSUzRGFwaV9rZXklMkMlMEElMjAlMjAlMjAlMjBkZWZhdWx0X2hlYWRlcnMlM0QlN0IlMjJhenVyZW1sLW1vZGVsLWRlcGxveW1lbnQlMjIlM0ElMjBvcy5nZXRlbnYoJTIyREVQTE9ZTUVOVF9OQU1FJTIyKSU3RCUyQyUwQSklMEElMEElMEFkZWYlMjBwcmVkaWN0KG1lc3NhZ2UlM0ElMjBzdHIlMkMlMjBoaXN0b3J5JTNBJTIwTGlzdCU1QkRpY3QlNUJMaXRlcmFsJTVCJTIycm9sZSUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlNUQlMkMlMjBzdHIlNUQlNUQpJTIwLSUzRSUyMEl0ZXJhdG9yJTVCc3RyJTVEJTNBJTBBJTIwJTIwJTIwJTIwaGlzdG9yeS5hcHBlbmQoJTdCJTIycm9sZSUyMiUzQSUyMCUyMnVzZXIlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwbWVzc2FnZSU3RCklMEElMEElMjAlMjAlMjAlMjBzdHJlYW0lMjAlM0QlMjBvcGVuYWlfY2xpZW50LmNoYXQuY29tcGxldGlvbnMuY3JlYXRlKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1vZGVsJTNEJTIyUXdlbiUyRlF3ZW4yLjUtMzJCLUluc3RydWN0JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbWVzc2FnZXMlM0RoaXN0b3J5JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3RyZWFtJTNEVHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMCklMEElMjAlMjAlMjAlMjBjaHVua3MlMjAlM0QlMjAlNUIlNUQlMEElMjAlMjAlMjAlMjBmb3IlMjBjaHVuayUyMGluJTIwc3RyZWFtJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY2h1bmtzLmFwcGVuZChjaHVuay5jaG9pY2VzJTVCMCU1RC5kZWx0YS5jb250ZW50JTIwb3IlMjAlMjIlMjIpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIweWllbGQlMjAlMjIlMjIuam9pbihjaHVua3MpJTBBJTBBJTBBZGVtbyUyMCUzRCUyMGdyLkNoYXRJbnRlcmZhY2UocHJlZGljdCUyQyUyMHR5cGUlM0QlMjJtZXNzYWdlcyUyMiklMEFkZW1vLmxhdW5jaCgp",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">from</span> typing <span class="hljs-keyword">import</span> <span class="hljs-type">Dict</span>, Iterator, <span class="hljs-type">List</span>, <span class="hljs-type">Literal</span>
<span class="hljs-keyword">import</span> gradio <span class="hljs-keyword">as</span> gr
<span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI
openai_client = OpenAI(
base_url=api_url,
api_key=api_key,
default_headers={<span class="hljs-string">&quot;azureml-model-deployment&quot;</span>: os.getenv(<span class="hljs-string">&quot;DEPLOYMENT_NAME&quot;</span>)},
)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">predict</span>(<span class="hljs-params">message: <span class="hljs-built_in">str</span>, history: <span class="hljs-type">List</span>[<span class="hljs-type">Dict</span>[<span class="hljs-type">Literal</span>[<span class="hljs-string">&quot;role&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>], <span class="hljs-built_in">str</span>]]</span>) -&gt; Iterator[<span class="hljs-built_in">str</span>]:
history.append({<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: message})
stream = openai_client.chat.completions.create(
model=<span class="hljs-string">&quot;Qwen/Qwen2.5-32B-Instruct&quot;</span>,
messages=history,
stream=<span class="hljs-literal">True</span>,
)
chunks = []
<span class="hljs-keyword">for</span> chunk <span class="hljs-keyword">in</span> stream:
chunks.append(chunk.choices[<span class="hljs-number">0</span>].delta.content <span class="hljs-keyword">or</span> <span class="hljs-string">&quot;&quot;</span>)
<span class="hljs-keyword">yield</span> <span class="hljs-string">&quot;&quot;</span>.join(chunks)
demo = gr.ChatInterface(predict, <span class="hljs-built_in">type</span>=<span class="hljs-string">&quot;messages&quot;</span>)
demo.launch()`,wrap:!1}}),Rt=new J({props:{title:"Release resources",local:"release-resources",headingTag:"h2"}}),Wt=new T({props:{code:"Y2xpZW50Lm9ubGluZV9lbmRwb2ludHMuYmVnaW5fZGVsZXRlKG5hbWUlM0Rvcy5nZXRlbnYoJTIyRU5EUE9JTlRfTkFNRSUyMikpLnJlc3VsdCgp",highlighted:'client.online_endpoints.begin_delete(name=os.getenv(<span class="hljs-string">&quot;ENDPOINT_NAME&quot;</span>)).result()',wrap:!1}}),Bt=new J({props:{title:"Conclusion",local:"conclusion",headingTag:"h2"}}),x=new U({props:{$$slots:{default:[no]},$$scope:{ctx:w}}}),Vt=new Bs({props:{source:"https://github.com/huggingface/Microsoft-Azure/blob/main/docs/source/azure-ai/examples/deploy-large-language-models.mdx"}}),{c(){a=r("meta"),f=s(),i=r("p"),g=s(),c(j.$$.fragment),A=s(),C=r("p"),C.textContent=G,$=s(),c(I.$$.fragment),xt=s(),N=r("p"),N.textContent=pa,Gt=s(),Nt=r("hr"),St=s(),S=r("p"),S.innerHTML=ca,Xt=s(),X=r("p"),X.textContent=ma,Pt=s(),P=r("ul"),P.innerHTML=da,Yt=s(),Y=r("p"),Y.innerHTML=Ma,Dt=s(),D=r("p"),D.innerHTML=ha,Ot=s(),O=r("p"),O.innerHTML=ya,Kt=s(),K=r("p"),K.innerHTML=fa,en=s(),c(z.$$.fragment),tn=s(),c(ee.$$.fragment),nn=s(),te=r("p"),te.innerHTML=ga,ln=s(),c(ne.$$.fragment),an=s(),le=r("p"),le.innerHTML=wa,sn=s(),c(ae.$$.fragment),on=s(),se=r("p"),se.innerHTML=Ta,rn=s(),c(oe.$$.fragment),un=s(),ie=r("p"),ie.innerHTML=$a,pn=s(),c(re.$$.fragment),cn=s(),ue=r("p"),ue.innerHTML=Ua,mn=s(),pe=r("p"),pe.innerHTML=Ja,dn=s(),c(ce.$$.fragment),Mn=s(),me=r("p"),me.innerHTML=ba,hn=s(),c(de.$$.fragment),yn=s(),Me=r("p"),Me.innerHTML=Ia,fn=s(),c(he.$$.fragment),gn=s(),ye=r("p"),ye.textContent=ja,wn=s(),c(fe.$$.fragment),Tn=s(),ge=r("p"),ge.textContent=Aa,$n=s(),c(we.$$.fragment),Un=s(),Te=r("p"),Te.innerHTML=Ca,Jn=s(),c(v.$$.fragment),bn=s(),c($e.$$.fragment),In=s(),Ue=r("p"),Ue.textContent=za,jn=s(),c(Je.$$.fragment),An=s(),c(L.$$.fragment),Cn=s(),be=r("p"),be.textContent=va,zn=s(),c(Ie.$$.fragment),vn=s(),c(k.$$.fragment),Ln=s(),je=r("p"),je.textContent=La,kn=s(),c(Ae.$$.fragment),En=s(),Ce=r("p"),Ce.in
nerHTML=ka,_n=s(),c(ze.$$.fragment),Zn=s(),ve=r("p"),ve.textContent=Ea,Rn=s(),c(Le.$$.fragment),Hn=s(),ke=r("p"),ke.innerHTML=_a,Wn=s(),c(E.$$.fragment),Bn=s(),c(Ee.$$.fragment),qn=s(),_e=r("p"),_e.innerHTML=Za,Qn=s(),c(Ze.$$.fragment),Vn=s(),Re=r("p"),Re.innerHTML=Ra,Fn=s(),He=r("p"),He.textContent=Ha,xn=s(),c(We.$$.fragment),Gn=s(),Be=r("p"),Be.textContent=Wa,Nn=s(),c(_.$$.fragment),Sn=s(),c(qe.$$.fragment),Xn=s(),c(Qe.$$.fragment),Pn=s(),c(Ve.$$.fragment),Yn=s(),Fe=r("p"),Fe.innerHTML=Ba,Dn=s(),c(Z.$$.fragment),On=s(),c(xe.$$.fragment),Kn=s(),c(Ge.$$.fragment),el=s(),Ne=r("p"),Ne.innerHTML=qa,tl=s(),c(Se.$$.fragment),nl=s(),Xe=r("p"),Xe.innerHTML=Qa,ll=s(),Pe=r("p"),Pe.textContent=Va,al=s(),c(Ye.$$.fragment),sl=s(),De=r("p"),De.innerHTML=Fa,ol=s(),Oe=r("p"),Oe.innerHTML=xa,il=s(),Ke=r("p"),Ke.innerHTML=Ga,rl=s(),et=r("p"),et.innerHTML=Na,ul=s(),c(R.$$.fragment),pl=s(),c(tt.$$.fragment),cl=s(),c(nt.$$.fragment),ml=s(),lt=r("p"),lt.innerHTML=Sa,dl=s(),at=r("p"),at.innerHTML=Xa,Ml=s(),c(H.$$.fragment),hl=s(),c(st.$$.fragment),yl=s(),ot=r("p"),ot.innerHTML=Pa,fl=s(),it=r("p"),it.innerHTML=Ya,gl=s(),c(W.$$.fragment),wl=s(),rt=r("p"),rt.innerHTML=Da,Tl=s(),ut=r("p"),ut.innerHTML=Oa,$l=s(),c(pt.$$.fragment),Ul=s(),ct=r("p"),ct.innerHTML=Ka,Jl=s(),c(B.$$.fragment),bl=s(),c(q.$$.fragment),Il=s(),c(mt.$$.fragment),jl=s(),dt=r("p"),dt.innerHTML=es,Al=s(),c(Mt.$$.fragment),Cl=s(),c(Q.$$.fragment),zl=s(),c(ht.$$.fragment),vl=s(),yt=r("p"),yt.textContent=ts,Ll=s(),c(ft.$$.fragment),kl=s(),gt=r("p"),gt.textContent=ns,El=s(),wt=r("ul"),wt.innerHTML=ls,_l=s(),c(Tt.$$.fragment),Zl=s(),c(V.$$.fragment),Rl=s(),$t=r("p"),$t.innerHTML=as,Hl=s(),Ut=r("p"),Ut.innerHTML=ss,Wl=s(),c(Jt.$$.fragment),Bl=s(),c(bt.$$.fragment),ql=s(),It=r("p"),It.innerHTML=os,Ql=s(),c(jt.$$.fragment),Vl=s(),c(At.$$.fragment),Fl=s(),Ct=r("p"),Ct.innerHTML=is,xl=s(),c(zt.$$.fragment),Gl=s(),c(vt.$$.fragment),Nl=s(),Lt=r("p"),Lt.innerHTML=rs,Sl=s(),c(F.$$.fragment),Xl=s(),c(kt.$$.fragment),Pl=s(),Et=r("p"),Et.in
nerHTML=us,Yl=s(),c(_t.$$.fragment),Dl=s(),Zt=r("p"),Zt.innerHTML=ps,Ol=s(),c(Rt.$$.fragment),Kl=s(),Ht=r("p"),Ht.textContent=cs,ea=s(),c(Wt.$$.fragment),ta=s(),c(Bt.$$.fragment),na=s(),qt=r("p"),qt.textContent=ms,la=s(),Qt=r("p"),Qt.innerHTML=ds,aa=s(),sa=r("hr"),oa=s(),c(x.$$.fragment),ia=s(),c(Vt.$$.fragment),ra=s(),Ft=r("p"),this.h()},l(e){const t=Hs("svelte-u9bgzb",document.head);a=u(t,"META",{name:!0,content:!0}),t.forEach(n),f=o(e),i=u(e,"P",{}),Ls(i).forEach(n),g=o(e),m(j.$$.fragment,e),A=o(e),C=u(e,"P",{"data-svelte-h":!0}),p(C)!=="svelte-349stc"&&(C.textContent=G),$=o(e),m(I.$$.fragment,e),xt=o(e),N=u(e,"P",{"data-svelte-h":!0}),p(N)!=="svelte-ax1atv"&&(N.textContent=pa),Gt=o(e),Nt=u(e,"HR",{}),St=o(e),S=u(e,"P",{"data-svelte-h":!0}),p(S)!=="svelte-1s5xzdd"&&(S.innerHTML=ca),Xt=o(e),X=u(e,"P",{"data-svelte-h":!0}),p(X)!=="svelte-zv8h7c"&&(X.textContent=ma),Pt=o(e),P=u(e,"UL",{"data-svelte-h":!0}),p(P)!=="svelte-fyu9n9"&&(P.innerHTML=da),Yt=o(e),Y=u(e,"P",{"data-svelte-h":!0}),p(Y)!=="svelte-187l7cr"&&(Y.innerHTML=Ma),Dt=o(e),D=u(e,"P",{"data-svelte-h":!0}),p(D)!=="svelte-1a7umw9"&&(D.innerHTML=ha),Ot=o(e),O=u(e,"P",{"data-svelte-h":!0}),p(O)!=="svelte-1v6b7se"&&(O.innerHTML=ya),Kt=o(e),K=u(e,"P",{"data-svelte-h":!0}),p(K)!=="svelte-19k6u5u"&&(K.innerHTML=fa),en=o(e),m(z.$$.fragment,e),tn=o(e),m(ee.$$.fragment,e),nn=o(e),te=u(e,"P",{"data-svelte-h":!0}),p(te)!=="svelte-dnkqle"&&(te.innerHTML=ga),ln=o(e),m(ne.$$.fragment,e),an=o(e),le=u(e,"P",{"data-svelte-h":!0}),p(le)!=="svelte-17v2nqh"&&(le.innerHTML=wa),sn=o(e),m(ae.$$.fragment,e),on=o(e),se=u(e,"P",{"data-svelte-h":!0}),p(se)!=="svelte-g35vg4"&&(se.innerHTML=Ta),rn=o(e),m(oe.$$.fragment,e),un=o(e),ie=u(e,"P",{"data-svelte-h":!0}),p(ie)!=="svelte-14xwk93"&&(ie.innerHTML=$a),pn=o(e),m(re.$$.fragment,e),cn=o(e),ue=u(e,"P",{"data-svelte-h":!0}),p(ue)!=="svelte-1wqr6hs"&&(ue.innerHTML=Ua),mn=o(e),pe=u(e,"P",{"data-svelte-h":!0}),p(pe)!=="svelte-122adhi"&&(pe.innerHTML=Ja),dn=o(e),m(ce.$$.fragment,e),Mn=o(e),
me=u(e,"P",{"data-svelte-h":!0}),p(me)!=="svelte-ujmpki"&&(me.innerHTML=ba),hn=o(e),m(de.$$.fragment,e),yn=o(e),Me=u(e,"P",{"data-svelte-h":!0}),p(Me)!=="svelte-ccsgoj"&&(Me.innerHTML=Ia),fn=o(e),m(he.$$.fragment,e),gn=o(e),ye=u(e,"P",{"data-svelte-h":!0}),p(ye)!=="svelte-1s7uroj"&&(ye.textContent=ja),wn=o(e),m(fe.$$.fragment,e),Tn=o(e),ge=u(e,"P",{"data-svelte-h":!0}),p(ge)!=="svelte-hqf5d7"&&(ge.textContent=Aa),$n=o(e),m(we.$$.fragment,e),Un=o(e),Te=u(e,"P",{"data-svelte-h":!0}),p(Te)!=="svelte-hb8jw0"&&(Te.innerHTML=Ca),Jn=o(e),m(v.$$.fragment,e),bn=o(e),m($e.$$.fragment,e),In=o(e),Ue=u(e,"P",{"data-svelte-h":!0}),p(Ue)!=="svelte-13d894i"&&(Ue.textContent=za),jn=o(e),m(Je.$$.fragment,e),An=o(e),m(L.$$.fragment,e),Cn=o(e),be=u(e,"P",{"data-svelte-h":!0}),p(be)!=="svelte-1rsj5c5"&&(be.textContent=va),zn=o(e),m(Ie.$$.fragment,e),vn=o(e),m(k.$$.fragment,e),Ln=o(e),je=u(e,"P",{"data-svelte-h":!0}),p(je)!=="svelte-1pa207a"&&(je.textContent=La),kn=o(e),m(Ae.$$.fragment,e),En=o(e),Ce=u(e,"P",{"data-svelte-h":!0}),p(Ce)!=="svelte-15ilo8u"&&(Ce.innerHTML=ka),_n=o(e),m(ze.$$.fragment,e),Zn=o(e),ve=u(e,"P",{"data-svelte-h":!0}),p(ve)!=="svelte-gxgvr2"&&(ve.textContent=Ea),Rn=o(e),m(Le.$$.fragment,e),Hn=o(e),ke=u(e,"P",{"data-svelte-h":!0}),p(ke)!=="svelte-yg6r2g"&&(ke.innerHTML=_a),Wn=o(e),m(E.$$.fragment,e),Bn=o(e),m(Ee.$$.fragment,e),qn=o(e),_e=u(e,"P",{"data-svelte-h":!0}),p(_e)!=="svelte-bexho5"&&(_e.innerHTML=Za),Qn=o(e),m(Ze.$$.fragment,e),Vn=o(e),Re=u(e,"P",{"data-svelte-h":!0}),p(Re)!=="svelte-1v277rw"&&(Re.innerHTML=Ra),Fn=o(e),He=u(e,"P",{"data-svelte-h":!0}),p(He)!=="svelte-11uslf2"&&(He.textContent=Ha),xn=o(e),m(We.$$.fragment,e),Gn=o(e),Be=u(e,"P",{"data-svelte-h":!0}),p(Be)!=="svelte-1wrtw53"&&(Be.textContent=Wa),Nn=o(e),m(_.$$.fragment,e),Sn=o(e),m(qe.$$.fragment,e),Xn=o(e),m(Qe.$$.fragment,e),Pn=o(e),m(Ve.$$.fragment,e),Yn=o(e),Fe=u(e,"P",{"data-svelte-h":!0}),p(Fe)!=="svelte-1fy23dh"&&(Fe.innerHTML=Ba),Dn=o(e),m(Z.$$.fragment,e),On=o(e),m(xe.$$.fragment,e),K
n=o(e),m(Ge.$$.fragment,e),el=o(e),Ne=u(e,"P",{"data-svelte-h":!0}),p(Ne)!=="svelte-thijw5"&&(Ne.innerHTML=qa),tl=o(e),m(Se.$$.fragment,e),nl=o(e),Xe=u(e,"P",{"data-svelte-h":!0}),p(Xe)!=="svelte-1hy4n1w"&&(Xe.innerHTML=Qa),ll=o(e),Pe=u(e,"P",{"data-svelte-h":!0}),p(Pe)!=="svelte-1kr526q"&&(Pe.textContent=Va),al=o(e),m(Ye.$$.fragment,e),sl=o(e),De=u(e,"P",{"data-svelte-h":!0}),p(De)!=="svelte-1qpei6p"&&(De.innerHTML=Fa),ol=o(e),Oe=u(e,"P",{"data-svelte-h":!0}),p(Oe)!=="svelte-1kte4k3"&&(Oe.innerHTML=xa),il=o(e),Ke=u(e,"P",{"data-svelte-h":!0}),p(Ke)!=="svelte-mdv3ng"&&(Ke.innerHTML=Ga),rl=o(e),et=u(e,"P",{"data-svelte-h":!0}),p(et)!=="svelte-1weda3f"&&(et.innerHTML=Na),ul=o(e),m(R.$$.fragment,e),pl=o(e),m(tt.$$.fragment,e),cl=o(e),m(nt.$$.fragment,e),ml=o(e),lt=u(e,"P",{"data-svelte-h":!0}),p(lt)!=="svelte-8p2gbc"&&(lt.innerHTML=Sa),dl=o(e),at=u(e,"P",{"data-svelte-h":!0}),p(at)!=="svelte-cd51sx"&&(at.innerHTML=Xa),Ml=o(e),m(H.$$.fragment,e),hl=o(e),m(st.$$.fragment,e),yl=o(e),ot=u(e,"P",{"data-svelte-h":!0}),p(ot)!=="svelte-16iyq4g"&&(ot.innerHTML=Pa),fl=o(e),it=u(e,"P",{"data-svelte-h":!0}),p(it)!=="svelte-1cyaahl"&&(it.innerHTML=Ya),gl=o(e),m(W.$$.fragment,e),wl=o(e),rt=u(e,"P",{"data-svelte-h":!0}),p(rt)!=="svelte-1v1bv0"&&(rt.innerHTML=Da),Tl=o(e),ut=u(e,"P",{"data-svelte-h":!0}),p(ut)!=="svelte-kgt4mo"&&(ut.innerHTML=Oa),$l=o(e),m(pt.$$.fragment,e),Ul=o(e),ct=u(e,"P",{"data-svelte-h":!0}),p(ct)!=="svelte-b8ajgt"&&(ct.innerHTML=Ka),Jl=o(e),m(B.$$.fragment,e),bl=o(e),m(q.$$.fragment,e),Il=o(e),m(mt.$$.fragment,e),jl=o(e),dt=u(e,"P",{"data-svelte-h":!0}),p(dt)!=="svelte-nyu5ju"&&(dt.innerHTML=es),Al=o(e),m(Mt.$$.fragment,e),Cl=o(e),m(Q.$$.fragment,e),zl=o(e),m(ht.$$.fragment,e),vl=o(e),yt=u(e,"P",{"data-svelte-h":!0}),p(yt)!=="svelte-q0evy3"&&(yt.textContent=ts),Ll=o(e),m(ft.$$.fragment,e),kl=o(e),gt=u(e,"P",{"data-svelte-h":!0}),p(gt)!=="svelte-9zyo13"&&(gt.textContent=ns),El=o(e),wt=u(e,"UL",{"data-svelte-h":!0}),p(wt)!=="svelte-kvalv2"&&(wt.innerHTML=ls),_l=o(
e),m(Tt.$$.fragment,e),Zl=o(e),m(V.$$.fragment,e),Rl=o(e),$t=u(e,"P",{"data-svelte-h":!0}),p($t)!=="svelte-1t4oej1"&&($t.innerHTML=as),Hl=o(e),Ut=u(e,"P",{"data-svelte-h":!0}),p(Ut)!=="svelte-1t8h4hs"&&(Ut.innerHTML=ss),Wl=o(e),m(Jt.$$.fragment,e),Bl=o(e),m(bt.$$.fragment,e),ql=o(e),It=u(e,"P",{"data-svelte-h":!0}),p(It)!=="svelte-6rxgjf"&&(It.innerHTML=os),Ql=o(e),m(jt.$$.fragment,e),Vl=o(e),m(At.$$.fragment,e),Fl=o(e),Ct=u(e,"P",{"data-svelte-h":!0}),p(Ct)!=="svelte-nd1bhb"&&(Ct.innerHTML=is),xl=o(e),m(zt.$$.fragment,e),Gl=o(e),m(vt.$$.fragment,e),Nl=o(e),Lt=u(e,"P",{"data-svelte-h":!0}),p(Lt)!=="svelte-e42t4p"&&(Lt.innerHTML=rs),Sl=o(e),m(F.$$.fragment,e),Xl=o(e),m(kt.$$.fragment,e),Pl=o(e),Et=u(e,"P",{"data-svelte-h":!0}),p(Et)!=="svelte-1hthzd7"&&(Et.innerHTML=us),Yl=o(e),m(_t.$$.fragment,e),Dl=o(e),Zt=u(e,"P",{"data-svelte-h":!0}),p(Zt)!=="svelte-r0xzrm"&&(Zt.innerHTML=ps),Ol=o(e),m(Rt.$$.fragment,e),Kl=o(e),Ht=u(e,"P",{"data-svelte-h":!0}),p(Ht)!=="svelte-rntoeu"&&(Ht.textContent=cs),ea=o(e),m(Wt.$$.fragment,e),ta=o(e),m(Bt.$$.fragment,e),na=o(e),qt=u(e,"P",{"data-svelte-h":!0}),p(qt)!=="svelte-heicvy"&&(qt.textContent=ms),la=o(e),Qt=u(e,"P",{"data-svelte-h":!0}),p(Qt)!=="svelte-1nopug0"&&(Qt.innerHTML=ds),aa=o(e),sa=u(e,"HR",{}),oa=o(e),m(x.$$.fragment,e),ia=o(e),m(Vt.$$.fragment,e),ra=o(e),Ft=u(e,"P",{}),Ls(Ft).forEach(n),this.h()},h(){ks(a,"name","hf:doc:metadata"),ks(a,"content",ao)},m(e,t){Ws(document.head,a),l(e,f,t),l(e,i,t),l(e,g,t),d(j,e,t),l(e,A,t),l(e,C,t),l(e,$,t),d(I,e,t),l(e,xt,t),l(e,N,t),l(e,Gt,t),l(e,Nt,t),l(e,St,t),l(e,S,t),l(e,Xt,t),l(e,X,t),l(e,Pt,t),l(e,P,t),l(e,Yt,t),l(e,Y,t),l(e,Dt,t),l(e,D,t),l(e,Ot,t),l(e,O,t),l(e,Kt,t),l(e,K,t),l(e,en,t),d(z,e,t),l(e,tn,t),d(ee,e,t),l(e,nn,t),l(e,te,t),l(e,ln,t),d(ne,e,t),l(e,an,t),l(e,le,t),l(e,sn,t),d(ae,e,t),l(e,on,t),l(e,se,t),l(e,rn,t),d(oe,e,t),l(e,un,t),l(e,ie,t),l(e,pn,t),d(re,e,t),l(e,cn,t),l(e,ue,t),l(e,mn,t),l(e,pe,t),l(e,dn,t),d(ce,e,t),l(e,Mn,t),l(e,me,t),l(e,hn,t),d(de,e,t),l(e,yn,t),l(
e,Me,t),l(e,fn,t),d(he,e,t),l(e,gn,t),l(e,ye,t),l(e,wn,t),d(fe,e,t),l(e,Tn,t),l(e,ge,t),l(e,$n,t),d(we,e,t),l(e,Un,t),l(e,Te,t),l(e,Jn,t),d(v,e,t),l(e,bn,t),d($e,e,t),l(e,In,t),l(e,Ue,t),l(e,jn,t),d(Je,e,t),l(e,An,t),d(L,e,t),l(e,Cn,t),l(e,be,t),l(e,zn,t),d(Ie,e,t),l(e,vn,t),d(k,e,t),l(e,Ln,t),l(e,je,t),l(e,kn,t),d(Ae,e,t),l(e,En,t),l(e,Ce,t),l(e,_n,t),d(ze,e,t),l(e,Zn,t),l(e,ve,t),l(e,Rn,t),d(Le,e,t),l(e,Hn,t),l(e,ke,t),l(e,Wn,t),d(E,e,t),l(e,Bn,t),d(Ee,e,t),l(e,qn,t),l(e,_e,t),l(e,Qn,t),d(Ze,e,t),l(e,Vn,t),l(e,Re,t),l(e,Fn,t),l(e,He,t),l(e,xn,t),d(We,e,t),l(e,Gn,t),l(e,Be,t),l(e,Nn,t),d(_,e,t),l(e,Sn,t),d(qe,e,t),l(e,Xn,t),d(Qe,e,t),l(e,Pn,t),d(Ve,e,t),l(e,Yn,t),l(e,Fe,t),l(e,Dn,t),d(Z,e,t),l(e,On,t),d(xe,e,t),l(e,Kn,t),d(Ge,e,t),l(e,el,t),l(e,Ne,t),l(e,tl,t),d(Se,e,t),l(e,nl,t),l(e,Xe,t),l(e,ll,t),l(e,Pe,t),l(e,al,t),d(Ye,e,t),l(e,sl,t),l(e,De,t),l(e,ol,t),l(e,Oe,t),l(e,il,t),l(e,Ke,t),l(e,rl,t),l(e,et,t),l(e,ul,t),d(R,e,t),l(e,pl,t),d(tt,e,t),l(e,cl,t),d(nt,e,t),l(e,ml,t),l(e,lt,t),l(e,dl,t),l(e,at,t),l(e,Ml,t),d(H,e,t),l(e,hl,t),d(st,e,t),l(e,yl,t),l(e,ot,t),l(e,fl,t),l(e,it,t),l(e,gl,t),d(W,e,t),l(e,wl,t),l(e,rt,t),l(e,Tl,t),l(e,ut,t),l(e,$l,t),d(pt,e,t),l(e,Ul,t),l(e,ct,t),l(e,Jl,t),d(B,e,t),l(e,bl,t),d(q,e,t),l(e,Il,t),d(mt,e,t),l(e,jl,t),l(e,dt,t),l(e,Al,t),d(Mt,e,t),l(e,Cl,t),d(Q,e,t),l(e,zl,t),d(ht,e,t),l(e,vl,t),l(e,yt,t),l(e,Ll,t),d(ft,e,t),l(e,kl,t),l(e,gt,t),l(e,El,t),l(e,wt,t),l(e,_l,t),d(Tt,e,t),l(e,Zl,t),d(V,e,t),l(e,Rl,t),l(e,$t,t),l(e,Hl,t),l(e,Ut,t),l(e,Wl,t),d(Jt,e,t),l(e,Bl,t),d(bt,e,t),l(e,ql,t),l(e,It,t),l(e,Ql,t),d(jt,e,t),l(e,Vl,t),d(At,e,t),l(e,Fl,t),l(e,Ct,t),l(e,xl,t),d(zt,e,t),l(e,Gl,t),d(vt,e,t),l(e,Nl,t),l(e,Lt,t),l(e,Sl,t),d(F,e,t),l(e,Xl,t),d(kt,e,t),l(e,Pl,t),l(e,Et,t),l(e,Yl,t),d(_t,e,t),l(e,Dl,t),l(e,Zt,t),l(e,Ol,t),d(Rt,e,t),l(e,Kl,t),l(e,Ht,t),l(e,ea,t),d(Wt,e,t),l(e,ta,t),d(Bt,e,t),l(e,na,t),l(e,qt,t),l(e,la,t),l(e,Qt,t),l(e,aa,t),l(e,sa,t),l(e,oa,t),d(x,e,t),l(e,ia,t),d(Vt,e,t),l(e,ra,t),l(e,Ft,t),ua=!0},p(e,[t]){const 
Ms={};t&2&&(Ms.$$scope={dirty:t,ctx:e}),I.$set(Ms);const hs={};t&2&&(hs.$$scope={dirty:t,ctx:e}),z.$set(hs);const ys={};t&2&&(ys.$$scope={dirty:t,ctx:e}),v.$set(ys);const fs={};t&2&&(fs.$$scope={dirty:t,ctx:e}),L.$set(fs);const gs={};t&2&&(gs.$$scope={dirty:t,ctx:e}),k.$set(gs);const ws={};t&2&&(ws.$$scope={dirty:t,ctx:e}),E.$set(ws);const Ts={};t&2&&(Ts.$$scope={dirty:t,ctx:e}),_.$set(Ts);const $s={};t&2&&($s.$$scope={dirty:t,ctx:e}),Z.$set($s);const Us={};t&2&&(Us.$$scope={dirty:t,ctx:e}),R.$set(Us);const Js={};t&2&&(Js.$$scope={dirty:t,ctx:e}),H.$set(Js);const bs={};t&2&&(bs.$$scope={dirty:t,ctx:e}),W.$set(bs);const Is={};t&2&&(Is.$$scope={dirty:t,ctx:e}),B.$set(Is);const js={};t&2&&(js.$$scope={dirty:t,ctx:e}),q.$set(js);const As={};t&2&&(As.$$scope={dirty:t,ctx:e}),Q.$set(As);const Cs={};t&2&&(Cs.$$scope={dirty:t,ctx:e}),V.$set(Cs);const zs={};t&2&&(zs.$$scope={dirty:t,ctx:e}),F.$set(zs);const vs={};t&2&&(vs.$$scope={dirty:t,ctx:e}),x.$set(vs)},i(e){ua||(M(j.$$.fragment,e),M(I.$$.fragment,e),M(z.$$.fragment,e),M(ee.$$.fragment,e),M(ne.$$.fragment,e),M(ae.$$.fragment,e),M(oe.$$.fragment,e),M(re.$$.fragment,e),M(ce.$$.fragment,e),M(de.$$.fragment,e),M(he.$$.fragment,e),M(fe.$$.fragment,e),M(we.$$.fragment,e),M(v.$$.fragment,e),M($e.$$.fragment,e),M(Je.$$.fragment,e),M(L.$$.fragment,e),M(Ie.$$.fragment,e),M(k.$$.fragment,e),M(Ae.$$.fragment,e),M(ze.$$.fragment,e),M(Le.$$.fragment,e),M(E.$$.fragment,e),M(Ee.$$.fragment,e),M(Ze.$$.fragment,e),M(We.$$.fragment,e),M(_.$$.fragment,e),M(qe.$$.fragment,e),M(Qe.$$.fragment,e),M(Ve.$$.fragment,e),M(Z.$$.fragment,e),M(xe.$$.fragment,e),M(Ge.$$.fragment,e),M(Se.$$.fragment,e),M(Ye.$$.fragment,e),M(R.$$.fragment,e),M(tt.$$.fragment,e),M(nt.$$.fragment,e),M(H.$$.fragment,e),M(st.$$.fragment,e),M(W.$$.fragment,e),M(pt.$$.fragment,e),M(B.$$.fragment,e),M(q.$$.fragment,e),M(mt.$$.fragment,e),M(Mt.$$.fragment,e),M(Q.$$.fragment,e),M(ht.$$.fragment,e),M(ft.$$.fragment,e),M(Tt.$$.fragment,e),M(V.$$.fragment,e),M(Jt.$$.fragment,e),M(
bt.$$.fragment,e),M(jt.$$.fragment,e),M(At.$$.fragment,e),M(zt.$$.fragment,e),M(vt.$$.fragment,e),M(F.$$.fragment,e),M(kt.$$.fragment,e),M(_t.$$.fragment,e),M(Rt.$$.fragment,e),M(Wt.$$.fragment,e),M(Bt.$$.fragment,e),M(x.$$.fragment,e),M(Vt.$$.fragment,e),ua=!0)},o(e){h(j.$$.fragment,e),h(I.$$.fragment,e),h(z.$$.fragment,e),h(ee.$$.fragment,e),h(ne.$$.fragment,e),h(ae.$$.fragment,e),h(oe.$$.fragment,e),h(re.$$.fragment,e),h(ce.$$.fragment,e),h(de.$$.fragment,e),h(he.$$.fragment,e),h(fe.$$.fragment,e),h(we.$$.fragment,e),h(v.$$.fragment,e),h($e.$$.fragment,e),h(Je.$$.fragment,e),h(L.$$.fragment,e),h(Ie.$$.fragment,e),h(k.$$.fragment,e),h(Ae.$$.fragment,e),h(ze.$$.fragment,e),h(Le.$$.fragment,e),h(E.$$.fragment,e),h(Ee.$$.fragment,e),h(Ze.$$.fragment,e),h(We.$$.fragment,e),h(_.$$.fragment,e),h(qe.$$.fragment,e),h(Qe.$$.fragment,e),h(Ve.$$.fragment,e),h(Z.$$.fragment,e),h(xe.$$.fragment,e),h(Ge.$$.fragment,e),h(Se.$$.fragment,e),h(Ye.$$.fragment,e),h(R.$$.fragment,e),h(tt.$$.fragment,e),h(nt.$$.fragment,e),h(H.$$.fragment,e),h(st.$$.fragment,e),h(W.$$.fragment,e),h(pt.$$.fragment,e),h(B.$$.fragment,e),h(q.$$.fragment,e),h(mt.$$.fragment,e),h(Mt.$$.fragment,e),h(Q.$$.fragment,e),h(ht.$$.fragment,e),h(ft.$$.fragment,e),h(Tt.$$.fragment,e),h(V.$$.fragment,e),h(Jt.$$.fragment,e),h(bt.$$.fragment,e),h(jt.$$.fragment,e),h(At.$$.fragment,e),h(zt.$$.fragment,e),h(vt.$$.fragment,e),h(F.$$.fragment,e),h(kt.$$.fragment,e),h(_t.$$.fragment,e),h(Rt.$$.fragment,e),h(Wt.$$.fragment,e),h(Bt.$$.fragment,e),h(x.$$.fragment,e),h(Vt.$$.fragment,e),ua=!1},d(e){e&&(n(f),n(i),n(g),n(A),n(C),n($),n(xt),n(N),n(Gt),n(Nt),n(St),n(S),n(Xt),n(X),n(Pt),n(P),n(Yt),n(Y),n(Dt),n(D),n(Ot),n(O),n(Kt),n(K),n(en),n(tn),n(nn),n(te),n(ln),n(an),n(le),n(sn),n(on),n(se),n(rn),n(un),n(ie),n(pn),n(cn),n(ue),n(mn),n(pe),n(dn),n(Mn),n(me),n(hn),n(yn),n(Me),n(fn),n(gn),n(ye),n(wn),n(Tn),n(ge),n($n),n(Un),n(Te),n(Jn),n(bn),n(In),n(Ue),n(jn),n(An),n(Cn),n(be),n(zn),n(vn),n(Ln),n(je),n(kn),n(En),n(Ce),n(_n),n(Zn),n(v
e),n(Rn),n(Hn),n(ke),n(Wn),n(Bn),n(qn),n(_e),n(Qn),n(Vn),n(Re),n(Fn),n(He),n(xn),n(Gn),n(Be),n(Nn),n(Sn),n(Xn),n(Pn),n(Yn),n(Fe),n(Dn),n(On),n(Kn),n(el),n(Ne),n(tl),n(nl),n(Xe),n(ll),n(Pe),n(al),n(sl),n(De),n(ol),n(Oe),n(il),n(Ke),n(rl),n(et),n(ul),n(pl),n(cl),n(ml),n(lt),n(dl),n(at),n(Ml),n(hl),n(yl),n(ot),n(fl),n(it),n(gl),n(wl),n(rt),n(Tl),n(ut),n($l),n(Ul),n(ct),n(Jl),n(bl),n(Il),n(jl),n(dt),n(Al),n(Cl),n(zl),n(vl),n(yt),n(Ll),n(kl),n(gt),n(El),n(wt),n(_l),n(Zl),n(Rl),n($t),n(Hl),n(Ut),n(Wl),n(Bl),n(ql),n(It),n(Ql),n(Vl),n(Fl),n(Ct),n(xl),n(Gl),n(Nl),n(Lt),n(Sl),n(Xl),n(Pl),n(Et),n(Yl),n(Dl),n(Zt),n(Ol),n(Kl),n(Ht),n(ea),n(ta),n(na),n(qt),n(la),n(Qt),n(aa),n(sa),n(oa),n(ia),n(ra),n(Ft)),n(a),y(j,e),y(I,e),y(z,e),y(ee,e),y(ne,e),y(ae,e),y(oe,e),y(re,e),y(ce,e),y(de,e),y(he,e),y(fe,e),y(we,e),y(v,e),y($e,e),y(Je,e),y(L,e),y(Ie,e),y(k,e),y(Ae,e),y(ze,e),y(Le,e),y(E,e),y(Ee,e),y(Ze,e),y(We,e),y(_,e),y(qe,e),y(Qe,e),y(Ve,e),y(Z,e),y(xe,e),y(Ge,e),y(Se,e),y(Ye,e),y(R,e),y(tt,e),y(nt,e),y(H,e),y(st,e),y(W,e),y(pt,e),y(B,e),y(q,e),y(mt,e),y(Mt,e),y(Q,e),y(ht,e),y(ft,e),y(Tt,e),y(V,e),y(Jt,e),y(bt,e),y(jt,e),y(At,e),y(zt,e),y(vt,e),y(F,e),y(kt,e),y(_t,e),y(Rt,e),y(Wt,e),y(Bt,e),y(x,e),y(Vt,e)}}}const ao='{"title":"Deploy Large Language Models (LLMs) on Azure AI","local":"deploy-large-language-models-llms-on-azure-ai","sections":[{"title":"Pre-requisites","local":"pre-requisites","sections":[{"title":"Azure Account","local":"azure-account","sections":[],"depth":3},{"title":"Azure CLI","local":"azure-cli","sections":[],"depth":3},{"title":"Azure CLI extension for Azure ML","local":"azure-cli-extension-for-azure-ml","sections":[],"depth":3},{"title":"Azure Resource Group","local":"azure-resource-group","sections":[],"depth":3},{"title":"Azure AI Foundry Hub-based project","local":"azure-ai-foundry-hub-based-project","sections":[],"depth":3}],"depth":2},{"title":"Setup and installation","local":"setup-and-installation","sections":[],"depth":2},{"title":"Authenticate to 
Azure ML","local":"authenticate-to-azure-ml","sections":[],"depth":2},{"title":"Create and Deploy Azure AI Endpoint","local":"create-and-deploy-azure-ai-endpoint","sections":[],"depth":2},{"title":"Send requests to the Azure AI Endpoint","local":"send-requests-to-the-azure-ai-endpoint","sections":[{"title":"Azure Python SDK","local":"azure-python-sdk","sections":[],"depth":3},{"title":"OpenAI Python SDK","local":"openai-python-sdk","sections":[],"depth":3},{"title":"cURL","local":"curl","sections":[],"depth":3},{"title":"Gradio","local":"gradio","sections":[],"depth":3}],"depth":2},{"title":"Release resources","local":"release-resources","sections":[],"depth":2},{"title":"Conclusion","local":"conclusion","sections":[],"depth":2}],"depth":1}';function so(w){return _s(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class co extends Zs{constructor(a){super(),Rs(this,a,so,lo,Es,{})}}export{co as component};

Xet Storage Details

Size:
76.9 kB
·
Xet hash:
9e745d8722b27e2cb4e520ef712fee373c3462d36f087f7a03d1c8a01312fa23

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.