Buckets:

rtrm's picture
download
raw
73.1 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Deploy SmolLM3 on Microsoft Foundry&quot;,&quot;local&quot;:&quot;deploy-smollm3-on-microsoft-foundry&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Pre-requisites&quot;,&quot;local&quot;:&quot;pre-requisites&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Setup and installation&quot;,&quot;local&quot;:&quot;setup-and-installation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Authenticate to Azure Machine Learning&quot;,&quot;local&quot;:&quot;authenticate-to-azure-machine-learning&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Create and Deploy Foundry Endpoint&quot;,&quot;local&quot;:&quot;create-and-deploy-foundry-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Send requests to the Foundry Endpoint&quot;,&quot;local&quot;:&quot;send-requests-to-the-foundry-endpoint&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Chat Completions&quot;,&quot;local&quot;:&quot;chat-completions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Extended Thinking Mode&quot;,&quot;local&quot;:&quot;extended-thinking-mode&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Multilingual capabilities&quot;,&quot;local&quot;:&quot;multilingual-capabilities&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Agentic use-cases and Tool Calling&quot;,&quot;local&quot;:&quot;agentic-use-cases-and-tool-calling&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Release resources&quot;,&quot;local&quot;:&quot;release-resources&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Conclusion&quot;,&quot;local&quot;:&quot;conclusion&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/microsoft-azure/pr_39/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/microsoft-azure/pr_39/en/_app/immutable/entry/start.d16ed975.js">
<link rel="modulepreload" href="/docs/microsoft-azure/pr_39/en/_app/immutable/chunks/scheduler.35aab934.js">
<link rel="modulepreload" href="/docs/microsoft-azure/pr_39/en/_app/immutable/chunks/singletons.69755a92.js">
<link rel="modulepreload" href="/docs/microsoft-azure/pr_39/en/_app/immutable/chunks/paths.2d1ffef0.js">
<link rel="modulepreload" href="/docs/microsoft-azure/pr_39/en/_app/immutable/entry/app.7655f7f9.js">
<link rel="modulepreload" href="/docs/microsoft-azure/pr_39/en/_app/immutable/chunks/preload-helper.3b5fbb1a.js">
<link rel="modulepreload" href="/docs/microsoft-azure/pr_39/en/_app/immutable/chunks/index.b7be2227.js">
<link rel="modulepreload" href="/docs/microsoft-azure/pr_39/en/_app/immutable/nodes/0.25c66cff.js">
<link rel="modulepreload" href="/docs/microsoft-azure/pr_39/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/microsoft-azure/pr_39/en/_app/immutable/nodes/7.c584d541.js">
<link rel="modulepreload" href="/docs/microsoft-azure/pr_39/en/_app/immutable/chunks/Tip.53e7a084.js">
<link rel="modulepreload" href="/docs/microsoft-azure/pr_39/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.696a7398.js">
<link rel="modulepreload" href="/docs/microsoft-azure/pr_39/en/_app/immutable/chunks/CodeBlock.39047ddb.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Deploy SmolLM3 on Microsoft Foundry&quot;,&quot;local&quot;:&quot;deploy-smollm3-on-microsoft-foundry&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Pre-requisites&quot;,&quot;local&quot;:&quot;pre-requisites&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Setup and installation&quot;,&quot;local&quot;:&quot;setup-and-installation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Authenticate to Azure Machine Learning&quot;,&quot;local&quot;:&quot;authenticate-to-azure-machine-learning&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Create and Deploy Foundry Endpoint&quot;,&quot;local&quot;:&quot;create-and-deploy-foundry-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Send requests to the Foundry Endpoint&quot;,&quot;local&quot;:&quot;send-requests-to-the-foundry-endpoint&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Chat Completions&quot;,&quot;local&quot;:&quot;chat-completions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Extended Thinking Mode&quot;,&quot;local&quot;:&quot;extended-thinking-mode&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Multilingual capabilities&quot;,&quot;local&quot;:&quot;multilingual-capabilities&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Agentic use-cases and Tool Calling&quot;,&quot;local&quot;:&quot;agentic-use-cases-and-tool-calling&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Release 
resources&quot;,&quot;local&quot;:&quot;release-resources&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Conclusion&quot;,&quot;local&quot;:&quot;conclusion&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 max-sm:gap-0.5 h-6 max-sm:h-5 px-2 max-sm:px-1.5 text-[11px] max-sm:text-[9px] font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0"><svg class="w-3 h-3 max-sm:w-2.5 max-sm:h-2.5" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-6 max-sm:h-5 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 
overflow-visible w-3 h-3 max-sm:w-2.5 max-sm:h-2.5 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="deploy-smollm3-on-microsoft-foundry" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deploy-smollm3-on-microsoft-foundry"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Deploy SmolLM3 on Microsoft Foundry</span></h1> <p data-svelte-h="svelte-1o96y8n">This example showcases how to deploy SmolLM3 from the Hugging Face collection on Microsoft Foundry as an Azure Machine Learning Managed Online Endpoint, powered by Transformers with an OpenAI compatible route. 
Additionally, this example also showcases how to run inference with the OpenAI Python SDK for different scenarios and use-cases.</p> <p data-svelte-h="svelte-1uny1k4"><img src="https://cdn-uploads.huggingface.co/production/uploads/61c141342aac764ce1654e43/zy0dqTCCt5IHmuzwoqtJ9.png" alt="SmolLM3 3B Logo image"></p> <p data-svelte-h="svelte-ti973g">TL;DR Transformers acts as the model-definition framework for state-of-the-art machine learning models in text, computer vision, audio, video, and multimodal models, for both inference and training. Microsoft Foundry (formerly Azure AI Foundry) provides a unified platform for enterprise AI operations, model builders, and application development. Azure Machine Learning is a cloud service for accelerating and managing the machine learning (ML) project lifecycle.</p> <hr> <p data-svelte-h="svelte-lg35v8">This example will specifically deploy <a href="https://huggingface.co/HuggingFaceTB/SmolLM3-3B" rel="nofollow"><code>HuggingFaceTB/SmolLM3-3B</code></a> from the Hugging Face Hub (or see it on <a href="https://ml.azure.com/models/huggingfacetb-smollm3-3b/version/3/catalog/registry/HuggingFace" rel="nofollow">AzureML</a> or on <a href="https://ai.azure.com/explore/models/huggingfacetb-smollm3-3b/version/3/registry/HuggingFace" rel="nofollow">Microsoft Foundry</a>) as an Azure Machine Learning Managed Online Endpoint on Microsoft Foundry.</p> <p data-svelte-h="svelte-1w9vywx">SmolLM3 is a 3B parameter language model designed to push the boundaries of small models. It supports dual mode reasoning, 6 languages and long context. 
SmolLM3 is a fully open model that offers strong performance at the 3B–4B scale.</p> <p data-svelte-h="svelte-kptas"><img src="https://cdn-uploads.huggingface.co/production/uploads/6200d0a443eb0913fa2df7cc/db3az7eGzs-Sb-8yUj-ff.png" alt="Small LLM win-rate on benchmarks per model size"></p> <p data-svelte-h="svelte-9udqy4">The model is a decoder-only transformer using GQA and NoPE (with 3:1 ratio), it was pretrained on 11.2T tokens with a staged curriculum of web, code, math and reasoning data. Post-training included midtraining on 140B reasoning tokens followed by supervised fine-tuning and alignment via Anchored Preference Optimization (APO).</p> <ul data-svelte-h="svelte-o8sml6"><li>Instruct model optimized for <strong>hybrid reasoning</strong></li> <li><strong>Fully open model</strong>: open weights + full training details including public data mixture and training configs</li> <li><strong>Long context:</strong> Trained on 64k context and supports up to <strong>128k tokens</strong> using YARN extrapolation</li> <li><strong>Multilingual</strong>: 6 natively supported (English, French, Spanish, German, Italian, and Portuguese)</li></ul> <p data-svelte-h="svelte-1sdfk7n"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/microsoft-azure/azure-ai/deploy-smollm3/smollm3-hub.png" alt="SmolLM3 3B on the Hugging Face Hub"></p> <p data-svelte-h="svelte-1lu30tc"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/microsoft-azure/azure-ai/deploy-smollm3/smollm3-azure-ai.png" alt="SmolLM3 3B on Azure AI Foundry"></p> <p data-svelte-h="svelte-1iaxoz5">For more information, make sure to check <a href="https://huggingface.co/HuggingFaceTB/SmolLM3-3B/blob/main/README.md" rel="nofollow">our model card on the Hugging Face Hub</a>.</p> <h2 class="relative group"><a id="pre-requisites" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 
with-hover:group-hover:opacity-100 with-hover:right-full" href="#pre-requisites"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Pre-requisites</span></h2> <p data-svelte-h="svelte-dnkqle">To run the following example, you will need to comply with the following pre-requisites, alternatively, you can also read more about those in the <a href="https://learn.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources?view=azureml-api-2" rel="nofollow">Azure Machine Learning Tutorial: Create resources you need to get started</a>.</p> <ul data-svelte-h="svelte-7cjyin"><li>An Azure account with an active subscription.</li> <li>The Azure CLI installed and logged in.</li> <li>The Azure Machine Learning extension for the Azure CLI.</li> <li>An Azure Resource Group.</li> <li>A Hub-based project on Microsoft Foundry.</li></ul> <p data-svelte-h="svelte-1z0klvb">For more information, please go through the steps in the guide <a href="https://huggingface.co/docs/microsoft-azure/guides/configure-azure-ml-microsoft-foundry" rel="nofollow">“Configure Azure Machine Learning and Microsoft Foundry”</a>.</p> <h2 class="relative group"><a id="setup-and-installation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 
with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#setup-and-installation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Setup and installation</span></h2> <p data-svelte-h="svelte-bexho5">In this example, the <a href="https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/ml/azure-ai-ml" rel="nofollow">Azure Machine Learning SDK for Python</a> will be used to create the endpoint and the deployment, as well as to invoke the deployed API. 
Along with it, you will also need to install <code>azure-identity</code> to authenticate with your Azure credentials via Python.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->%pip install azure-ai-ml azure-identity --upgrade --quiet<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1v277rw">More information at <a href="https://learn.microsoft.com/en-us/python/api/overview/azure/ai-ml-readme?view=azure-python" rel="nofollow">Azure Machine Learning SDK for Python</a>.</p> <p data-svelte-h="svelte-6pxttm">Then, for convenience setting the following environment variables is recommended as those will be used along the example for the Azure Machine Learning Client, so make sure to update and set those values accordingly as per your Microsoft Azure account and resources.</p> <div class="code-block relative "><div 
class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->%env LOCATION eastus
%env SUBSCRIPTION_ID &lt;YOUR_SUBSCRIPTION_ID&gt;
%env RESOURCE_GROUP &lt;YOUR_RESOURCE_GROUP&gt;
%env WORKSPACE_NAME &lt;YOUR_WORKSPACE_NAME&gt;<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1wrtw53">Finally, you also need to define both the endpoint and deployment names, as those will be used throughout the example too:</p> <blockquote class="tip"><p data-svelte-h="svelte-zrbum5">Note that endpoint names must be globally unique per region, i.e., even if you don’t have any endpoint named that way running under your subscription, if the name is reserved by another Azure customer, then you won’t be able to use the same name. Adding a timestamp or a custom identifier is recommended to prevent running into HTTP 400 validation issues when trying to deploy an endpoint with an already locked / reserved name. Also the endpoint name must be between 3 and 32 characters long.</p></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre 
class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> os
<span class="hljs-keyword">from</span> uuid <span class="hljs-keyword">import</span> uuid4
os.environ[<span class="hljs-string">&quot;ENDPOINT_NAME&quot;</span>] = <span class="hljs-string">f&quot;smollm3-endpoint-<span class="hljs-subst">{<span class="hljs-built_in">str</span>(uuid4())[:<span class="hljs-number">8</span>]}</span>&quot;</span>
os.environ[<span class="hljs-string">&quot;DEPLOYMENT_NAME&quot;</span>] = <span class="hljs-string">f&quot;smollm3-deployment-<span class="hljs-subst">{<span class="hljs-built_in">str</span>(uuid4())[:<span class="hljs-number">8</span>]}</span>&quot;</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="authenticate-to-azure-machine-learning" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#authenticate-to-azure-machine-learning"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Authenticate to Azure Machine Learning</span></h2> <p data-svelte-h="svelte-mq78vw">Initially, you need to authenticate into the Microsoft Foundry via Azure Machine Learning with the Azure Machine Learning Python SDK, which will be later used to deploy <code>HuggingFaceTB/SmolLM3-3B</code> as an Azure Machine Learning Managed Online Endpoint in your Microsoft Foundry.</p> <blockquote class="tip"><p data-svelte-h="svelte-1mhdncd">On standard Azure Machine Learning deployments you’d need to create the <code>MLClient</code> using the Azure Machine Learning Workspace as the <code>workspace_name</code> whereas for Microsoft Foundry, you 
need to provide the Hub-based project name as the <code>workspace_name</code> instead, and that will deploy the endpoint under Microsoft Foundry too.</p></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> os
<span class="hljs-keyword">from</span> azure.ai.ml <span class="hljs-keyword">import</span> MLClient
<span class="hljs-keyword">from</span> azure.identity <span class="hljs-keyword">import</span> DefaultAzureCredential
client = MLClient(
credential=DefaultAzureCredential(),
subscription_id=os.getenv(<span class="hljs-string">&quot;SUBSCRIPTION_ID&quot;</span>),
resource_group_name=os.getenv(<span class="hljs-string">&quot;RESOURCE_GROUP&quot;</span>),
workspace_name=os.getenv(<span class="hljs-string">&quot;WORKSPACE_NAME&quot;</span>),
)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="create-and-deploy-foundry-endpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#create-and-deploy-foundry-endpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Create and Deploy Foundry Endpoint</span></h2> <p data-svelte-h="svelte-16ken9o">Before creating the Managed Online Endpoint, you need to build the model URI, which is formatted as it follows <code>azureml://registries/HuggingFace/models/&lt;MODEL_ID&gt;/labels/latest</code> where the <code>MODEL_ID</code> won’t be the Hugging Face Hub ID but rather its name on Azure, as follows:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model_id = <span class="hljs-string">&quot;HuggingFaceTB/SmolLM3-3B&quot;</span>
model_uri = <span class="hljs-string">f&quot;azureml://registries/HuggingFace/models/<span class="hljs-subst">{model_id.replace(<span class="hljs-string">&#x27;/&#x27;</span>, <span class="hljs-string">&#x27;-&#x27;</span>).replace(<span class="hljs-string">&#x27;_&#x27;</span>, <span class="hljs-string">&#x27;-&#x27;</span>).lower()}</span>/labels/latest&quot;</span>
model_uri<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-sstuaa"><strong>Note:</strong>
To check if a model from the Hugging Face Hub is available in Azure, you should read about it in <a href="https://huggingface.co/docs/microsoft-azure/azure-ai/models" rel="nofollow">Supported Models</a>. If not, you can always <a href="https://huggingface.co/docs/microsoft-azure/guides/request-model-addition" rel="nofollow">Request a model addition in the Hugging Face collection on Azure</a>.</p> <p data-svelte-h="svelte-1f4out7">Then you need to create the <a href="https://learn.microsoft.com/en-us/python/api/azure-ai-ml/azure.ai.ml.entities.managedonlineendpoint?view=azure-python" rel="nofollow">ManagedOnlineEndpoint via the Azure Machine Learning Python SDK</a> as follows.</p> <blockquote class="tip"><p data-svelte-h="svelte-blz99s">Every model in the Hugging Face collection is powered by an efficient inference backend, and each of those can run on a wide variety of instance types (as listed in <a href="https://huggingface.co/docs/microsoft-azure/azure-ai/supported-hardware" rel="nofollow">Supported Hardware</a>). 
Since models and inference engines require a GPU-accelerated instance, you might need to request a quota increase as per <a href="https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-quotas?view=azureml-api-2" rel="nofollow">Manage and increase quotas and limits for resources with Azure Machine Learning</a>.</p></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> azure.ai.ml.entities <span class="hljs-keyword">import</span> ManagedOnlineEndpoint, ManagedOnlineDeployment
endpoint = ManagedOnlineEndpoint(name=os.getenv(<span class="hljs-string">&quot;ENDPOINT_NAME&quot;</span>))
deployment = ManagedOnlineDeployment(
name=os.getenv(<span class="hljs-string">&quot;DEPLOYMENT_NAME&quot;</span>),
endpoint_name=os.getenv(<span class="hljs-string">&quot;ENDPOINT_NAME&quot;</span>),
model=model_uri,
instance_type=<span class="hljs-string">&quot;Standard_NC40ads_H100_v5&quot;</span>,
instance_count=<span class="hljs-number">1</span>,
)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->client.begin_create_or_update(endpoint).wait()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-rurq1p"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/microsoft-azure/azure-ai/deploy-smollm3/azure-ai-endpoint.png" alt="Azure AI Endpoint from Azure AI Foundry"></p> <blockquote class="tip"><p data-svelte-h="svelte-1thum8j">On Microsoft Foundry the endpoint will only be listed within the “My assets -&gt; Models + endpoints” tab once the deployment is created, not before as in Azure Machine Learning where the endpoint is shown even if it doesn’t contain any active or in-progress deployments.</p></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button 
class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->client.online_deployments.begin_create_or_update(deployment).wait()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19ycyw5"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/microsoft-azure/azure-ai/deploy-smollm3/azure-ai-deployment.png" alt="Azure AI Deployment from Azure AI Foundry"></p> <p data-svelte-h="svelte-cs8ulb">The deployment might take ~10-15 minutes, but it could as well take longer depending on the selected SKU availability in the region. 
Once deployed, you will be able to inspect the endpoint details, the real-time logs, how to consume the endpoint, and <a href="https://learn.microsoft.com/en-us/azure/machine-learning/concept-model-monitoring?view=azureml-api-2" rel="nofollow">monitoring (in preview)</a>.</p> <p data-svelte-h="svelte-1bmop2">Find more information about it at <a href="https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints-online?view=azureml-api-2#managed-online-endpoints" rel="nofollow">Azure Machine Learning Managed Online Endpoints</a>.</p> <h2 class="relative group"><a id="send-requests-to-the-foundry-endpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#send-requests-to-the-foundry-endpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Send requests to the Foundry Endpoint</span></h2> <p data-svelte-h="svelte-h9xeh2">Finally, now that the Foundry Endpoint is deployed, you can send requests to it. 
In this case, since the task of the model is <code>text-generation</code> (also known as <code>chat-completion</code>) you can use the OpenAI SDK with the OpenAI-compatible route and send requests to the scoring URI i.e., <code>/v1/chat/completions</code>.</p> <blockquote class="tip"><p data-svelte-h="svelte-11fv9wi">Note that below only some of the options are listed, but you can send requests to the deployed endpoint as long as you send the HTTP requests with the <code>azureml-model-deployment</code> header set to the name of the Foundry Deployment (not the Endpoint), and have the necessary authentication token / key to send requests to the given endpoint; then you can send HTTP requests to all the routes that the backend engine is exposing, not only to the scoring route.</p></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- 
HTML_TAG_START -->%pip install openai --upgrade --quiet<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-121ktgf">To use the OpenAI Python SDK with Azure Machine Learning Managed Online Endpoints, you need to first retrieve:</p> <ul data-svelte-h="svelte-1ye9v6w"><li><code>api_url</code> with the <code>/v1</code> route (that contains the <code>v1/chat/completions</code> endpoint that the OpenAI Python SDK will send requests to)</li> <li><code>api_key</code> which is the API Key on Microsoft Foundry or the primary key in Azure Machine Learning (unless a dedicated Azure Machine Learning Token is used instead)</li></ul> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> urllib.parse <span class="hljs-keyword">import</span> urlsplit
api_key = client.online_endpoints.get_keys(os.getenv(<span class="hljs-string">&quot;ENDPOINT_NAME&quot;</span>)).primary_key
url_parts = urlsplit(client.online_endpoints.get(os.getenv(<span class="hljs-string">&quot;ENDPOINT_NAME&quot;</span>)).scoring_uri)
api_url = <span class="hljs-string">f&quot;<span class="hljs-subst">{url_parts.scheme}</span>://<span class="hljs-subst">{url_parts.netloc}</span>/v1&quot;</span><!-- HTML_TAG_END --></pre></div> <blockquote class="tip"><p data-svelte-h="svelte-ufs9gh">Alternatively, you can also build the API URL manually as follows, since the URIs are globally unique per region, meaning that there will only be one endpoint named the same way within the same region:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->api_url = <span class="hljs-string">f&quot;https://<span class="hljs-subst">{os.getenv(<span class="hljs-string">&#x27;ENDPOINT_NAME&#x27;</span>)}</span>.<span class="hljs-subst">{os.getenv(<span class="hljs-string">&#x27;LOCATION&#x27;</span>)}</span>.inference.ml.azure.com/v1&quot;</span><!-- HTML_TAG_END --></pre></div> <p 
data-svelte-h="svelte-110vkm2">Or just retrieve it from either Microsoft Foundry or the Azure Machine Learning Studio.</p></blockquote> <p data-svelte-h="svelte-1qxthjn">Then you can use the OpenAI Python SDK normally, making sure to include the extra <code>azureml-model-deployment</code> header that contains the name of the Microsoft Foundry or Azure Machine Learning Deployment.</p> <p data-svelte-h="svelte-1t8h4hs">Via the OpenAI Python SDK it can either be set within each call to <code>chat.completions.create</code> via the <code>extra_headers</code> parameter as commented below, or via the <code>default_headers</code> parameter when instantiating the <code>OpenAI</code> client (which is the recommended approach since the header needs to be present on each request, so setting it just once is preferred).</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre 
class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> os
<span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI
openai_client = OpenAI(
base_url=api_url,
api_key=api_key,
default_headers={<span class="hljs-string">&quot;azureml-model-deployment&quot;</span>: os.getenv(<span class="hljs-string">&quot;DEPLOYMENT_NAME&quot;</span>)},
)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="chat-completions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#chat-completions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Chat Completions</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform 
-translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->completion = openai_client.chat.completions.create(
model=<span class="hljs-string">&quot;HuggingFaceTB/SmolLM3-3B&quot;</span>,
messages=[
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;system&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;You are an assistant that responds like a pirate.&quot;</span>,
},
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Give me a brief explanation of gravity in simple terms.&quot;</span>,
},
],
max_tokens=<span class="hljs-number">128</span>,
)
<span class="hljs-built_in">print</span>(completion)
<span class="hljs-comment"># ChatCompletion(id=&#x27;chatcmpl-74f6852e28&#x27;, choices=[Choice(finish_reason=&#x27;length&#x27;, index=0, logprobs=None, message=ChatCompletionMessage(content=&quot;&lt;think&gt;\nOkay, the user wants a simple explanation of gravity. Let me start by recalling what I know. Gravity is the force that pulls objects towards each other. But how to explain that simply?\n\nMaybe start with a common example, like how you fall when you jump. That&#x27;s gravity pulling you down. But wait, I should mention that it&#x27;s not just on Earth. The moon orbits the Earth because of gravity too. But how to make that easy to understand?\n\nI need to avoid technical terms. Maybe use metaphors. Like comparing gravity to a magnet, but not exactly. Or think of it as a stretchy rope pulling&quot;, refusal=None, role=&#x27;assistant&#x27;, annotations=[], audio=None, function_call=None, tool_calls=None))], created=1753178803, model=&#x27;HuggingFaceTB/SmolLM3-3B&#x27;, object=&#x27;chat.completion&#x27;, service_tier=&#x27;default&#x27;, system_fingerprint=&#x27;1a28be5c-df18-4e97-822f-118bf57374c8&#x27;, usage=CompletionUsage(completion_tokens=128, prompt_tokens=66, total_tokens=194, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0), reasoning_tokens=0))</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="extended-thinking-mode" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#extended-thinking-mode"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 
8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Extended Thinking Mode</span></h3> <p data-svelte-h="svelte-mfkn5v">By default, <code>SmolLM3-3B</code> enables extended thinking, so the example above generates the output with a reasoning trace.</p> <p data-svelte-h="svelte-2sq8am">To enable or disable it, you can provide either <code>/think</code> or <code>/no_think</code> in the system prompt, respectively.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 
border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->completion = openai_client.chat.completions.create(
model=<span class="hljs-string">&quot;HuggingFaceTB/SmolLM3-3B&quot;</span>,
messages=[
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;system&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;/no_think You are an assistant that responds like a pirate.&quot;</span>,
},
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Give me a brief explanation of gravity in simple terms.&quot;</span>,
},
],
max_tokens=<span class="hljs-number">128</span>,
)
<span class="hljs-built_in">print</span>(completion)
<span class="hljs-comment"># ChatCompletion(id=&#x27;chatcmpl-776e84a272&#x27;, choices=[Choice(finish_reason=&#x27;length&#x27;, index=0, logprobs=None, message=ChatCompletionMessage(content=&quot;Arr matey! Ye be askin&#x27; about gravity, the mighty force that keeps us swabbin&#x27; the decks and not floatin&#x27; off into the vast blue yonder. Gravity be the pull o&#x27; the Earth, a mighty force that keeps us grounded and keeps the stars in their place. It&#x27;s like a giant invisible hand that pulls us towards the center o&#x27; the Earth, makin&#x27; sure we don&#x27;t float off into space. It&#x27;s what makes the apples fall from the tree and the moon orbit &#x27;round the Earth. So, gravity be the force that keeps us all tied to this fine planet we call home.&quot;, refusal=None, role=&#x27;assistant&#x27;, annotations=[], audio=None, function_call=None, tool_calls=None))], created=1753178805, model=&#x27;HuggingFaceTB/SmolLM3-3B&#x27;, object=&#x27;chat.completion&#x27;, service_tier=&#x27;default&#x27;, system_fingerprint=&#x27;d644cb1c-84d6-49ae-b790-ac6011851042&#x27;, usage=CompletionUsage(completion_tokens=128, prompt_tokens=72, total_tokens=200, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0), reasoning_tokens=0))</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="multilingual-capabilities" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#multilingual-capabilities"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 
11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Multilingual capabilities</span></h3> <p data-svelte-h="svelte-1uezu29">As mentioned before, <code>SmolLM3-3B</code> has been trained to natively support 6 languages: English, French, Spanish, German, Italian, and Portuguese; meaning that you can leverage its multilingual potential by sending requests in any of those languages.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> 
Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->completion = openai_client.chat.completions.create(
model=<span class="hljs-string">&quot;HuggingFaceTB/SmolLM3-3B&quot;</span>,
messages=[
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;system&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;/no_think You are an expert translator.&quot;</span>,
},
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Translate the following English sentence into both Spanish and German: &#x27;The brown cat sat on the mat.&#x27;&quot;</span>,
},
],
max_tokens=<span class="hljs-number">128</span>,
)
<span class="hljs-built_in">print</span>(completion)
<span class="hljs-comment"># ChatCompletion(id=&#x27;chatcmpl-da6188629f&#x27;, choices=[Choice(finish_reason=&#x27;stop&#x27;, index=0, logprobs=None, message=ChatCompletionMessage(content=&quot;The translation of the English sentence &#x27;The brown cat sat on the mat.&#x27; into Spanish is: &#x27;El gato marrón se sentó en el tapete.&#x27;\n\nThe translation of the English sentence &#x27;The brown cat sat on the mat.&#x27; into German is: &#x27;Der braune Katze saß auf dem Teppich.&#x27;&quot;, refusal=None, role=&#x27;assistant&#x27;, annotations=[], audio=None, function_call=None, tool_calls=None))], created=1753178807, model=&#x27;HuggingFaceTB/SmolLM3-3B&#x27;, object=&#x27;chat.completion&#x27;, service_tier=&#x27;default&#x27;, system_fingerprint=&#x27;054f8a76-4e8c-4a2f-90eb-31f0e802916c&#x27;, usage=CompletionUsage(completion_tokens=68, prompt_tokens=77, total_tokens=145, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0), reasoning_tokens=0))</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="agentic-use-cases-and-tool-calling" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#agentic-use-cases-and-tool-calling"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 
43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Agentic use-cases and Tool Calling</span></h3> <p data-svelte-h="svelte-1ljdto6"><code>SmolLM3-3B</code> has tool calling capabilities, meaning that you can provide a tool or list of tools that the LLM can leverage and use.</p> <blockquote class="tip"><p data-svelte-h="svelte-xjdx8i">To prevent the <code>tool_call</code> from being incomplete, you might need to either unset the value for <code>max_completion_tokens</code> (formerly <code>max_tokens</code>) or set it to a large enough value so that the model does not stop producing tokens due to length limitations before the <code>tool_call</code> is complete.</p></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: 
transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->completion = openai_client.chat.completions.create(
model=<span class="hljs-string">&quot;HuggingFaceTB/SmolLM3-3B&quot;</span>,
messages=[{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What is the weather like in New York?&quot;</span>}],
tools=[
{
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;function&quot;</span>,
<span class="hljs-string">&quot;function&quot;</span>: {
<span class="hljs-string">&quot;name&quot;</span>: <span class="hljs-string">&quot;get_weather&quot;</span>,
<span class="hljs-string">&quot;description&quot;</span>: <span class="hljs-string">&quot;Get the current weather in a given location&quot;</span>,
<span class="hljs-string">&quot;parameters&quot;</span>: {
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;object&quot;</span>,
<span class="hljs-string">&quot;properties&quot;</span>: {
<span class="hljs-string">&quot;location&quot;</span>: {
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;string&quot;</span>,
<span class="hljs-string">&quot;description&quot;</span>: <span class="hljs-string">&quot;The city and state, e.g. San Francisco, CA&quot;</span>,
},
<span class="hljs-string">&quot;unit&quot;</span>: {
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;string&quot;</span>,
<span class="hljs-string">&quot;enum&quot;</span>: [<span class="hljs-string">&quot;celsius&quot;</span>, <span class="hljs-string">&quot;fahrenheit&quot;</span>],
<span class="hljs-string">&quot;description&quot;</span>: <span class="hljs-string">&quot;The unit of temperature&quot;</span>,
},
},
<span class="hljs-string">&quot;required&quot;</span>: [<span class="hljs-string">&quot;location&quot;</span>],
},
},
}
],
tool_choice=<span class="hljs-string">&quot;auto&quot;</span>,
max_completion_tokens=<span class="hljs-number">256</span>,
)
<span class="hljs-built_in">print</span>(completion)
<span class="hljs-comment"># ChatCompletion(id=&#x27;chatcmpl-c36090e6b5&#x27;, choices=[Choice(finish_reason=&#x27;tool_calls&#x27;, index=0, logprobs=None, message=ChatCompletionMessage(content=&#x27;&lt;think&gt;I need to retrieve the current weather information for New York, so I\&#x27;ll use the get_weather function with the location set to \&#x27;New York\&#x27; and the unit set to \&#x27;fahrenheit\&#x27;.&lt;/think&gt;\n&lt;tool_call&gt;{&quot;name&quot;: &quot;get_weather&quot;, &quot;arguments&quot;: {&quot;location&quot;: &quot;New York&quot;, &quot;unit&quot;: &quot;fahrenheit&quot;}}&lt;/tool_call&gt;&#x27;, refusal=None, role=&#x27;assistant&#x27;, annotations=[], audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id=&#x27;call-5d5eb71a&#x27;, function=Function(arguments=&#x27;{&quot;location&quot;: &quot;New York&quot;, &quot;unit&quot;: &quot;fahrenheit&quot;}&#x27;, name=&#x27;get_weather&#x27;), type=&#x27;function&#x27;)]))], created=1753178808, model=&#x27;HuggingFaceTB/SmolLM3-3B&#x27;, object=&#x27;chat.completion&#x27;, service_tier=&#x27;default&#x27;, system_fingerprint=&#x27;5e58b305-773c-40b6-900b-fe5b177aeab9&#x27;, usage=CompletionUsage(completion_tokens=68, prompt_tokens=442, total_tokens=510, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0), reasoning_tokens=0))</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="release-resources" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#release-resources"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" 
viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Release resources</span></h2> <p data-svelte-h="svelte-dzkovk">Once you are done using the Foundry Endpoint, you can delete the resources (i.e., you will stop paying for the instance on which the model is running and all the attached costs) as follows:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> 
Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->client.online_endpoints.begin_delete(name=os.getenv(<span class="hljs-string">&quot;ENDPOINT_NAME&quot;</span>)).result()<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="conclusion" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#conclusion"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Conclusion</span></h2> <p data-svelte-h="svelte-y7vsdz">Throughout this example you learnt how to create and configure your Azure account for Azure Machine Learning and Microsoft Foundry, how to then create a Managed Online Endpoint running an open model from the Hugging Face collection on Microsoft Foundry and Azure Machine Learning, how to send inference requests with OpenAI SDK for a wide variety of use-cases, and finally, how to stop and release the resources.</p> <p data-svelte-h="svelte-1nopug0">If you have any doubt, issue or question about this example, feel free to <a href="https://github.com/huggingface/Microsoft-Azure/issues/new" rel="nofollow">open an issue</a> and we’ll do our best to help!</p> <hr> <blockquote class="tip"><p 
data-svelte-h="svelte-qhx1ch">📍 Find the complete example on GitHub <a href="https://github.com/huggingface/Microsoft-Azure/tree/main/examples/foundry/deploy-smollm3/azure-notebook.ipynb" rel="nofollow">here</a>!</p></blockquote> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/Microsoft-Azure/blob/main/docs/source/foundry/examples/deploy-smollm3.mdx" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
// Client-side bootstrap for this page: publish a small config object,
// load the two entry modules, then hand control to `kit.start`.
//
// Assigned without var/let/const in a classic (non-module) script, so this
// writes a global binding rather than a block-local one.
// NOTE(review): presumably the imported entry modules read this global by
// its hashed name; confirm before renaming or scoping it.
__sveltekit_17ttbw8 = {
// `assets` and `base` are set to the same path prefix for this build.
assets: "/docs/microsoft-azure/pr_39/en",
base: "/docs/microsoft-azure/pr_39/en",
// No environment values are passed to the client here.
env: {}
};
// Parent element of the <script> tag running this code. It is captured
// here, before any asynchronous work, because it is used later inside the
// promise callback below.
// NOTE(review): `document.currentScript` is expected to be unavailable once
// the script has finished executing, so keep this read outside the callback.
const element = document.currentScript.parentElement;
// Two null entries; `node_ids` below also has two entries.
// NOTE(review): looks like one data slot per node id; TODO confirm.
const data = [null,null];
// Both dynamic imports are started together; the callback runs only after
// both modules have resolved.
Promise.all([
import("/docs/microsoft-azure/pr_39/en/_app/immutable/entry/start.d16ed975.js"),
import("/docs/microsoft-azure/pr_39/en/_app/immutable/entry/app.7655f7f9.js")
]).then(([kit, app]) => {
// Start the app against `element`, passing the node ids and data above,
// with no form result and no error.
kit.start(app, element, {
node_ids: [0, 7],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
73.1 kB
·
Xet hash:
a21451140d9e5f90488bb24ff29f1d113bf503fcc62dd9d3277a3adf1123c370

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.