Buckets:

rtrm's picture
download
raw
71.1 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Guidance&quot;,&quot;local&quot;:&quot;guidance&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;How it works&quot;,&quot;local&quot;:&quot;how-it-works&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Table of Contents 📚&quot;,&quot;local&quot;:&quot;table-of-contents-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Grammar and Constraints&quot;,&quot;local&quot;:&quot;grammar-and-constraints&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Tools and Functions&quot;,&quot;local&quot;:&quot;tools-and-functions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Grammar and Constraints 🛣️&quot;,&quot;local&quot;:&quot;grammar-and-constraints-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;The Grammar Parameter&quot;,&quot;local&quot;:&quot;the-grammar-parameter&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Hugging Face Hub Python Library&quot;,&quot;local&quot;:&quot;hugging-face-hub-python-library&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Constrain with Pydantic&quot;,&quot;local&quot;:&quot;constrain-with-pydantic&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Tools and Functions 🛠️&quot;,&quot;local&quot;:&quot;tools-and-functions-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;The Tools Parameter&quot;,&quot;local&quot;:&quot;the-tools-parameter&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Chat Completion with Tools&quot;,&quot;local&quot;:&quot;chat-completion-with-tools&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;OpenAI integration&quot;,&quot;local&quot;:&quot;openai-integration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Tool Choice Configuration&quot;,&quot;local&quot;:&quot;tool-choice-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/text-generation-inference/pr_2965/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/text-generation-inference/pr_2965/en/_app/immutable/entry/start.6569fc45.js">
<link rel="modulepreload" href="/docs/text-generation-inference/pr_2965/en/_app/immutable/chunks/scheduler.362310b7.js">
<link rel="modulepreload" href="/docs/text-generation-inference/pr_2965/en/_app/immutable/chunks/singletons.189677f5.js">
<link rel="modulepreload" href="/docs/text-generation-inference/pr_2965/en/_app/immutable/chunks/index.7f53ec41.js">
<link rel="modulepreload" href="/docs/text-generation-inference/pr_2965/en/_app/immutable/chunks/paths.323159e8.js">
<link rel="modulepreload" href="/docs/text-generation-inference/pr_2965/en/_app/immutable/entry/app.98c60904.js">
<link rel="modulepreload" href="/docs/text-generation-inference/pr_2965/en/_app/immutable/chunks/index.57dfc70d.js">
<link rel="modulepreload" href="/docs/text-generation-inference/pr_2965/en/_app/immutable/nodes/0.69e98ac5.js">
<link rel="modulepreload" href="/docs/text-generation-inference/pr_2965/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/text-generation-inference/pr_2965/en/_app/immutable/nodes/12.4c91dda4.js">
<link rel="modulepreload" href="/docs/text-generation-inference/pr_2965/en/_app/immutable/chunks/CodeBlock.d3c47f83.js">
<link rel="modulepreload" href="/docs/text-generation-inference/pr_2965/en/_app/immutable/chunks/EditOnGithub.9633c464.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Guidance&quot;,&quot;local&quot;:&quot;guidance&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;How it works&quot;,&quot;local&quot;:&quot;how-it-works&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Table of Contents 📚&quot;,&quot;local&quot;:&quot;table-of-contents-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Grammar and Constraints&quot;,&quot;local&quot;:&quot;grammar-and-constraints&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Tools and Functions&quot;,&quot;local&quot;:&quot;tools-and-functions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Grammar and Constraints 🛣️&quot;,&quot;local&quot;:&quot;grammar-and-constraints-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;The Grammar Parameter&quot;,&quot;local&quot;:&quot;the-grammar-parameter&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Hugging Face Hub Python Library&quot;,&quot;local&quot;:&quot;hugging-face-hub-python-library&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Constrain with Pydantic&quot;,&quot;local&quot;:&quot;constrain-with-pydantic&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Tools and Functions 🛠️&quot;,&quot;local&quot;:&quot;tools-and-functions-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;The Tools Parameter&quot;,&quot;local&quot;:&quot;the-tools-parameter&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Chat Completion with Tools&quot;,&quot;local&quot;:&quot;chat-completion-with-tools&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;OpenAI integration&quot;,&quot;local&quot;:&quot;openai-integration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Tool Choice Configuration&quot;,&quot;local&quot;:&quot;tool-choice-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="guidance" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#guidance"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Guidance</span></h1> <p data-svelte-h="svelte-kbjeep">Text Generation Inference (TGI) now supports <a href="#grammar-and-constraints">JSON and regex grammars</a> and <a href="#tools-and-functions">tools and functions</a> to help developers guide LLM responses to fit their needs.</p> <p data-svelte-h="svelte-fq1iv3">These feature are available starting from version <code>1.4.3</code>. They are accessible via the <a href="https://pypi.org/project/huggingface-hub/" rel="nofollow"><code>huggingface_hub</code></a> library. The tool support is compatible with OpenAI’s client libraries. The following guide will walk you through the new features and how to use them!</p> <p data-svelte-h="svelte-bfgv2a"><em>note: guidance is supported as grammar in the <code>/generate</code> endpoint and as tools in the <code>v1/chat/completions</code> endpoint.</em></p> <h2 class="relative group"><a id="how-it-works" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-it-works"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How it works</span></h2> <p data-svelte-h="svelte-wsp0l4">TGI leverages the <a href="https://github.com/outlines-dev/outlines" rel="nofollow">outlines</a> library to efficiently parse and compile the grammatical structures and tools specified by users. This integration transforms the defined grammars into an intermediate representation that acts as a framework to guide and constrain content generation, ensuring that outputs adhere to the specified grammatical rules.</p> <p data-svelte-h="svelte-15m2t7v">If you are interested in the technical details on how outlines is used in TGI, you can check out the <a href="../conceptual/guidance">conceptual guidance documentation</a>.</p> <h2 class="relative group"><a id="table-of-contents-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#table-of-contents-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Table of Contents 📚</span></h2> <h3 class="relative group"><a id="grammar-and-constraints" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#grammar-and-constraints"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Grammar and Constraints</span></h3> <ul data-svelte-h="svelte-12576sd"><li><a href="#the-grammar-parameter">The Grammar Parameter</a>: Shape your AI’s responses with precision.</li> <li><a href="#constrain-with-pydantic">Constrain with Pydantic</a>: Define a grammar using Pydantic models.</li> <li><a href="#json-schema-integration">JSON Schema Integration</a>: Fine-grained control over your requests via JSON schema.</li> <li><a href="#using-the-client">Using the client</a>: Use TGI’s client libraries to shape the AI’s responses.</li></ul> <h3 class="relative group"><a id="tools-and-functions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tools-and-functions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Tools and Functions</span></h3> <ul data-svelte-h="svelte-18ofpvb"><li><a href="#the-tools-parameter">The Tools Parameter</a>: Enhance the AI’s capabilities with predefined functions.</li> <li><a href="#text-generation-inference-client">Via the client</a>: Use TGI’s client libraries to interact with the Messages API and Tool functions.</li> <li><a href="#openai-integration">OpenAI integration</a>: Use OpenAI’s client libraries to interact with TGI’s Messages API and Tool functions.</li></ul> <h2 class="relative group"><a id="grammar-and-constraints-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#grammar-and-constraints-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Grammar and Constraints 🛣️</span></h2> <h3 class="relative group"><a id="the-grammar-parameter" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#the-grammar-parameter"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>The Grammar Parameter</span></h3> <p data-svelte-h="svelte-yoqu28">In TGI <code>1.4.3</code>, we’ve introduced the grammar parameter, which allows you to specify the format of the response you want from the LLM.</p> <p data-svelte-h="svelte-1vsv0tm">Using curl, you can make a request to TGI’s Messages API with the grammar parameter. This is the most primitive way to interact with the API and using <a href="#constrain-with-pydantic">Pydantic</a> is recommended for ease of use and readability.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->curl localhost<span class="hljs-punctuation">:</span><span class="hljs-number">3000</span>/generate \
-X POST \
-H &#x27;Content-Type<span class="hljs-punctuation">:</span> application/json&#x27; \
-d &#x27;<span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;inputs&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;I saw a puppy a cat and a raccoon during my bike ride in the park&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;parameters&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;repetition_penalty&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1.3</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;grammar&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;type&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;json&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;value&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;properties&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;location&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;type&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;string&quot;</span>
<span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;activity&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;type&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;string&quot;</span>
<span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;animals_seen&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;type&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;integer&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;minimum&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;maximum&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-number">5</span>
<span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;animals&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;type&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;array&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;items&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;type&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;string&quot;</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;required&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span><span class="hljs-string">&quot;location&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-string">&quot;activity&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-string">&quot;animals_seen&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-string">&quot;animals&quot;</span><span class="hljs-punctuation">]</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span>&#x27;
<span class="hljs-comment">// {&quot;generated_text&quot;:&quot;{ \n\n\&quot;activity\&quot;: \&quot;biking\&quot;,\n\&quot;animals\&quot;: [\&quot;puppy\&quot;,\&quot;cat\&quot;,\&quot;raccoon\&quot;],\n\&quot;animals_seen\&quot;: 3,\n\&quot;location\&quot;: \&quot;park\&quot;\n}&quot;}</span>
<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="hugging-face-hub-python-library" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#hugging-face-hub-python-library"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Hugging Face Hub Python Library</span></h3> <p data-svelte-h="svelte-126mt2g">The Hugging Face Hub Python library provides a client that makes it easy to interact with the Messages API. Here’s an example of how to use the client to send a request with a grammar parameter.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient
client = InferenceClient(<span class="hljs-string">&quot;http://localhost:3000&quot;</span>)
schema = {
<span class="hljs-string">&quot;properties&quot;</span>: {
<span class="hljs-string">&quot;location&quot;</span>: {<span class="hljs-string">&quot;title&quot;</span>: <span class="hljs-string">&quot;Location&quot;</span>, <span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;string&quot;</span>},
<span class="hljs-string">&quot;activity&quot;</span>: {<span class="hljs-string">&quot;title&quot;</span>: <span class="hljs-string">&quot;Activity&quot;</span>, <span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;string&quot;</span>},
<span class="hljs-string">&quot;animals_seen&quot;</span>: {
<span class="hljs-string">&quot;maximum&quot;</span>: <span class="hljs-number">5</span>,
<span class="hljs-string">&quot;minimum&quot;</span>: <span class="hljs-number">1</span>,
<span class="hljs-string">&quot;title&quot;</span>: <span class="hljs-string">&quot;Animals Seen&quot;</span>,
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;integer&quot;</span>,
},
<span class="hljs-string">&quot;animals&quot;</span>: {<span class="hljs-string">&quot;items&quot;</span>: {<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;string&quot;</span>}, <span class="hljs-string">&quot;title&quot;</span>: <span class="hljs-string">&quot;Animals&quot;</span>, <span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;array&quot;</span>},
},
<span class="hljs-string">&quot;required&quot;</span>: [<span class="hljs-string">&quot;location&quot;</span>, <span class="hljs-string">&quot;activity&quot;</span>, <span class="hljs-string">&quot;animals_seen&quot;</span>, <span class="hljs-string">&quot;animals&quot;</span>],
<span class="hljs-string">&quot;title&quot;</span>: <span class="hljs-string">&quot;Animals&quot;</span>,
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;object&quot;</span>,
}
user_input = <span class="hljs-string">&quot;I saw a puppy a cat and a raccoon during my bike ride in the park&quot;</span>
resp = client.text_generation(
<span class="hljs-string">f&quot;convert to JSON: &#x27;f<span class="hljs-subst">{user_input}</span>&#x27;. please use the following schema: <span class="hljs-subst">{schema}</span>&quot;</span>,
max_new_tokens=<span class="hljs-number">100</span>,
seed=<span class="hljs-number">42</span>,
grammar={<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;json&quot;</span>, <span class="hljs-string">&quot;value&quot;</span>: schema},
)
<span class="hljs-built_in">print</span>(resp)
<span class="hljs-comment"># { &quot;activity&quot;: &quot;bike ride&quot;, &quot;animals&quot;: [&quot;puppy&quot;, &quot;cat&quot;, &quot;raccoon&quot;], &quot;animals_seen&quot;: 3, &quot;location&quot;: &quot;park&quot; }</span>
<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1nlfjb7">A grammar can be defined using Pydantic models, JSON schemas, or regular expressions. The LLM will then generate a response that conforms to the specified grammar.</p> <blockquote data-svelte-h="svelte-cm9i0i"><p>Note: A grammar must compile to an intermediate representation to constrain the output. Grammar compilation is a computationally expensive and may take a few seconds to complete on the first request. Subsequent requests will use the cached grammar and will be much faster.</p></blockquote> <h3 class="relative group"><a id="constrain-with-pydantic" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#constrain-with-pydantic"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Constrain with Pydantic</span></h3> <p data-svelte-h="svelte-3pxjtu">Using Pydantic models we can define a similar grammar as the previous example in a shorter and more readable way.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient
<span class="hljs-keyword">from</span> pydantic <span class="hljs-keyword">import</span> BaseModel, conint
<span class="hljs-keyword">from</span> typing <span class="hljs-keyword">import</span> <span class="hljs-type">List</span>
<span class="hljs-keyword">class</span> <span class="hljs-title class_">Animals</span>(<span class="hljs-title class_ inherited__">BaseModel</span>):
location: <span class="hljs-built_in">str</span>
activity: <span class="hljs-built_in">str</span>
animals_seen: conint(ge=<span class="hljs-number">1</span>, le=<span class="hljs-number">5</span>) <span class="hljs-comment"># Constrained integer type</span>
animals: <span class="hljs-type">List</span>[<span class="hljs-built_in">str</span>]
client = InferenceClient(<span class="hljs-string">&quot;http://localhost:3000&quot;</span>)
user_input = <span class="hljs-string">&quot;I saw a puppy a cat and a raccoon during my bike ride in the park&quot;</span>
resp = client.text_generation(
<span class="hljs-string">f&quot;convert to JSON: &#x27;f<span class="hljs-subst">{user_input}</span>&#x27;. please use the following schema: <span class="hljs-subst">{Animals.schema()}</span>&quot;</span>,
max_new_tokens=<span class="hljs-number">100</span>,
seed=<span class="hljs-number">42</span>,
grammar={<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;json&quot;</span>, <span class="hljs-string">&quot;value&quot;</span>: Animals.schema()},
)
<span class="hljs-built_in">print</span>(resp)
<span class="hljs-comment"># { &quot;activity&quot;: &quot;bike ride&quot;, &quot;animals&quot;: [&quot;puppy&quot;, &quot;cat&quot;, &quot;raccoon&quot;], &quot;animals_seen&quot;: 3, &quot;location&quot;: &quot;park&quot; }</span>
<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-6wpj0x">defining a grammar as regular expressions</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient
client = InferenceClient(<span class="hljs-string">&quot;http://localhost:3000&quot;</span>)
section_regex = <span class="hljs-string">&quot;(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)&quot;</span>
regexp = <span class="hljs-string">f&quot;HELLO\.<span class="hljs-subst">{section_regex}</span>\.WORLD\.<span class="hljs-subst">{section_regex}</span>&quot;</span>
<span class="hljs-comment"># This is a more realistic example of an ip address regex</span>
<span class="hljs-comment"># regexp = f&quot;{section_regex}\.{section_regex}\.{section_regex}\.{section_regex}&quot;</span>
resp = client.text_generation(
<span class="hljs-string">f&quot;Whats Googles DNS? Please use the following regex: <span class="hljs-subst">{regexp}</span>&quot;</span>,
seed=<span class="hljs-number">42</span>,
grammar={
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;regex&quot;</span>,
<span class="hljs-string">&quot;value&quot;</span>: regexp,
},
)
<span class="hljs-built_in">print</span>(resp)
<span class="hljs-comment"># HELLO.255.WORLD.255</span>
<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="tools-and-functions-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tools-and-functions-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Tools and Functions 🛠️</span></h2> <h3 class="relative group"><a id="the-tools-parameter" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#the-tools-parameter"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>The Tools Parameter</span></h3> <p data-svelte-h="svelte-lut42n">In addition to the grammar parameter, we’ve also introduced a set of tools and functions to help you get the most out of the Messages API.</p> <p data-svelte-h="svelte-1a560wn">Tools are a set of user defined functions that can be used in tandem with the chat functionality to enhance the LLM’s capabilities. Functions, similar to grammar are defined as JSON schema and can be passed as part of the parameters to the Messages API.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->curl localhost<span class="hljs-punctuation">:</span><span class="hljs-number">3000</span>/v1/chat/completions \
-X POST \
-H &#x27;Content-Type<span class="hljs-punctuation">:</span> application/json&#x27; \
-d &#x27;<span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;model&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;tgi&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;messages&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span>
<span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;role&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;user&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;content&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;What is the weather like in New York?&quot;</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;tools&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span>
<span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;type&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;function&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;function&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;get_current_weather&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;description&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;Get the current weather&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;parameters&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;type&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;object&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;properties&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;location&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;type&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;string&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;description&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;The city and state, e.g. San Francisco, CA&quot;</span>
<span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;format&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;type&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;string&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;enum&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span><span class="hljs-string">&quot;celsius&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-string">&quot;fahrenheit&quot;</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;description&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;The temperature unit to use. Infer this from the users location.&quot;</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;required&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span><span class="hljs-string">&quot;location&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-string">&quot;format&quot;</span><span class="hljs-punctuation">]</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;tool_choice&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;get_current_weather&quot;</span>
<span class="hljs-punctuation">}</span>&#x27;
<span class="hljs-comment">// {&quot;id&quot;:&quot;&quot;,&quot;object&quot;:&quot;text_completion&quot;,&quot;created&quot;:1709051640,&quot;model&quot;:&quot;HuggingFaceH4/zephyr-7b-beta&quot;,&quot;system_fingerprint&quot;:&quot;1.4.3-native&quot;,&quot;choices&quot;:[{&quot;index&quot;:0,&quot;message&quot;:{&quot;role&quot;:&quot;assistant&quot;,&quot;tool_calls&quot;:{&quot;id&quot;:0,&quot;type&quot;:&quot;function&quot;,&quot;function&quot;:{&quot;description&quot;:null,&quot;name&quot;:&quot;tools&quot;,&quot;parameters&quot;:{&quot;format&quot;:&quot;celsius&quot;,&quot;location&quot;:&quot;New York&quot;}}}},&quot;logprobs&quot;:null,&quot;finish_reason&quot;:&quot;eos_token&quot;}],&quot;usage&quot;:{&quot;prompt_tokens&quot;:157,&quot;completion_tokens&quot;:19,&quot;total_tokens&quot;:176}}</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="chat-completion-with-tools" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#chat-completion-with-tools"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Chat Completion with Tools</span></h3> <p data-svelte-h="svelte-13xxzo">Grammars are supported in the <code>/generate</code> endpoint, while tools are supported in the <code>/chat/completions</code> endpoint. Here’s an example of how to use the client to send a request with a tool parameter.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient
client = InferenceClient(<span class="hljs-string">&quot;http://localhost:3000&quot;</span>)
tools = [
{
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;function&quot;</span>,
<span class="hljs-string">&quot;function&quot;</span>: {
<span class="hljs-string">&quot;name&quot;</span>: <span class="hljs-string">&quot;get_current_weather&quot;</span>,
<span class="hljs-string">&quot;description&quot;</span>: <span class="hljs-string">&quot;Get the current weather&quot;</span>,
<span class="hljs-string">&quot;parameters&quot;</span>: {
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;object&quot;</span>,
<span class="hljs-string">&quot;properties&quot;</span>: {
<span class="hljs-string">&quot;location&quot;</span>: {
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;string&quot;</span>,
<span class="hljs-string">&quot;description&quot;</span>: <span class="hljs-string">&quot;The city and state, e.g. San Francisco, CA&quot;</span>,
},
<span class="hljs-string">&quot;format&quot;</span>: {
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;string&quot;</span>,
<span class="hljs-string">&quot;enum&quot;</span>: [<span class="hljs-string">&quot;celsius&quot;</span>, <span class="hljs-string">&quot;fahrenheit&quot;</span>],
<span class="hljs-string">&quot;description&quot;</span>: <span class="hljs-string">&quot;The temperature unit to use. Infer this from the users location.&quot;</span>,
},
},
<span class="hljs-string">&quot;required&quot;</span>: [<span class="hljs-string">&quot;location&quot;</span>, <span class="hljs-string">&quot;format&quot;</span>],
},
},
},
{
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;function&quot;</span>,
<span class="hljs-string">&quot;function&quot;</span>: {
<span class="hljs-string">&quot;name&quot;</span>: <span class="hljs-string">&quot;get_n_day_weather_forecast&quot;</span>,
<span class="hljs-string">&quot;description&quot;</span>: <span class="hljs-string">&quot;Get an N-day weather forecast&quot;</span>,
<span class="hljs-string">&quot;parameters&quot;</span>: {
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;object&quot;</span>,
<span class="hljs-string">&quot;properties&quot;</span>: {
<span class="hljs-string">&quot;location&quot;</span>: {
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;string&quot;</span>,
<span class="hljs-string">&quot;description&quot;</span>: <span class="hljs-string">&quot;The city and state, e.g. San Francisco, CA&quot;</span>,
},
<span class="hljs-string">&quot;format&quot;</span>: {
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;string&quot;</span>,
<span class="hljs-string">&quot;enum&quot;</span>: [<span class="hljs-string">&quot;celsius&quot;</span>, <span class="hljs-string">&quot;fahrenheit&quot;</span>],
<span class="hljs-string">&quot;description&quot;</span>: <span class="hljs-string">&quot;The temperature unit to use. Infer this from the users location.&quot;</span>,
},
<span class="hljs-string">&quot;num_days&quot;</span>: {
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;integer&quot;</span>,
<span class="hljs-string">&quot;description&quot;</span>: <span class="hljs-string">&quot;The number of days to forecast&quot;</span>,
},
},
<span class="hljs-string">&quot;required&quot;</span>: [<span class="hljs-string">&quot;location&quot;</span>, <span class="hljs-string">&quot;format&quot;</span>, <span class="hljs-string">&quot;num_days&quot;</span>],
},
},
},
]
chat = client.chat_completion(
messages=[
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;system&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;You&#x27;re a helpful assistant! Answer the users question best you can.&quot;</span>,
},
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What is the weather like in Brooklyn, New York?&quot;</span>,
},
],
tools=tools,
seed=<span class="hljs-number">42</span>,
max_tokens=<span class="hljs-number">100</span>,
)
<span class="hljs-built_in">print</span>(chat.choices[<span class="hljs-number">0</span>].message.tool_calls)
<span class="hljs-comment"># [ChatCompletionOutputToolCall(function=ChatCompletionOutputFunctionDefinition(arguments={&#x27;format&#x27;: &#x27;fahrenheit&#x27;, &#x27;location&#x27;: &#x27;Brooklyn, New York&#x27;, &#x27;num_days&#x27;: 7}, name=&#x27;get_n_day_weather_forecast&#x27;, description=None), id=0, type=&#x27;function&#x27;)]</span>
<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="openai-integration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#openai-integration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>OpenAI integration</span></h3> <p data-svelte-h="svelte-qj1me9">TGI exposes an OpenAI-compatible API, which means you can use OpenAI’s client libraries to interact with TGI’s Messages API and Tool functions.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI
<span class="hljs-comment"># Initialize the client, pointing it to one of the available models</span>
client = OpenAI(
base_url=<span class="hljs-string">&quot;http://localhost:3000/v1&quot;</span>,
api_key=<span class="hljs-string">&quot;_&quot;</span>,
)
<span class="hljs-comment"># <span class="hljs-doctag">NOTE:</span> tools defined above and removed for brevity</span>
chat_completion = client.chat.completions.create(
model=<span class="hljs-string">&quot;tgi&quot;</span>,
messages=[
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;system&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Don&#x27;t make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.&quot;</span>,
},
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What&#x27;s the weather like the next 3 days in San Francisco, CA?&quot;</span>,
},
],
tools=tools,
tool_choice=<span class="hljs-string">&quot;auto&quot;</span>, <span class="hljs-comment"># tool selected by model</span>
max_tokens=<span class="hljs-number">500</span>,
)
called = chat_completion.choices[<span class="hljs-number">0</span>].message.tool_calls
<span class="hljs-built_in">print</span>(called)
<span class="hljs-comment"># {</span>
<span class="hljs-comment"># &quot;id&quot;: 0,</span>
<span class="hljs-comment"># &quot;type&quot;: &quot;function&quot;,</span>
<span class="hljs-comment"># &quot;function&quot;: {</span>
<span class="hljs-comment"># &quot;description&quot;: None,</span>
<span class="hljs-comment"># &quot;name&quot;: &quot;tools&quot;,</span>
<span class="hljs-comment"># &quot;parameters&quot;: {</span>
<span class="hljs-comment"># &quot;format&quot;: &quot;celsius&quot;,</span>
<span class="hljs-comment"># &quot;location&quot;: &quot;San Francisco, CA&quot;,</span>
<span class="hljs-comment"># &quot;num_days&quot;: 3,</span>
<span class="hljs-comment"># },</span>
<span class="hljs-comment"># },</span>
<span class="hljs-comment"># }</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="tool-choice-configuration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tool-choice-configuration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Tool Choice Configuration</span></h3> <p data-svelte-h="svelte-13rggpe">When configuring how the model interacts with tools during a chat completion, there are several options for determining if or how a tool should be called. These options are controlled by the <code>tool_choice</code> parameter, which specifies the behavior of the model in relation to tool usage. The following modes are supported:</p> <ol><li><p data-svelte-h="svelte-xbr04b"><strong><code>auto</code></strong>:</p> <ul><li data-svelte-h="svelte-kqytsb">The model decides whether to call a tool or generate a response message based on the user’s input.</li> <li data-svelte-h="svelte-1jg6yx2">If tools are provided, this is the default mode.</li> <li>Example usage:
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tool_choice=<span class="hljs-string">&quot;auto&quot;</span><!-- HTML_TAG_END --></pre></div></li></ul></li> <li><p data-svelte-h="svelte-11bn5m0"><strong><code>none</code></strong>:</p> <ul><li data-svelte-h="svelte-klffq4">The model will never call any tools and will only generate a response message.</li> <li data-svelte-h="svelte-up128b">If no tools are provided, this is the default mode.</li> <li>Example usage:
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tool_choice=<span class="hljs-string">&quot;none&quot;</span><!-- HTML_TAG_END --></pre></div></li></ul></li> <li><p data-svelte-h="svelte-1u693el"><strong><code>required</code></strong>:</p> <ul><li data-svelte-h="svelte-9fo4bt">The model must call one or more tools and will not generate a response message on its own.</li> <li>Example usage:
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tool_choice=<span class="hljs-string">&quot;required&quot;</span><!-- HTML_TAG_END --></pre></div></li></ul></li> <li><p data-svelte-h="svelte-gefj73"><strong>Specific Tool Call by Function Name</strong>:</p> <ul><li data-svelte-h="svelte-1o336wz">You can force the model to call a specific tool either by specifying the tool function directly or by using an object definition.</li> <li>Two ways to do this:<ol><li>Provide the function name as a string:
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tool_choice=<span class="hljs-string">&quot;get_current_weather&quot;</span><!-- HTML_TAG_END --></pre></div></li> <li>Use the function object format:
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tool_choice={
<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;function&quot;</span>,
<span class="hljs-string">&quot;function&quot;</span>: {
<span class="hljs-string">&quot;name&quot;</span>: <span class="hljs-string">&quot;get_current_weather&quot;</span>
}
}<!-- HTML_TAG_END --></pre></div></li></ol></li></ul></li></ol> <p data-svelte-h="svelte-6l9nzs">These options allow flexibility when integrating tools with the chat completions endpoint. You can configure the model to either rely on tools automatically or force it to follow a predefined behavior, based on the needs of the task at hand.</p> <hr> <table data-svelte-h="svelte-9e6c3b"><thead><tr><th><strong>Tool Choice Option</strong></th> <th><strong>Description</strong></th> <th><strong>When to Use</strong></th></tr></thead> <tbody><tr><td><code>auto</code></td> <td>The model decides whether to call a tool or generate a message. This is the default if tools are provided.</td> <td>Use when you want the model to decide when a tool is necessary.</td></tr> <tr><td><code>none</code></td> <td>The model generates a message without calling any tools. This is the default if no tools are provided.</td> <td>Use when you do not want the model to call any tools.</td></tr> <tr><td><code>required</code></td> <td>The model must call one or more tools and will not generate a message on its own.</td> <td>Use when a tool call is mandatory, and you do not want a regular message generated.</td></tr> <tr><td>Specific Tool Call (<code>name</code> or object)</td> <td>Force the model to call a specific tool either by specifying its name (<code>tool_choice=&quot;get_current_weather&quot;</code>) or using an object.</td> <td>Use when you want to restrict the model to calling a particular tool for the response.</td></tr></tbody></table> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/text-generation-inference/blob/main/docs/source/basic_tutorials/using_guidance.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_1q4976k = {
assets: "/docs/text-generation-inference/pr_2965/en",
base: "/docs/text-generation-inference/pr_2965/en",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/text-generation-inference/pr_2965/en/_app/immutable/entry/start.6569fc45.js"),
import("/docs/text-generation-inference/pr_2965/en/_app/immutable/entry/app.98c60904.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 12],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
71.1 kB
·
Xet hash:
db92796b9ce9235daeaf9f87d31faf4da2498d1587cf7e66d7e4b5c26cd44d1f

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.