Buckets:

rtrm's picture
download
raw
41 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Text Generation&quot;,&quot;local&quot;:&quot;text-generation&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Recommended models&quot;,&quot;local&quot;:&quot;recommended-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Using the API&quot;,&quot;local&quot;:&quot;using-the-api&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;API specification&quot;,&quot;local&quot;:&quot;api-specification&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Request&quot;,&quot;local&quot;:&quot;request&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Response&quot;,&quot;local&quot;:&quot;response&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3}],&quot;depth&quot;:2}">
<link href="/docs/inference-providers/pr_1663/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/entry/start.d5f15666.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/scheduler.ddb4e551.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/singletons.0f5b782d.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.ce98237b.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/paths.b324c1e2.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/entry/app.68b4644d.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.e16e4efa.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/nodes/0.80863911.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/nodes/22.64d88479.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/Tip.20abb04f.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/index.e108c5ed.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/InferenceSnippet.8df18a84.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/CodeBlock.754e6cfc.js">
<link rel="modulepreload" href="/docs/inference-providers/pr_1663/en/_app/immutable/chunks/IconCurl.399d095b.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Text Generation&quot;,&quot;local&quot;:&quot;text-generation&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Recommended models&quot;,&quot;local&quot;:&quot;recommended-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Using the API&quot;,&quot;local&quot;:&quot;using-the-api&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;API specification&quot;,&quot;local&quot;:&quot;api-specification&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Request&quot;,&quot;local&quot;:&quot;request&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Response&quot;,&quot;local&quot;:&quot;response&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3}],&quot;depth&quot;:2}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h2 class="relative group"><a id="text-generation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#text-generation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>Text Generation</span></h2> <p data-svelte-h="svelte-g8uxfr">Generate text based on a prompt.</p> <p data-svelte-h="svelte-nq7udc">If you are interested in a Chat Completion task, which generates a response based on a list of messages, check out the <a href="./chat_completion"><code>chat-completion</code></a> task.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-3rp22a">For more details about the <code>text-generation</code> task, check out its <a href="https://huggingface.co/tasks/text-generation" rel="nofollow">dedicated page</a>! You will find examples and related materials.</p></div> <h3 class="relative group"><a id="recommended-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#recommended-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Recommended models</span></h3> <ul data-svelte-h="svelte-1699af1"><li><a 
href="https://huggingface.co/google/gemma-2-2b-it" rel="nofollow">google/gemma-2-2b-it</a>: A text-generation model trained to follow instructions.</li> <li><a href="https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" rel="nofollow">deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B</a>: Smaller variant of one of the most powerful models.</li> <li><a href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" rel="nofollow">meta-llama/Meta-Llama-3.1-8B-Instruct</a>: Very powerful text generation model trained to follow instructions.</li> <li><a href="https://huggingface.co/microsoft/phi-4" rel="nofollow">microsoft/phi-4</a>: Powerful text generation model by Microsoft.</li> <li><a href="https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct" rel="nofollow">Qwen/Qwen2.5-Coder-32B-Instruct</a>: Text generation model used to write code.</li> <li><a href="https://huggingface.co/deepseek-ai/DeepSeek-R1" rel="nofollow">deepseek-ai/DeepSeek-R1</a>: Powerful reasoning based open large language model.</li></ul> <p data-svelte-h="svelte-aq5b5v">Explore all available models and find the one that suits you best <a href="https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending" rel="nofollow">here</a>.</p> <h3 class="relative group"><a id="using-the-api" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-the-api"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 
0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using the API</span></h3> <div class="flex gap-x-2 justify-between md:items-top w-full text-sm not-prose flex-col md:flex-row"> <div><p class="font-mono text-xs opacity-50 hidden md:block" data-svelte-h="svelte-1s5bpew">Language</p> <div class="my-1.5 flex items-center gap-x-1 gap-y-0.5 flex-wrap"><button class="text-md flex select-none items-center rounded-lg border px-1.5 py-1 leading-none border-gray-800 bg-black text-white dark:bg-gray-700" type="button"><svg class="mr-1.5 text-current" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M15.84.5a16.4,16.4,0,0,0-3.57.32C9.1,1.39,8.53,2.53,8.53,4.64V7.48H16v1H5.77a4.73,4.73,0,0,0-4.7,3.74,14.82,14.82,0,0,0,0,7.54c.57,2.28,1.86,3.82,4,3.82h2.6V20.14a4.73,4.73,0,0,1,4.63-4.63h7.38a3.72,3.72,0,0,0,3.73-3.73V4.64A4.16,4.16,0,0,0,19.65.82,20.49,20.49,0,0,0,15.84.5ZM11.78,2.77a1.39,1.39,0,0,1,1.38,1.46,1.37,1.37,0,0,1-1.38,1.38A1.42,1.42,0,0,1,10.4,4.23,1.44,1.44,0,0,1,11.78,2.77Z" fill="#5a9fd4"></path><path d="M16.16,31.5a16.4,16.4,0,0,0,3.57-.32c3.17-.57,3.74-1.71,3.74-3.82V24.52H16v-1H26.23a4.73,4.73,0,0,0,4.7-3.74,14.82,14.82,0,0,0,0-7.54c-.57-2.28-1.86-3.82-4-3.82h-2.6v3.41a4.73,4.73,0,0,1-4.63,4.63H12.35a3.72,3.72,0,0,0-3.73,3.73v7.14a4.16,4.16,0,0,0,3.73,3.82A20.49,20.49,0,0,0,16.16,31.5Zm4.06-2.27a1.39,1.39,0,0,1-1.38-1.46,1.37,1.37,0,0,1,1.38-1.38,1.42,1.42,0,0,1,1.38,1.38A1.44,1.44,0,0,1,20.22,29.23Z" fill="#ffd43b"></path></svg> Python </button><button class="text-md flex select-none items-center rounded-lg border px-1.5 py-1 leading-none hover:shadow-xs cursor-pointer 
text-gray-500 opacity-90 hover:text-gray-700 dark:hover:text-gray-200" type="button"><svg class="mr-1.5 text-current" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><rect width="32" height="32" fill="#f7df1e"></rect><path d="M21.5,25a3.27,3.27,0,0,0,3,1.83c1.25,0,2-.63,2-1.49,0-1-.81-1.39-2.19-2L23.56,23C21.39,22.1,20,20.94,20,18.49c0-2.25,1.72-4,4.41-4a4.44,4.44,0,0,1,4.27,2.41l-2.34,1.5a2,2,0,0,0-1.93-1.29,1.31,1.31,0,0,0-1.44,1.29c0,.9.56,1.27,1.85,1.83l.75.32c2.55,1.1,4,2.21,4,4.72,0,2.71-2.12,4.19-5,4.19a5.78,5.78,0,0,1-5.48-3.07Zm-10.63.26c.48.84.91,1.55,1.94,1.55s1.61-.39,1.61-1.89V14.69h3V25c0,3.11-1.83,4.53-4.49,4.53a4.66,4.66,0,0,1-4.51-2.75Z"></path></svg> JavaScript </button><button class="text-md flex select-none items-center rounded-lg border px-1.5 py-1 leading-none hover:shadow-xs cursor-pointer text-gray-500 opacity-90 hover:text-gray-700 dark:hover:text-gray-200" type="button"><svg class="mr-1.5 text-current" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><rect width="32" height="32" rx="4" fill="#1683a5"></rect><path d="M6.71,14A5,5,0,0,1,8.82,9.29l2.64-2.2c1.67-1.37,2.52-1.41,4.6-1.41H21.7c1.19,0,2.45.27,2.45,1.79s-1.4,1.78-2.45,1.78H15.44a3.31,3.31,0,0,0-2,.89L11.24,12c-.55.44-1,.81-1,1.52v4.41c0,.7.41,1.07,1,1.52l2.16,1.82a3.34,3.34,0,0,0,2,.89H21.7c1.05,0,2.45.23,2.45,1.78s-1.26,1.78-2.45,1.78H16.06c-2.08,0-2.94,0-4.6-1.4L8.82,22.09A5.05,5.05,0,0,1,6.71,17.4Z" fill="#fff"></path></svg> cURL </button></div></div> <div><p class="font-mono text-xs opacity-50 hidden md:block" data-svelte-h="svelte-1kuuf89">Client</p> <div class="my-1.5 flex items-center gap-x-1 gap-y-0.5 flex-wrap"><button class="text-md flex select-none 
items-center rounded-lg border px-1.5 py-1 leading-none border-gray-800 bg-black text-white dark:bg-gray-700" type="button">huggingface_hub </button><button class="text-md flex select-none items-center rounded-lg border px-1.5 py-1 leading-none hover:shadow-xs cursor-pointer text-gray-500 opacity-90 hover:text-gray-700 dark:hover:text-gray-200" type="button">requests </button><button class="text-md flex select-none items-center rounded-lg border px-1.5 py-1 leading-none hover:shadow-xs cursor-pointer text-gray-500 opacity-90 hover:text-gray-700 dark:hover:text-gray-200" type="button">openai </button></div></div> <div><p class="font-mono text-xs opacity-50 hidden md:block" data-svelte-h="svelte-1p9m5m3">Provider</p> <div class="my-1.5 flex items-center gap-x-1 gap-y-0.5 flex-wrap"><button class="text-md flex select-none items-center rounded-lg border px-1.5 py-1 leading-none border-gray-800 bg-black text-white dark:bg-gray-700" type="button"><svg class="mr-1.5 text-current" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 26 26"><rect x="3.34856" y="3.02654" width="19.9474" height="19.9474" rx="2.95009" fill="#FFD21E" stroke="#FFB41E" stroke-width="1.18004"></rect><path fill-rule="evenodd" clip-rule="evenodd" d="M7.69336 9.74609V16.9754H9.32329V13.9595H11.8181V16.9754H13.4591V9.74609H11.8181V12.5292H9.32329V9.74609H7.69336ZM15.1646 9.74609V16.9754H16.7945V14.1702H19.3004V12.7953H16.7945V11.121H19.7217V9.74609H15.1646Z" fill="#814D00"></path></svg> HF Inference API </button><button class="text-md flex select-none items-center rounded-lg border px-1.5 py-1 leading-none hover:shadow-xs cursor-pointer text-gray-500 opacity-90 hover:text-gray-700 dark:hover:text-gray-200" type="button"><svg class="mr-1.5 text-current" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" 
focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 26 26"><g clip-path="url(#clip0_55_1726)"><path d="M19.925 2.5H6.33674C4.29004 2.5 2.63086 4.15918 2.63086 6.20588V19.7941C2.63086 21.8408 4.29004 23.5 6.33674 23.5H19.925C21.9717 23.5 23.6309 21.8408 23.6309 19.7941V6.20588C23.6309 4.15918 21.9717 2.5 19.925 2.5Z" fill="#F1EFED"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M17.6087 12.5368C19.6554 12.5368 21.3146 10.8776 21.3146 8.83088C21.3146 6.78418 19.6554 5.125 17.6087 5.125C15.562 5.125 13.9028 6.78418 13.9028 8.83088C13.9028 10.8776 15.562 12.5368 17.6087 12.5368ZM17.6087 21.1842C19.6554 21.1842 21.3146 19.525 21.3146 17.4783C21.3146 15.4316 19.6554 13.7725 17.6087 13.7725C15.562 13.7725 13.9028 15.4316 13.9028 17.4783C13.9028 19.525 15.562 21.1842 17.6087 21.1842ZM12.6676 17.4783C12.6676 19.525 11.0084 21.1842 8.96174 21.1842C6.91504 21.1842 5.25586 19.525 5.25586 17.4783C5.25586 15.4316 6.91504 13.7725 8.96174 13.7725C11.0084 13.7725 12.6676 15.4316 12.6676 17.4783Z" fill="#D3D1D1"></path><path d="M8.96174 12.5368C11.0084 12.5368 12.6676 10.8776 12.6676 8.83088C12.6676 6.78418 11.0084 5.125 8.96174 5.125C6.91504 5.125 5.25586 6.78418 5.25586 8.83088C5.25586 10.8776 6.91504 12.5368 8.96174 12.5368Z" fill="#0F6FFF"></path></g><defs><clipPath id="clip0_55_1726"><rect width="21" height="21" fill="white" transform="translate(2.63086 2.5)"></rect></clipPath></defs></svg> Together AI </button> </div></div> <div><p class="font-mono text-xs invisible hidden md:block" data-svelte-h="svelte-hnzs25">Settings</p> <div class="flex not-prose my-1.5"><div class="relative hidden md:block "> <button class=" " type="button"> <button class="text-md flex select-none items-center rounded-lg border px-1.5 py-1 leading-none hover:shadow-xs cursor-pointer text-gray-500 opacity-90 hover:text-gray-700 dark:hover:text-gray-200" type="button" title="Settings dropdown"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 9 7"><path fill="currentColor" d="M8.537 1.153H7.361A1.445 1.445 0 0 0 5.954 0c-.689 0-1.263.49-1.407 1.153H.5v.576h4.047a1.445 1.445 0 0 0 1.407 1.153c.689 0 1.263-.49 1.407-1.153h1.176v-.576M5.954 2.305a.847.847 0 0 1-.861-.864c0-.49.373-.865.861-.865s.861.375.861.865-.373.864-.861.864M.5 5.764h1.177a1.445 1.445 0 0 0 1.406 1.152c.69 0 1.263-.49 1.407-1.152h4.047v-.577H4.49a1.445 1.445 0 0 0-1.407-1.152c-.688 0-1.263.49-1.406 1.152H.5v.577M3.083 4.61c.488 0 .862.375.862.864 0 .49-.374.865-.862.865a.847.847 0 0 1-.86-.865c0-.49.372-.864.86-.864"></path></svg>
Settings</button> </button> </div> <div class="relative md:hidden "> <button class=" " type="button"> <button class="text-md flex select-none items-center rounded-lg border px-1.5 py-1 leading-none hover:shadow-xs cursor-pointer text-gray-500 opacity-90 hover:text-gray-700 dark:hover:text-gray-200" type="button" title="Settings dropdown"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 9 7"><path fill="currentColor" d="M8.537 1.153H7.361A1.445 1.445 0 0 0 5.954 0c-.689 0-1.263.49-1.407 1.153H.5v.576h4.047a1.445 1.445 0 0 0 1.407 1.153c.689 0 1.263-.49 1.407-1.153h1.176v-.576M5.954 2.305a.847.847 0 0 1-.861-.864c0-.49.373-.865.861-.865s.861.375.861.865-.373.864-.861.864M.5 5.764h1.177a1.445 1.445 0 0 0 1.406 1.152c.69 0 1.263-.49 1.407-1.152h4.047v-.577H4.49a1.445 1.445 0 0 0-1.407-1.152c-.688 0-1.263.49-1.406 1.152H.5v.577M3.083 4.61c.488 0 .862.375.862.864 0 .49-.374.865-.862.865a.847.847 0 0 1-.86-.865c0-.49.372-.864.86-.864"></path></svg>
Settings</button> </button> </div> <div class="flex-grow md:hidden"></div></div></div></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient
client = InferenceClient(
provider=<span class="hljs-string">&quot;hf-inference&quot;</span>,
api_key=<span class="hljs-string">&quot;hf_xxxxxxxxxxxxxxxxxxxxxxxx&quot;</span>,
)
completion = client.chat.completions.create(
model=<span class="hljs-string">&quot;Qwen/QwQ-32B&quot;</span>,
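messages=[{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Can you please let us know more details about your &quot;</span>}],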
max_tokens=<span class="hljs-number">500</span>,
)
<span class="hljs-built_in">print</span>(completion.choices[<span class="hljs-number">0</span>].message)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="api-specification" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#api-specification"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>API specification</span></h3> <h4 class="relative group"><a id="request" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#request"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 
28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Request</span></h4> <table data-svelte-h="svelte-u7j2r"><thead><tr><th align="left">Payload</th> <th align="left"></th> <th align="left"></th></tr></thead> <tbody><tr><td align="left"><strong>inputs*</strong></td> <td align="left"><em>string</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>parameters</strong></td> <td align="left"><em>object</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        adapter_id</strong></td> <td align="left"><em>string</em></td> <td align="left">Lora adapter id</td></tr> <tr><td align="left"><strong>        best_of</strong></td> <td align="left"><em>integer</em></td> <td align="left">Generate best_of sequences and return the one if the highest token logprobs.</td></tr> <tr><td align="left"><strong>        decoder_input_details</strong></td> <td align="left"><em>boolean</em></td> <td align="left">Whether to return decoder input token logprobs and ids.</td></tr> <tr><td align="left"><strong>        details</strong></td> <td align="left"><em>boolean</em></td> <td align="left">Whether to return generation details.</td></tr> <tr><td align="left"><strong>        do_sample</strong></td> <td align="left"><em>boolean</em></td> <td align="left">Activate logits sampling.</td></tr> <tr><td align="left"><strong>        frequency_penalty</strong></td> <td align="left"><em>number</em></td> <td align="left">The parameter for frequency penalty. 
1.0 means no penalty Penalize new tokens based on their existing frequency in the text so far, decreasing the model’s likelihood to repeat the same line verbatim.</td></tr> <tr><td align="left"><strong>        grammar</strong></td> <td align="left"><em>unknown</em></td> <td align="left">One of the following:</td></tr> <tr><td align="left"><strong>                 (#1)</strong></td> <td align="left"><em>object</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                        type*</strong></td> <td align="left"><em>enum</em></td> <td align="left">Possible values: json.</td></tr> <tr><td align="left"><strong>                        value*</strong></td> <td align="left"><em>unknown</em></td> <td align="left">A string that represents a <a href="https://json-schema.org/" rel="nofollow">JSON Schema</a>. JSON Schema is a declarative language that allows to annotate JSON documents with types and descriptions.</td></tr> <tr><td align="left"><strong>                 (#2)</strong></td> <td align="left"><em>object</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                        type*</strong></td> <td align="left"><em>enum</em></td> <td align="left">Possible values: regex.</td></tr> <tr><td align="left"><strong>                        value*</strong></td> <td align="left"><em>string</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        max_new_tokens</strong></td> <td align="left"><em>integer</em></td> <td align="left">Maximum number of tokens to generate.</td></tr> <tr><td align="left"><strong>        repetition_penalty</strong></td> <td align="left"><em>number</em></td> <td align="left">The parameter for repetition penalty. 1.0 means no penalty. 
See <a href="https://arxiv.org/pdf/1909.05858.pdf" rel="nofollow">this paper</a> for more details.</td></tr> <tr><td align="left"><strong>        return_full_text</strong></td> <td align="left"><em>boolean</em></td> <td align="left">Whether to prepend the prompt to the generated text</td></tr> <tr><td align="left"><strong>        seed</strong></td> <td align="left"><em>integer</em></td> <td align="left">Random sampling seed.</td></tr> <tr><td align="left"><strong>        stop</strong></td> <td align="left"><em>string[]</em></td> <td align="left">Stop generating tokens if a member of <code>stop</code> is generated.</td></tr> <tr><td align="left"><strong>        temperature</strong></td> <td align="left"><em>number</em></td> <td align="left">The value used to module the logits distribution.</td></tr> <tr><td align="left"><strong>        top_k</strong></td> <td align="left"><em>integer</em></td> <td align="left">The number of highest probability vocabulary tokens to keep for top-k-filtering.</td></tr> <tr><td align="left"><strong>        top_n_tokens</strong></td> <td align="left"><em>integer</em></td> <td align="left">The number of highest probability vocabulary tokens to keep for top-n-filtering.</td></tr> <tr><td align="left"><strong>        top_p</strong></td> <td align="left"><em>number</em></td> <td align="left">Top-p value for nucleus sampling.</td></tr> <tr><td align="left"><strong>        truncate</strong></td> <td align="left"><em>integer</em></td> <td align="left">Truncate inputs tokens to the given size.</td></tr> <tr><td align="left"><strong>        typical_p</strong></td> <td align="left"><em>number</em></td> <td align="left">Typical Decoding mass See <a href="https://arxiv.org/abs/2202.00666" rel="nofollow">Typical Decoding for Natural Language Generation</a> for more information.</td></tr> <tr><td align="left"><strong>        watermark</strong></td> <td align="left"><em>boolean</em></td> <td align="left">Watermarking with <a 
href="https://arxiv.org/abs/2301.10226" rel="nofollow">A Watermark for Large Language Models</a>.</td></tr> <tr><td align="left"><strong>stream</strong></td> <td align="left"><em>boolean</em></td> <td align="left"></td></tr></tbody></table> <p data-svelte-h="svelte-xa4wks">Some options can be configured by passing headers to the Inference API. Here are the available headers:</p> <table data-svelte-h="svelte-2rfiu7"><thead><tr><th align="left">Headers</th> <th align="left"></th> <th align="left"></th></tr></thead> <tbody><tr><td align="left"><strong>authorization</strong></td> <td align="left"><em>string</em></td> <td align="left">Authentication header in the form <code>&#39;Bearer hf_****&#39;</code> where <code>hf_****</code> is a personal user access token with Inference API permission. You can generate one from <a href="https://huggingface.co/settings/tokens" rel="nofollow">your settings page</a>.</td></tr> <tr><td align="left"><strong>x-use-cache</strong></td> <td align="left"><em>boolean, default to <code>true</code></em></td> <td align="left">There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to <code>false</code> to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching <a href="../parameters#caching">here</a>.</td></tr> <tr><td align="left"><strong>x-wait-for-model</strong></td> <td align="left"><em>boolean, default to <code>false</code></em></td> <td align="left">If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. 
Read more about model availability <a href="../overview#eligibility">here</a>.</td></tr></tbody></table> <p data-svelte-h="svelte-1ps9cb1">For more information about Inference API headers, check out the parameters <a href="../parameters">guide</a>.</p> <h4 class="relative group"><a id="response" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#response"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Response</span></h4> <p data-svelte-h="svelte-6fikiy">Output type depends on the <code>stream</code> input parameter.
If <code>stream</code> is <code>false</code> (default), the response will be a JSON object with the following fields:</p> <table data-svelte-h="svelte-1tzm7bk"><thead><tr><th align="left">Body</th> <th align="left"></th> <th align="left"></th></tr></thead> <tbody><tr><td align="left"><strong>details</strong></td> <td align="left"><em>object</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        best_of_sequences</strong></td> <td align="left"><em>object[]</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                finish_reason</strong></td> <td align="left"><em>enum</em></td> <td align="left">Possible values: length, eos_token, stop_sequence.</td></tr> <tr><td align="left"><strong>                generated_text</strong></td> <td align="left"><em>string</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                generated_tokens</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                prefill</strong></td> <td align="left"><em>object[]</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                        id</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                        logprob</strong></td> <td align="left"><em>number</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                        text</strong></td> <td align="left"><em>string</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                seed</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                tokens</strong></td> <td align="left"><em>object[]</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                        id</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                        
logprob</strong></td> <td align="left"><em>number</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                        special</strong></td> <td align="left"><em>boolean</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                        text</strong></td> <td align="left"><em>string</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                top_tokens</strong></td> <td align="left"><em>array[]</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                        id</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                        logprob</strong></td> <td align="left"><em>number</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                        special</strong></td> <td align="left"><em>boolean</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                        text</strong></td> <td align="left"><em>string</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        finish_reason</strong></td> <td align="left"><em>enum</em></td> <td align="left">Possible values: length, eos_token, stop_sequence.</td></tr> <tr><td align="left"><strong>        generated_tokens</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        prefill</strong></td> <td align="left"><em>object[]</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                id</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                logprob</strong></td> <td align="left"><em>number</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                text</strong></td> <td align="left"><em>string</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        seed</strong></td> <td align="left"><em>integer</em></td> <td 
align="left"></td></tr> <tr><td align="left"><strong>        tokens</strong></td> <td align="left"><em>object[]</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                id</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                logprob</strong></td> <td align="left"><em>number</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                special</strong></td> <td align="left"><em>boolean</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                text</strong></td> <td align="left"><em>string</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        top_tokens</strong></td> <td align="left"><em>array[]</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                id</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                logprob</strong></td> <td align="left"><em>number</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                special</strong></td> <td align="left"><em>boolean</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>                text</strong></td> <td align="left"><em>string</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>generated_text</strong></td> <td align="left"><em>string</em></td> <td align="left"></td></tr></tbody></table> <p data-svelte-h="svelte-htfkb9">If <code>stream</code> is <code>true</code>, generated tokens are returned as a stream, using Server-Sent Events (SSE).
For more information about streaming, check out <a href="https://huggingface.co/docs/text-generation-inference/conceptual/streaming" rel="nofollow">this guide</a>.</p> <table data-svelte-h="svelte-813c3t"><thead><tr><th align="left">Body</th> <th align="left"></th> <th align="left"></th></tr></thead> <tbody><tr><td align="left"><strong>details</strong></td> <td align="left"><em>object</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        finish_reason</strong></td> <td align="left"><em>enum</em></td> <td align="left">Possible values: length, eos_token, stop_sequence.</td></tr> <tr><td align="left"><strong>        generated_tokens</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        input_length</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        seed</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>generated_text</strong></td> <td align="left"><em>string</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>index</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>token</strong></td> <td align="left"><em>object</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        id</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        logprob</strong></td> <td align="left"><em>number</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        special</strong></td> <td align="left"><em>boolean</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        text</strong></td> <td align="left"><em>string</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>top_tokens</strong></td> <td align="left"><em>object[]</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>      
  id</strong></td> <td align="left"><em>integer</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        logprob</strong></td> <td align="left"><em>number</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        special</strong></td> <td align="left"><em>boolean</em></td> <td align="left"></td></tr> <tr><td align="left"><strong>        text</strong></td> <td align="left"><em>string</em></td> <td align="left"></td></tr></tbody></table> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hub-docs/blob/main/docs/inference-providers/tasks/text-generation.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
// Client-side bootstrap for this page: publishes the path configuration,
// then loads the two entry bundles and hands control to kit.start().

// There is no declaration keyword here, so in this classic (non-module)
// script the assignment creates a property on the global object.
// NOTE(review): presumably the imported bundles read this global to resolve
// asset and base URLs — confirm against the SvelteKit runtime.
__sveltekit_1o5mypj = {
assets: "/docs/inference-providers/pr_1663/en",
base: "/docs/inference-providers/pr_1663/en",
env: {}
};
// Must be captured synchronously: document.currentScript is only set while
// this script is executing, so it would be null inside the .then() callback.
const element = document.currentScript.parentElement;
// NOTE(review): two entries, matching the two ids in node_ids below;
// presumably null means "no server-loaded data for that node" — confirm.
const data = [null,null];
// Request both entry modules in parallel; the callback runs only once both
// dynamic imports have resolved.
Promise.all([
import("/docs/inference-providers/pr_1663/en/_app/immutable/entry/start.d5f15666.js"),
import("/docs/inference-providers/pr_1663/en/_app/immutable/entry/app.68b4644d.js")
]).then(([kit, app]) => {
// Pass the app module, the element that contains this script, and the
// initial page state to the runtime's start() entry point.
kit.start(app, element, {
node_ids: [0, 22],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
41 kB
·
Xet hash:
1aa1fa517db1fa08144984d141b7027a30f8bc8b210532c77496f4b06dc58468

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.