Buckets:

rtrm's picture
download
raw
63.2 kB
import{s as Ie,c as ul,u as Jl,g as jl,b as hl,d as ml,i as we,o as Tl,n as ft}from"../chunks/scheduler.eb244325.js";import{S as fe,i as ge,e as T,s as J,a as U,f as Zt,d as n,b as j,k as Ct,m as a,t as f,o as g,A as Ul,r as wl,u as dl,l as de,B as Il,v as fl,w as il,y as gl,x as Cl,c as b,h as bl,g as B,j as w,n as Z,p as G}from"../chunks/index.661680a1.js";import{T as Bl}from"../chunks/Tip.76637dd3.js";import{C as Zl,H as E,E as Gl}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.c047d438.js";import{C as W}from"../chunks/CodeBlock.0d14d0aa.js";import{e as Ml}from"../chunks/each.e59479a4.js";import{w as Al}from"../chunks/index.3c23fb4b.js";const Bt=Al({});function El(h,l){const y=new URL(window.location.href),s=new URLSearchParams(y.search);s.set(h,l),y.search=s.toString(),history.replaceState(null,"",y.toString())}function Wl(h){const l=new URL(window.location.href);return new URLSearchParams(l.search).get(h)}function pl(h,l,y){const s=h.slice();return s[7]=l[y],s}function cl(h){let l,y=h[7]+"",s,c,u,r,o;function e(){return h[6](h[7])}return{c(){l=T("div"),s=wl(y),c=J(),this.h()},l(M){l=U(M,"DIV",{class:!0});var m=Zt(l);s=dl(m,y),c=j(m),m.forEach(n),this.h()},h(){Ct(l,"class",u="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd "+(h[2][h[0]]===h[7]?"border-gray-800 bg-black dark:bg-gray-700 text-white":"text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm"))},m(M,m){a(M,l,m),de(l,s),de(l,c),r||(o=Il(l,"click",e),r=!0)},p(M,m){h=M,m&2&&y!==(y=h[7]+"")&&fl(s,y),m&7&&u!==(u="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd "+(h[2][h[0]]===h[7]?"border-gray-800 bg-black dark:bg-gray-700 text-white":"text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm"))&&Ct(l,"class",u)},d(M){M&&n(l),r=!1,o()}}}function $l(h){let l,y,s,c,u=Ml(h[1]),r=[];for(let M=0;M<u.length;M+=1)r[M]=cl(pl(h,u,M));const o=h[5].default,e=ul(o,h,h[4],null);return{c(){l=T("div");for(let M=0;M<r.length;M+=1)r[M].c();y=J(),s=T("div"),e&&e.c(),this.h()},l(M){l=U(M,"DIV",{class:!0});var m=Zt(l);for(let I=0;I<r.length;I+=1)r[I].l(m);m.forEach(n),y=j(M),s=U(M,"DIV",{class:!0});var d=Zt(s);e&&e.l(d),d.forEach(n),this.h()},h(){Ct(l,"class","flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"),Ct(s,"class","language-select")},m(M,m){a(M,l,m);for(let d=0;d<r.length;d+=1)r[d]&&r[d].m(l,null);a(M,y,m),a(M,s,m),e&&e.m(s,null),c=!0},p(M,[m]){if(m&15){u=Ml(M[1]);let d;for(d=0;d<u.length;d+=1){const I=pl(M,u,d);r[d]?r[d].p(I,m):(r[d]=cl(I),r[d].c(),r[d].m(l,null))}for(;d<r.length;d+=1)r[d].d(1);r.length=u.length}e&&e.p&&(!c||m&16)&&Jl(e,o,M,M[4],c?hl(o,M[4],m,null):jl(M[4]),null)},i(M){c||(f(e,M),c=!0)},o(M){g(e,M),c=!1},d(M){M&&(n(l),n(y),n(s)),Ul(r,M),e&&e.d(M)}}}function ql(h,l,y){let s;ml(h,Bt,m=>y(2,s=m));let{$$slots:c={},$$scope:u}=l,{id:r}=l,{options:o}=l;we(Bt,s[r]=o[0],s);function e(m){we(Bt,s[r]=m,s),El(r,m)}Tl(()=>{const m=Wl(r);m&&o.includes(m)&&we(Bt,s[r]=m,s)});const M=m=>e(m);return h.$$set=m=>{"id"in m&&y(0,r=m.id),"options"in m&&y(1,o=m.options),"$$scope"in m&&y(4,u=m.$$scope)},[r,o,s,e,u,c,M]}class rl extends fe{constructor(l){super(),ge(this,l,ql,$l,Ie,{id:0,options:1})}}function yl(h){let l;const y=h[4].default,s=ul(y,h,h[3],null);return{c(){s&&s.c()},l(c){s&&s.l(c)},m(c,u){s&&s.m(c,u),l=!0},p(c,u){s&&s.p&&(!l||u&8)&&Jl(s,y,c,c[3],l?hl(y,c[3],u,null):jl(c[3]),null)},i(c){l||(f(s,c),l=!0)},o(c){g(s,c),l=!1},d(c){s&&s.d(c)}}}function vl(h){let l,y,s=h[2][h[0]]===h[1]&&yl(h);return{c(){s&&s.c(),l=il()},l(c){s&&s.l(c),l=il()},m(c,u){s&&s.m(c,u),a(c,l,u),y=!0},p(c,[u]){c[2][c[0]]===c[1]?s?(s.p(c,u),u&7&&f(s,1)):(s=yl(c),s.c(),f(s,1),s.m(l.parentNode,l)):s&&(gl(),g(s,1,1,()=>{s=null}),Cl())},i(c){y||(f(s),y=!0)},o(c){g(s),y=!1},d(c){c&&n(l),s&&s.d(c)}}}function kl(h,l,y){let s;ml(h,Bt,e=>y(2,s=e));let{$$slots:c={},$$scope:u}=l,{id:r}=l,{option:o}=l;return h.$$set=e=>{"id"in e&&y(0,r=e.id),"option"in e&&y(1,o=e.option),"$$scope"in e&&y(3,u=e.$$scope)},[r,o,s,u,c]}class gt extends fe{constructor(l){super(),ge(this,l,kl,vl,Ie,{id:0,option:1})}}function Ql(h){let l,y='This Tutorial uses Python, but your client can be any language that can make HTTP requests. The model and engine you deploy on Inference Endpoints uses the <strong>OpenAI Chat Completions format</strong>, so you can use any <a href="https://platform.openai.com/docs/libraries" rel="nofollow">OpenAI client</a> to connect to them, in languages like JavaScript, Java, and Go.';return{c(){l=T("p"),l.innerHTML=y},l(s){l=U(s,"P",{"data-svelte-h":!0}),w(l)!=="svelte-1t7fc3k"&&(l.innerHTML=y)},m(s,c){a(s,l,c)},p:ft,d(s){s&&n(l)}}}function Nl(h){let l,y="<strong>Using Hugging Face InferenceClient</strong>",s,c,u="First, install the required dependencies:",r,o,e,M,m="The Hugging Face InferenceClient provides a clean interface that’s compatible with the OpenAI API format:",d,I,A;return o=new W({props:{code:"cGlwJTIwaW5zdGFsbCUyMGdyYWRpbyUyMGh1Z2dpbmdmYWNlLWh1Yg==",highlighted:"pip install gradio huggingface-hub",wrap:!1}}),I=new W({props:{code:"aW1wb3J0JTIwb3MlMEFpbXBvcnQlMjBncmFkaW8lMjBhcyUyMGdyJTBBZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMEluZmVyZW5jZUNsaWVudCUwQSUwQSUyMyUyMEluaXRpYWxpemUlMjB0aGUlMjBIdWdnaW5nJTIwRmFjZSUyMEluZmVyZW5jZUNsaWVudCUwQWNsaWVudCUyMCUzRCUyMEluZmVyZW5jZUNsaWVudCglMEElMjAlMjAlMjAlMjBiYXNlX3VybCUzRCUyMiUzQ2VuZHBvaW50LXVybCUzRSUyRnYxJTJGJTIyJTJDJTIwJTIwJTIzJTIwUmVwbGFjZSUyMHdpdGglMjB5b3VyJTIwZW5kcG9pbnQlMjBVUkwlMEElMjAlMjAlMjAlMjB0b2tlbiUzRG9zLmdldGVudiglMjJIRl9UT0tFTiUyMiklMjAlMjAlMjMlMjBVc2UlMjBlbnZpcm9ubWVudCUyMHZhcmlhYmxlJTIwZm9yJTIwc2VjdXJpdHklMEEpJTBBJTBBZGVmJTIwY2hhdF93aXRoX2hmX2NsaWVudChtZXNzYWdlJTJDJTIwaGlzdG9yeSklM0ElMEElMjAlMjAlMjAlMjAlMjMlMjBDb252ZXJ0JTIwR3JhZGlvJTIwaGlzdG9yeSUyMHRvJTIwbWVzc2FnZXMlMjBmb3JtYXQlMEElMjAlMjAlMjAlMjBtZXNzYWdlcyUyMCUzRCUyMCU1QiU3QiUyMnJvbGUlMjIlM0ElMjBtc2clNUIlMjJyb2xlJTIyJTVEJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMG1zZyU1QiUyMmNvbnRlbnQlMjIlNUQlN0QlMjBmb3IlMjBtc2clMjBpbiUyMGhpc3RvcnklNUQlMEElMjAlMjAlMjAlMjAlMEElMjAlMjAlMjAlMjAlMjMlMjBBZGQlMjB0aGUlMjBjdXJyZW50JTIwbWVzc2FnZSUwQSUyMCUyMCUyMCUyMG1lc3NhZ2VzLmFwcGVuZCglN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjBtZXNzYWdlJTdEKSUwQSUyMCUyMCUyMCUyMCUwQSUyMCUyMCUyMCUyMCUyMyUyMENyZWF0ZSUyMGNoYXQlMjBjb21wbGV0aW9uJTBBJTIwJTIwJTIwJTIwY2hhdF9jb21wbGV0aW9uJTIwJTNEJTIwY2xpZW50LmNoYXQuY29tcGxldGlvbnMuY3JlYXRlKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1vZGVsJTNEJTIyZW5kcG9pbnQtbmFtZSUyMiUyQyUyMCUyMCUyMyUyMFVzZSUyMHRoZSUyMG5hbWUlMjBvZiUyMHlvdXIlMjBlbmRwb2ludCUyMChpLmUuJTIwcXdlbjMtMS43Yi1pbnN0cnVjdC14eHh4KSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1lc3NhZ2VzJTNEbWVzc2FnZXMlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtYXhfdG9rZW5zJTNEMTUwJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwdGVtcGVyYXR1cmUlM0QwLjclMkMlMEElMjAlMjAlMjAlMjApJTBBJTIwJTIwJTIwJTIwJTBBJTIwJTIwJTIwJTIwJTIzJTIwUmV0dXJuJTIwdGhlJTIwcmVzcG9uc2UlMEElMjAlMjAlMjAlMjByZXR1cm4lMjBjaGF0X2NvbXBsZXRpb24uY2hvaWNlcyU1QjAlNUQubWVzc2FnZS5jb250ZW50JTBBJTBBJTIzJTIwQ3JlYXRlJTIwdGhlJTIwR3JhZGlvJTIwaW50ZXJmYWNlJTBBZGVtbyUyMCUzRCUyMGdyLkNoYXRJbnRlcmZhY2UoJTBBJTIwJTIwJTIwJTIwZm4lM0RjaGF0X3dpdGhfaGZfY2xpZW50JTJDJTBBJTIwJTIwJTIwJTIwdHlwZSUzRCUyMm1lc3NhZ2VzJTIyJTJDJTBBJTIwJTIwJTIwJTIwdGl0bGUlM0QlMjJDdXN0b20lMjBDaGF0JTIwd2l0aCUyMEluZmVyZW5jZSUyMEVuZHBvaW50cyUyMiUyQyUwQSUyMCUyMCUyMCUyMGV4YW1wbGVzJTNEJTVCJTIyV2hhdCUyMGlzJTIwZGVlcCUyMGxlYXJuaW5nJTNGJTIyJTJDJTIwJTIyRXhwbGFpbiUyMG5ldXJhbCUyMG5ldHdvcmtzJTIyJTJDJTIwJTIySG93JTIwZG9lcyUyMEFJJTIwd29yayUzRiUyMiU1RCUwQSklMEElMEFpZiUyMF9fbmFtZV9fJTIwJTNEJTNEJTIwJTIyX19tYWluX18lMjIlM0ElMEElMjAlMjAlMjAlMjBkZW1vLmxhdW5jaCgp",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">import</span> gradio <span class="hljs-keyword">as</span> gr
<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient
<span class="hljs-comment"># Initialize the Hugging Face InferenceClient</span>
client = InferenceClient(
base_url=<span class="hljs-string">&quot;&lt;endpoint-url&gt;/v1/&quot;</span>, <span class="hljs-comment"># Replace with your endpoint URL</span>
token=os.getenv(<span class="hljs-string">&quot;HF_TOKEN&quot;</span>) <span class="hljs-comment"># Use environment variable for security</span>
)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">chat_with_hf_client</span>(<span class="hljs-params">message, history</span>):
<span class="hljs-comment"># Convert Gradio history to messages format</span>
messages = [{<span class="hljs-string">&quot;role&quot;</span>: msg[<span class="hljs-string">&quot;role&quot;</span>], <span class="hljs-string">&quot;content&quot;</span>: msg[<span class="hljs-string">&quot;content&quot;</span>]} <span class="hljs-keyword">for</span> msg <span class="hljs-keyword">in</span> history]
<span class="hljs-comment"># Add the current message</span>
messages.append({<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: message})
<span class="hljs-comment"># Create chat completion</span>
chat_completion = client.chat.completions.create(
model=<span class="hljs-string">&quot;endpoint-name&quot;</span>, <span class="hljs-comment"># Use the name of your endpoint (i.e. qwen3-1.7b-instruct-xxxx)</span>
messages=messages,
max_tokens=<span class="hljs-number">150</span>,
temperature=<span class="hljs-number">0.7</span>,
)
<span class="hljs-comment"># Return the response</span>
<span class="hljs-keyword">return</span> chat_completion.choices[<span class="hljs-number">0</span>].message.content
<span class="hljs-comment"># Create the Gradio interface</span>
demo = gr.ChatInterface(
fn=chat_with_hf_client,
<span class="hljs-built_in">type</span>=<span class="hljs-string">&quot;messages&quot;</span>,
title=<span class="hljs-string">&quot;Custom Chat with Inference Endpoints&quot;</span>,
examples=[<span class="hljs-string">&quot;What is deep learning?&quot;</span>, <span class="hljs-string">&quot;Explain neural networks&quot;</span>, <span class="hljs-string">&quot;How does AI work?&quot;</span>]
)
<span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">&quot;__main__&quot;</span>:
demo.launch()`,wrap:!1}}),{c(){l=T("p"),l.innerHTML=y,s=J(),c=T("p"),c.textContent=u,r=J(),b(o.$$.fragment),e=J(),M=T("p"),M.textContent=m,d=J(),b(I.$$.fragment)},l(p){l=U(p,"P",{"data-svelte-h":!0}),w(l)!=="svelte-15fpcnt"&&(l.innerHTML=y),s=j(p),c=U(p,"P",{"data-svelte-h":!0}),w(c)!=="svelte-1f3oki6"&&(c.textContent=u),r=j(p),B(o.$$.fragment,p),e=j(p),M=U(p,"P",{"data-svelte-h":!0}),w(M)!=="svelte-u7e6qd"&&(M.textContent=m),d=j(p),B(I.$$.fragment,p)},m(p,C){a(p,l,C),a(p,s,C),a(p,c,C),a(p,r,C),Z(o,p,C),a(p,e,C),a(p,M,C),a(p,d,C),Z(I,p,C),A=!0},p:ft,i(p){A||(f(o.$$.fragment,p),f(I.$$.fragment,p),A=!0)},o(p){g(o.$$.fragment,p),g(I.$$.fragment,p),A=!1},d(p){p&&(n(l),n(s),n(c),n(r),n(e),n(M),n(d)),G(o,p),G(I,p)}}}function Vl(h){let l,y="<strong>Using OpenAI Client</strong>",s,c,u="First, install the required dependencies:",r,o,e,M,m="Here’s a basic chat function using the OpenAI client:",d,I,A;return o=new W({props:{code:"cGlwJTIwaW5zdGFsbCUyMGdyYWRpbyUyMG9wZW5haQ==",highlighted:"pip install gradio openai",wrap:!1}}),I=new W({props:{code:"aW1wb3J0JTIwb3MlMEFpbXBvcnQlMjBncmFkaW8lMjBhcyUyMGdyJTBBZnJvbSUyMG9wZW5haSUyMGltcG9ydCUyME9wZW5BSSUwQSUwQSUyMyUyMEluaXRpYWxpemUlMjB0aGUlMjBPcGVuQUklMjBjbGllbnQlMjB3aXRoJTIweW91ciUyMEluZmVyZW5jZSUyMEVuZHBvaW50JTBBY2xpZW50JTIwJTNEJTIwT3BlbkFJKCUwQSUyMCUyMCUyMCUyMGJhc2VfdXJsJTNEJTIyJTNDZW5kcG9pbnQtdXJsJTNFJTJGdjElMkYlMjIlMkMlMjAlMjAlMjMlMjBSZXBsYWNlJTIwd2l0aCUyMHlvdXIlMjBlbmRwb2ludCUyMFVSTCUwQSUyMCUyMCUyMCUyMGFwaV9rZXklM0Rvcy5nZXRlbnYoJTIySEZfVE9LRU4lMjIpJTIwJTIwJTIzJTIwVXNlJTIwZW52aXJvbm1lbnQlMjB2YXJpYWJsZSUyMGZvciUyMHNlY3VyaXR5JTBBKSUwQSUwQWRlZiUyMGNoYXRfd2l0aF9vcGVuYWkobWVzc2FnZSUyQyUyMGhpc3RvcnkpJTNBJTBBJTBBJTIwJTIwJTIwJTIwJTIzJTIwQ29udmVydCUyMEdyYWRpbyUyMGhpc3RvcnklMjB0byUyME9wZW5BSSUyMGZvcm1hdCUwQSUyMCUyMCUyMCUyMG1lc3NhZ2VzJTIwJTNEJTIwJTVCJTdCJTIycm9sZSUyMiUzQSUyMG1zZyU1QiUyMnJvbGUlMjIlNUQlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwbXNnJTVCJTIyY29udGVudCUyMiU1RCU3RCUyMGZvciUyMG1zZyUyMGluJTIwaGlzdG9yeSU1RCUwQSUyMCUyMCUyMCUyMCUwQSUyMCUyMCUyMCUyMCUyMyUyMEFkZCUyMHRoZSUyMGN1cnJlbnQlMjBtZXNzYWdlJTBBJTIwJTIwJTIwJTIwbWVzc2FnZXMuYXBwZW5kKCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMG1lc3NhZ2UlN0QpJTBBJTIwJTIwJTIwJTIwJTBBJTIwJTIwJTIwJTIwJTIzJTIwQ3JlYXRlJTIwY2hhdCUyMGNvbXBsZXRpb24lMEElMjAlMjAlMjAlMjBjaGF0X2NvbXBsZXRpb24lMjAlM0QlMjBjbGllbnQuY2hhdC5jb21wbGV0aW9ucy5jcmVhdGUoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbW9kZWwlM0QlMjJlbmRwb2ludC1uYW1lJTIyJTJDJTIwJTIwJTIzJTIwVXNlJTIwdGhlJTIwbmFtZSUyMG9mJTIweW91ciUyMGVuZHBvaW50JTIwKGkuZS4lMjBxd2VuMy0xLjdiLXh4eHgpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbWVzc2FnZXMlM0RtZXNzYWdlcyUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1heF90b2tlbnMlM0QxNTAlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB0ZW1wZXJhdHVyZSUzRDAuNyUyQyUwQSUyMCUyMCUyMCUyMCklMEElMjAlMjAlMjAlMjAlMEElMjAlMjAlMjAlMjAlMjMlMjByZXR1cm4lMjB0aGUlMjByZXNwb25zZSUwQSUyMCUyMCUyMCUyMHJldHVybiUyMGNoYXRfY29tcGxldGlvbi5jaG9pY2VzJTVCMCU1RC5tZXNzYWdlLmNvbnRlbnQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMEElMEElMjMlMjBDcmVhdGUlMjB0aGUlMjBHcmFkaW8lMjBpbnRlcmZhY2UlMEFkZW1vJTIwJTNEJTIwZ3IuQ2hhdEludGVyZmFjZSglMEElMjAlMjAlMjAlMjBmbiUzRGNoYXRfd2l0aF9vcGVuYWklMkMlMEElMjAlMjAlMjAlMjB0eXBlJTNEJTIybWVzc2FnZXMlMjIlMkMlMEElMjAlMjAlMjAlMjB0aXRsZSUzRCUyMkN1c3RvbSUyMENoYXQlMjB3aXRoJTIwSW5mZXJlbmNlJTIwRW5kcG9pbnRzJTIyJTJDJTBBJTIwJTIwJTIwJTIwZXhhbXBsZXMlM0QlNUIlMjJXaGF0JTIwaXMlMjBkZWVwJTIwbGVhcm5pbmclM0YlMjIlMkMlMjAlMjJFeHBsYWluJTIwbmV1cmFsJTIwbmV0d29ya3MlMjIlMkMlMjAlMjJIb3clMjBkb2VzJTIwQUklMjB3b3JrJTNGJTIyJTVEJTBBKSUwQSUwQWlmJTIwX19uYW1lX18lMjAlM0QlM0QlMjAlMjJfX21haW5fXyUyMiUzQSUwQSUyMCUyMCUyMCUyMGRlbW8ubGF1bmNoKCk=",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">import</span> gradio <span class="hljs-keyword">as</span> gr
<span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI
<span class="hljs-comment"># Initialize the OpenAI client with your Inference Endpoint</span>
client = OpenAI(
base_url=<span class="hljs-string">&quot;&lt;endpoint-url&gt;/v1/&quot;</span>, <span class="hljs-comment"># Replace with your endpoint URL</span>
api_key=os.getenv(<span class="hljs-string">&quot;HF_TOKEN&quot;</span>) <span class="hljs-comment"># Use environment variable for security</span>
)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">chat_with_openai</span>(<span class="hljs-params">message, history</span>):
<span class="hljs-comment"># Convert Gradio history to OpenAI format</span>
messages = [{<span class="hljs-string">&quot;role&quot;</span>: msg[<span class="hljs-string">&quot;role&quot;</span>], <span class="hljs-string">&quot;content&quot;</span>: msg[<span class="hljs-string">&quot;content&quot;</span>]} <span class="hljs-keyword">for</span> msg <span class="hljs-keyword">in</span> history]
<span class="hljs-comment"># Add the current message</span>
messages.append({<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: message})
<span class="hljs-comment"># Create chat completion</span>
chat_completion = client.chat.completions.create(
model=<span class="hljs-string">&quot;endpoint-name&quot;</span>, <span class="hljs-comment"># Use the name of your endpoint (i.e. qwen3-1.7b-xxxx)</span>
messages=messages,
max_tokens=<span class="hljs-number">150</span>,
temperature=<span class="hljs-number">0.7</span>,
)
<span class="hljs-comment"># return the response</span>
<span class="hljs-keyword">return</span> chat_completion.choices[<span class="hljs-number">0</span>].message.content
<span class="hljs-comment"># Create the Gradio interface</span>
demo = gr.ChatInterface(
fn=chat_with_openai,
<span class="hljs-built_in">type</span>=<span class="hljs-string">&quot;messages&quot;</span>,
title=<span class="hljs-string">&quot;Custom Chat with Inference Endpoints&quot;</span>,
examples=[<span class="hljs-string">&quot;What is deep learning?&quot;</span>, <span class="hljs-string">&quot;Explain neural networks&quot;</span>, <span class="hljs-string">&quot;How does AI work?&quot;</span>]
)
<span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">&quot;__main__&quot;</span>:
demo.launch()`,wrap:!1}}),{c(){l=T("p"),l.innerHTML=y,s=J(),c=T("p"),c.textContent=u,r=J(),b(o.$$.fragment),e=J(),M=T("p"),M.textContent=m,d=J(),b(I.$$.fragment)},l(p){l=U(p,"P",{"data-svelte-h":!0}),w(l)!=="svelte-on37og"&&(l.innerHTML=y),s=j(p),c=U(p,"P",{"data-svelte-h":!0}),w(c)!=="svelte-1f3oki6"&&(c.textContent=u),r=j(p),B(o.$$.fragment,p),e=j(p),M=U(p,"P",{"data-svelte-h":!0}),w(M)!=="svelte-fjp75b"&&(M.textContent=m),d=j(p),B(I.$$.fragment,p)},m(p,C){a(p,l,C),a(p,s,C),a(p,c,C),a(p,r,C),Z(o,p,C),a(p,e,C),a(p,M,C),a(p,d,C),Z(I,p,C),A=!0},p:ft,i(p){A||(f(o.$$.fragment,p),f(I.$$.fragment,p),A=!0)},o(p){g(o.$$.fragment,p),g(I.$$.fragment,p),A=!1},d(p){p&&(n(l),n(s),n(c),n(r),n(e),n(M),n(d)),G(o,p),G(I,p)}}}function Sl(h){let l,y="<strong>Using Requests Library</strong>",s,c,u="First, install the required dependencies:",r,o,e,M,m="Here’s a basic chat function using the requests library with the Messages API:",d,I,A;return o=new W({props:{code:"cGlwJTIwaW5zdGFsbCUyMGdyYWRpbyUyMHJlcXVlc3Rz",highlighted:"pip install gradio requests",wrap:!1}}),I=new W({props:{code:"aW1wb3J0JTIwb3MlMEFpbXBvcnQlMjBncmFkaW8lMjBhcyUyMGdyJTBBaW1wb3J0JTIwcmVxdWVzdHMlMEElMEElMjMlMjBDb25maWd1cmUlMjB5b3VyJTIwSW5mZXJlbmNlJTIwRW5kcG9pbnQlMEFBUElfVVJMJTIwJTNEJTIwJTIyJTNDZW5kcG9pbnQtdXJsJTNFJTJGdjElMkZjaGF0JTJGY29tcGxldGlvbnMlMjIlMjAlMjAlMjMlMjBVc2UlMjB0aGUlMjBjaGF0JTIwY29tcGxldGlvbnMlMjBlbmRwb2ludCUwQSUwQWhlYWRlcnMlMjAlM0QlMjAlN0IlMEElMjAlMjAlMjAlMjAlMjJBY2NlcHQlMjIlM0ElMjAlMjJhcHBsaWNhdGlvbiUyRmpzb24lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJDb250ZW50LVR5cGUlMjIlM0ElMjAlMjJhcHBsaWNhdGlvbiUyRmpzb24lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJBdXRob3JpemF0aW9uJTIyJTNBJTIwZiUyMkJlYXJlciUyMCU3Qm9zLmdldGVudignSEZfVE9LRU4nKSU3RCUyMiUyMCUyMCUyMyUyMFVzZSUyMGVudmlyb25tZW50JTIwdmFyaWFibGUlMjBmb3IlMjBzZWN1cml0eSUwQSU3RCUwQSUwQWRlZiUyMGNoYXRfd2l0aF9yZXF1ZXN0cyhtZXNzYWdlJTJDJTIwaGlzdG9yeSklM0ElMEElMjAlMjAlMjAlMjAlMjMlMjBDb252ZXJ0JTIwR3JhZGlvJTIwaGlzdG9yeSUyMHRvJTIwbWVzc2FnZXMlMjBmb3JtYXQlMEElMjAlMjAlMjAlMjBtZXNzYWdlcyUyMCUzRCUyMCU1QiU3QiUyMnJvbGUlMjIlM0ElMjBtc2clNUIlMjJyb2xlJTIyJTVEJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMG1zZyU1QiUyMmNvbnRlbnQlMjIlNUQlN0QlMjBmb3IlMjBtc2clMjBpbiUyMGhpc3RvcnklNUQlMEElMjAlMjAlMjAlMjAlMEElMjAlMjAlMjAlMjAlMjMlMjBBZGQlMjB0aGUlMjBjdXJyZW50JTIwbWVzc2FnZSUwQSUyMCUyMCUyMCUyMG1lc3NhZ2VzLmFwcGVuZCglN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjBtZXNzYWdlJTdEKSUwQSUyMCUyMCUyMCUyMCUwQSUyMCUyMCUyMCUyMCUyMyUyMFByZXBhcmUlMjB0aGUlMjBwYXlsb2FkJTIwdXNpbmclMjB0aGUlMjBNZXNzYWdlcyUyMEFQSSUyMGZvcm1hdCUwQSUyMCUyMCUyMCUyMHBheWxvYWQlMjAlM0QlMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJtb2RlbCUyMiUzQSUyMCUyMmVuZHBvaW50LW5hbWUlMjIlMkMlMjAlMjAlMjMlMjBVc2UlMjB0aGUlMjBuYW1lJTIwb2YlMjB5b3VyJTIwZW5kcG9pbnQlMjAoaS5lLiUyMHF3ZW4zLTEuN2IteHh4eCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJtZXNzYWdlcyUyMiUzQSUyMG1lc3NhZ2VzJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIybWF4X3Rva2VucyUyMiUzQSUyMDE1MCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnRlbXBlcmF0dXJlJTIyJTNBJTIwMC43JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyc3RyZWFtJTIyJTNBJTIwRmFsc2UlMEElMjAlMjAlMjAlMjAlN0QlMEElMjAlMjAlMjAlMjAlMEElMjAlMjAlMjAlMjAlMjMlMjBNYWtlJTIwdGhlJTIwcmVxdWVzdCUwQSUyMCUyMCUyMCUyMHJlc3BvbnNlJTIwJTNEJTIwcmVxdWVzdHMucG9zdChBUElfVVJMJTJDJTIwaGVhZGVycyUzRGhlYWRlcnMlMkMlMjBqc29uJTNEcGF5bG9hZCklMEElMjAlMjAlMjAlMjByZXNwb25zZS5yYWlzZV9mb3Jfc3RhdHVzKCklMEElMjAlMjAlMjAlMjAlMEElMjAlMjAlMjAlMjAlMjMlMjBQYXJzZSUyMHRoZSUyMHJlc3BvbnNlJTBBJTIwJTIwJTIwJTIwcmVzdWx0JTIwJTNEJTIwcmVzcG9uc2UuanNvbigpJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwcmVzdWx0JTVCJTIyY2hvaWNlcyUyMiU1RCU1QjAlNUQlNUIlMjJtZXNzYWdlJTIyJTVEJTVCJTIyY29udGVudCUyMiU1RCUwQSUwQSUyMyUyMENyZWF0ZSUyMHRoZSUyMEdyYWRpbyUyMGludGVyZmFjZSUwQWRlbW8lMjAlM0QlMjBnci5DaGF0SW50ZXJmYWNlKCUwQSUyMCUyMCUyMCUyMGZuJTNEY2hhdF93aXRoX3JlcXVlc3RzJTJDJTBBJTIwJTIwJTIwJTIwdHlwZSUzRCUyMm1lc3NhZ2VzJTIyJTJDJTBBJTIwJTIwJTIwJTIwdGl0bGUlM0QlMjJDdXN0b20lMjBDaGF0JTIwd2l0aCUyMEluZmVyZW5jZSUyMEVuZHBvaW50cyUyMiUyQyUwQSUyMCUyMCUyMCUyMGV4YW1wbGVzJTNEJTVCJTIyV2hhdCUyMGlzJTIwZGVlcCUyMGxlYXJuaW5nJTNGJTIyJTJDJTIwJTIyRXhwbGFpbiUyMG5ldXJhbCUyMG5ldHdvcmtzJTIyJTJDJTIwJTIySG93JTIwZG9lcyUyMEFJJTIwd29yayUzRiUyMiU1RCUwQSklMEElMEFpZiUyMF9fbmFtZV9fJTIwJTNEJTNEJTIwJTIyX19tYWluX18lMjIlM0ElMEElMjAlMjAlMjAlMjBkZW1vLmxhdW5jaCgp",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">import</span> gradio <span class="hljs-keyword">as</span> gr
<span class="hljs-keyword">import</span> requests
<span class="hljs-comment"># Configure your Inference Endpoint</span>
API_URL = <span class="hljs-string">&quot;&lt;endpoint-url&gt;/v1/chat/completions&quot;</span> <span class="hljs-comment"># Use the chat completions endpoint</span>
headers = {
<span class="hljs-string">&quot;Accept&quot;</span>: <span class="hljs-string">&quot;application/json&quot;</span>,
<span class="hljs-string">&quot;Content-Type&quot;</span>: <span class="hljs-string">&quot;application/json&quot;</span>,
<span class="hljs-string">&quot;Authorization&quot;</span>: <span class="hljs-string">f&quot;Bearer <span class="hljs-subst">{os.getenv(<span class="hljs-string">&#x27;HF_TOKEN&#x27;</span>)}</span>&quot;</span> <span class="hljs-comment"># Use environment variable for security</span>
}
<span class="hljs-keyword">def</span> <span class="hljs-title function_">chat_with_requests</span>(<span class="hljs-params">message, history</span>):
<span class="hljs-comment"># Convert Gradio history to messages format</span>
messages = [{<span class="hljs-string">&quot;role&quot;</span>: msg[<span class="hljs-string">&quot;role&quot;</span>], <span class="hljs-string">&quot;content&quot;</span>: msg[<span class="hljs-string">&quot;content&quot;</span>]} <span class="hljs-keyword">for</span> msg <span class="hljs-keyword">in</span> history]
<span class="hljs-comment"># Add the current message</span>
messages.append({<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: message})
<span class="hljs-comment"># Prepare the payload using the Messages API format</span>
payload = {
<span class="hljs-string">&quot;model&quot;</span>: <span class="hljs-string">&quot;endpoint-name&quot;</span>, <span class="hljs-comment"># Use the name of your endpoint (i.e. qwen3-1.7b-xxxx)</span>
<span class="hljs-string">&quot;messages&quot;</span>: messages,
<span class="hljs-string">&quot;max_tokens&quot;</span>: <span class="hljs-number">150</span>,
<span class="hljs-string">&quot;temperature&quot;</span>: <span class="hljs-number">0.7</span>,
<span class="hljs-string">&quot;stream&quot;</span>: <span class="hljs-literal">False</span>
}
<span class="hljs-comment"># Make the request</span>
response = requests.post(API_URL, headers=headers, json=payload)
response.raise_for_status()
<span class="hljs-comment"># Parse the response</span>
result = response.json()
<span class="hljs-keyword">return</span> result[<span class="hljs-string">&quot;choices&quot;</span>][<span class="hljs-number">0</span>][<span class="hljs-string">&quot;message&quot;</span>][<span class="hljs-string">&quot;content&quot;</span>]
<span class="hljs-comment"># Create the Gradio interface</span>
demo = gr.ChatInterface(
fn=chat_with_requests,
<span class="hljs-built_in">type</span>=<span class="hljs-string">&quot;messages&quot;</span>,
title=<span class="hljs-string">&quot;Custom Chat with Inference Endpoints&quot;</span>,
examples=[<span class="hljs-string">&quot;What is deep learning?&quot;</span>, <span class="hljs-string">&quot;Explain neural networks&quot;</span>, <span class="hljs-string">&quot;How does AI work?&quot;</span>]
)
<span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">&quot;__main__&quot;</span>:
demo.launch()`,wrap:!1}}),{c(){l=T("p"),l.innerHTML=y,s=J(),c=T("p"),c.textContent=u,r=J(),b(o.$$.fragment),e=J(),M=T("p"),M.textContent=m,d=J(),b(I.$$.fragment)},l(p){l=U(p,"P",{"data-svelte-h":!0}),w(l)!=="svelte-10zf8di"&&(l.innerHTML=y),s=j(p),c=U(p,"P",{"data-svelte-h":!0}),w(c)!=="svelte-1f3oki6"&&(c.textContent=u),r=j(p),B(o.$$.fragment,p),e=j(p),M=U(p,"P",{"data-svelte-h":!0}),w(M)!=="svelte-177no80"&&(M.textContent=m),d=j(p),B(I.$$.fragment,p)},m(p,C){a(p,l,C),a(p,s,C),a(p,c,C),a(p,r,C),Z(o,p,C),a(p,e,C),a(p,M,C),a(p,d,C),Z(I,p,C),A=!0},p:ft,i(p){A||(f(o.$$.fragment,p),f(I.$$.fragment,p),A=!0)},o(p){g(o.$$.fragment,p),g(I.$$.fragment,p),A=!1},d(p){p&&(n(l),n(s),n(c),n(r),n(e),n(M),n(d)),G(o,p),G(I,p)}}}function _l(h){let l,y,s,c,u,r;return l=new gt({props:{id:"chat-implementation",option:"hf-client",$$slots:{default:[Nl]},$$scope:{ctx:h}}}),s=new gt({props:{id:"chat-implementation",option:"openai-client",$$slots:{default:[Vl]},$$scope:{ctx:h}}}),u=new gt({props:{id:"chat-implementation",option:"requests",$$slots:{default:[Sl]},$$scope:{ctx:h}}}),{c(){b(l.$$.fragment),y=J(),b(s.$$.fragment),c=J(),b(u.$$.fragment)},l(o){B(l.$$.fragment,o),y=j(o),B(s.$$.fragment,o),c=j(o),B(u.$$.fragment,o)},m(o,e){Z(l,o,e),a(o,y,e),Z(s,o,e),a(o,c,e),Z(u,o,e),r=!0},p(o,e){const M={};e&2&&(M.$$scope={dirty:e,ctx:o}),l.$set(M);const m={};e&2&&(m.$$scope={dirty:e,ctx:o}),s.$set(m);const d={};e&2&&(d.$$scope={dirty:e,ctx:o}),u.$set(d)},i(o){r||(f(l.$$.fragment,o),f(s.$$.fragment,o),f(u.$$.fragment,o),r=!0)},o(o){g(l.$$.fragment,o),g(s.$$.fragment,o),g(u.$$.fragment,o),r=!1},d(o){o&&(n(y),n(c)),G(l,o),G(s,o),G(u,o)}}}function Rl(h){let l,y,s,c="The Hugging Face InferenceClient supports streaming similar to the OpenAI client:",u,r,o;return l=new E({props:{title:"Hugging Face InferenceClient Streaming",local:"hugging-face-inferenceclient-streaming",headingTag:"h3"}}),r=new W({props:{code:"aW1wb3J0JTIwb3MlMEFpbXBvcnQlMjBncmFkaW8lMjBhcyUyMGdyJTBBZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMEluZmVyZW5jZUNsaWVudCUwQSUwQWNsaWVudCUyMCUzRCUyMEluZmVyZW5jZUNsaWVudCglMEElMjAlMjAlMjAlMjBiYXNlX3VybCUzRCUyMiUzQ2VuZHBvaW50LXVybCUzRSUyRnYxJTJGJTIyJTJDJTBBJTIwJTIwJTIwJTIwdG9rZW4lM0Rvcy5nZXRlbnYoJTIySEZfVE9LRU4lMjIpJTBBKSUwQSUwQWRlZiUyMGNoYXRfd2l0aF9oZl9zdHJlYW1pbmcobWVzc2FnZSUyQyUyMGhpc3RvcnkpJTNBJTBBJTIwJTIwJTIwJTIwJTIzJTIwQ29udmVydCUyMGhpc3RvcnklMjB0byUyMG1lc3NhZ2VzJTIwZm9ybWF0JTBBJTIwJTIwJTIwJTIwbWVzc2FnZXMlMjAlM0QlMjAlNUIlN0IlMjJyb2xlJTIyJTNBJTIwbXNnJTVCJTIycm9sZSUyMiU1RCUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjBtc2clNUIlMjJjb250ZW50JTIyJTVEJTdEJTIwZm9yJTIwbXNnJTIwaW4lMjBoaXN0b3J5JTVEJTBBJTIwJTIwJTIwJTIwbWVzc2FnZXMuYXBwZW5kKCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMG1lc3NhZ2UlN0QpJTBBJTIwJTIwJTIwJTIwJTBBJTIwJTIwJTIwJTIwJTIzJTIwQ3JlYXRlJTIwc3RyZWFtaW5nJTIwY2hhdCUyMGNvbXBsZXRpb24lMEElMjAlMjAlMjAlMjBjaGF0X2NvbXBsZXRpb24lMjAlM0QlMjBjbGllbnQuY2hhdC5jb21wbGV0aW9ucy5jcmVhdGUoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbW9kZWwlM0QlMjJlbmRwb2ludC1uYW1lJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbWVzc2FnZXMlM0RtZXNzYWdlcyUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1heF90b2tlbnMlM0QxNTAlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB0ZW1wZXJhdHVyZSUzRDAuNyUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN0cmVhbSUzRFRydWUlMjAlMjAlMjMlMjBFbmFibGUlMjBzdHJlYW1pbmclMEElMjAlMjAlMjAlMjApJTBBJTIwJTIwJTIwJTIwJTBBJTIwJTIwJTIwJTIwcmVzcG9uc2UlMjAlM0QlMjAlMjIlMjIlMEElMjAlMjAlMjAlMjBmb3IlMjBjaHVuayUyMGluJTIwY2hhdF9jb21wbGV0aW9uJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaWYlMjBjaHVuay5jaG9pY2VzJTVCMCU1RC5kZWx0YS5jb250ZW50JTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmVzcG9uc2UlMjAlMkIlM0QlMjBjaHVuay5jaG9pY2VzJTVCMCU1RC5kZWx0YS5jb250ZW50JTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIweWllbGQlMjByZXNwb25zZSUyMCUyMCUyMyUyMFlpZWxkJTIwcGFydGlhbCUyMHJlc3BvbnNlJTIwZm9yJTIwc3RyZWFtaW5nJTBBJTBBJTIzJTIwQ3JlYXRlJTIwc3RyZWFtaW5nJTIwaW50ZXJmYWNlJTBBZGVtbyUyMCUzRCUyMGdyLkNoYXRJbnRlcmZhY2UoJTBBJTIwJTIwJTIwJTIwZm4lM0RjaGF0X3dpdGhfaGZfc3RyZWFtaW5nJTJDJTBBJTIwJTIwJTIwJTIwdHlwZSUzRCUyMm1lc3NhZ2VzJTIyJTJDJTBBJTIwJTIwJTIwJTIwdGl0bGUlM0QlMjJTdHJlYW1pbmclMjBDaGF0JTIwd2l0aCUyMEluZmVyZW5jZSUyMEVuZHBvaW50cyUyMiUwQSklMEElMEFkZW1vLmxhdW5jaCgp",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">import</span> gradio <span class="hljs-keyword">as</span> gr
<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient
client = InferenceClient(
base_url=<span class="hljs-string">&quot;&lt;endpoint-url&gt;/v1/&quot;</span>,
token=os.getenv(<span class="hljs-string">&quot;HF_TOKEN&quot;</span>)
)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">chat_with_hf_streaming</span>(<span class="hljs-params">message, history</span>):
<span class="hljs-comment"># Convert history to messages format</span>
messages = [{<span class="hljs-string">&quot;role&quot;</span>: msg[<span class="hljs-string">&quot;role&quot;</span>], <span class="hljs-string">&quot;content&quot;</span>: msg[<span class="hljs-string">&quot;content&quot;</span>]} <span class="hljs-keyword">for</span> msg <span class="hljs-keyword">in</span> history]
messages.append({<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: message})
<span class="hljs-comment"># Create streaming chat completion</span>
chat_completion = client.chat.completions.create(
model=<span class="hljs-string">&quot;endpoint-name&quot;</span>,
messages=messages,
max_tokens=<span class="hljs-number">150</span>,
temperature=<span class="hljs-number">0.7</span>,
stream=<span class="hljs-literal">True</span> <span class="hljs-comment"># Enable streaming</span>
)
response = <span class="hljs-string">&quot;&quot;</span>
<span class="hljs-keyword">for</span> chunk <span class="hljs-keyword">in</span> chat_completion:
<span class="hljs-keyword">if</span> chunk.choices[<span class="hljs-number">0</span>].delta.content:
response += chunk.choices[<span class="hljs-number">0</span>].delta.content
<span class="hljs-keyword">yield</span> response <span class="hljs-comment"># Yield partial response for streaming</span>
<span class="hljs-comment"># Create streaming interface</span>
demo = gr.ChatInterface(
fn=chat_with_hf_streaming,
<span class="hljs-built_in">type</span>=<span class="hljs-string">&quot;messages&quot;</span>,
title=<span class="hljs-string">&quot;Streaming Chat with Inference Endpoints&quot;</span>
)
demo.launch()`,wrap:!1}}),{c(){b(l.$$.fragment),y=J(),s=T("p"),s.textContent=c,u=J(),b(r.$$.fragment)},l(e){B(l.$$.fragment,e),y=j(e),s=U(e,"P",{"data-svelte-h":!0}),w(s)!=="svelte-tuciwp"&&(s.textContent=c),u=j(e),B(r.$$.fragment,e)},m(e,M){Z(l,e,M),a(e,y,M),a(e,s,M),a(e,u,M),Z(r,e,M),o=!0},p:ft,i(e){o||(f(l.$$.fragment,e),f(r.$$.fragment,e),o=!0)},o(e){g(l.$$.fragment,e),g(r.$$.fragment,e),o=!1},d(e){e&&(n(y),n(s),n(u)),G(l,e),G(r,e)}}}function Xl(h){let l,y,s,c="To use streaming with the OpenAI client, we need to set <code>stream=True</code> and yield the response as it builds:",u,r,o;return l=new E({props:{title:"OpenAI Client Streaming",local:"openai-client-streaming",headingTag:"h3"}}),r=new W({props:{code:"aW1wb3J0JTIwb3MlMEFpbXBvcnQlMjBncmFkaW8lMjBhcyUyMGdyJTBBZnJvbSUyMG9wZW5haSUyMGltcG9ydCUyME9wZW5BSSUwQSUwQWNsaWVudCUyMCUzRCUyME9wZW5BSShiYXNlX3VybCUzRCUyMiUzQ2VuZHBvaW50LXVybCUzRSUyRnYxJTJGJTIyJTJDJTIwYXBpX2tleSUzRG9zLmdldGVudiglMjJIRl9UT0tFTiUyMikpJTBBJTBBJTBBZGVmJTIwY2hhdF93aXRoX3N0cmVhbWluZyhtZXNzYWdlJTJDJTIwaGlzdG9yeSklM0ElMEElMjAlMjAlMjAlMjAlMjMlMjBDb252ZXJ0JTIwaGlzdG9yeSUyMHRvJTIwT3BlbkFJJTIwZm9ybWF0JTBBJTIwJTIwJTIwJTIwbWVzc2FnZXMlMjAlM0QlMjAlNUIlN0IlMjJyb2xlJTIyJTNBJTIwbXNnJTVCJTIycm9sZSUyMiU1RCUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjBtc2clNUIlMjJjb250ZW50JTIyJTVEJTdEJTIwZm9yJTIwbXNnJTIwaW4lMjBoaXN0b3J5JTVEJTBBJTIwJTIwJTIwJTIwbWVzc2FnZXMuYXBwZW5kKCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMG1lc3NhZ2UlN0QpJTBBJTBBJTBBY2hhdF9jb21wbGV0aW9uJTIwJTNEJTIwY2xpZW50LmNoYXQuY29tcGxldGlvbnMuY3JlYXRlKCUwQSUyMCUyMCUyMCUyMG1vZGVsJTNEJTIyZW5kcG9pbnQtbmFtZSUyMiUyQyUyMCUyMyUyMFVzZSUyMHRoZSUyMG5hbWUlMjBvZiUyMHlvdXIlMjBlbmRwb2ludCUyMChpLmUuJTIwcXdlbjMtMS43Yi14eHh4KSUwQSUyMCUyMCUyMCUyMG1lc3NhZ2VzJTNEbWVzc2FnZXMlMkMlMEElMjAlMjAlMjAlMjBtYXhfdG9rZW5zJTNEMTUwJTJDJTBBJTIwJTIwJTIwJTIwdGVtcGVyYXR1cmUlM0QwLjclMkMlMEElMjAlMjAlMjAlMjBzdHJlYW0lM0RUcnVlJTJDJTIwJTIwJTIzJTIwRW5hYmxlJTIwc3RyZWFtaW5nJTBBKSUwQSUwQXJlc3BvbnNlJTIwJTNEJTIwJTIyJTIyJTBBZm9yJTIwY2h1bmslMjBpbiUyMGNoYXRfY29tcGxldGlvbiUzQSUwQSUyMCUyMCUyMCUyMGlmJTIwY2h1bmsuY2hvaWNlcyU1QjAlNUQuZGVsdGEuY29udGVudCUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHJlc3BvbnNlJTIwJTJCJTNEJTIwY2h1bmsuY2hvaWNlcyU1QjAlNUQuZGVsdGEuY29udGVudCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHlpZWxkJTIwcmVzcG9uc2UlMjAlMjAlMjMlMjBZaWVsZCUyMHBhcnRpYWwlMjByZXNwb25zZSUyMGZvciUyMHN0cmVhbWluZyUwQSUwQSUwQSUyMyUyMENyZWF0ZSUyMHN0cmVhbWluZyUyMGludGVyZmFjZSUwQWRlbW8lMjAlM0QlMjBnci5DaGF0SW50ZXJmYWNlKCUwQSUyMCUyMCUyMCUyMGZuJTNEY2hhdF93aXRoX3N0cmVhbWluZyUyQyUwQSUyMCUyMCUyMCUyMHR5cGUlM0QlMjJtZXNzYWdlcyUyMiUyQyUwQSUyMCUyMCUyMCUyMHRpdGxlJTNEJTIyU3RyZWFtaW5nJTIwQ2hhdCUyMHdpdGglMjBJbmZlcmVuY2UlMjBFbmRwb2ludHMlMjIlMkMlMEEpJTBBJTBBZGVtby5sYXVuY2goKSUwQQ==",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">import</span> gradio <span class="hljs-keyword">as</span> gr
<span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI
client = OpenAI(base_url=<span class="hljs-string">&quot;&lt;endpoint-url&gt;/v1/&quot;</span>, api_key=os.getenv(<span class="hljs-string">&quot;HF_TOKEN&quot;</span>))
<span class="hljs-keyword">def</span> <span class="hljs-title function_">chat_with_streaming</span>(<span class="hljs-params">message, history</span>):
<span class="hljs-comment"># Convert history to OpenAI format</span>
messages = [{<span class="hljs-string">&quot;role&quot;</span>: msg[<span class="hljs-string">&quot;role&quot;</span>], <span class="hljs-string">&quot;content&quot;</span>: msg[<span class="hljs-string">&quot;content&quot;</span>]} <span class="hljs-keyword">for</span> msg <span class="hljs-keyword">in</span> history]
messages.append({<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: message})
chat_completion = client.chat.completions.create(
model=<span class="hljs-string">&quot;endpoint-name&quot;</span>, <span class="hljs-comment"># Use the name of your endpoint (i.e. qwen3-1.7b-xxxx)</span>
messages=messages,
max_tokens=<span class="hljs-number">150</span>,
temperature=<span class="hljs-number">0.7</span>,
stream=<span class="hljs-literal">True</span>, <span class="hljs-comment"># Enable streaming</span>
)
response = <span class="hljs-string">&quot;&quot;</span>
<span class="hljs-keyword">for</span> chunk <span class="hljs-keyword">in</span> chat_completion:
<span class="hljs-keyword">if</span> chunk.choices[<span class="hljs-number">0</span>].delta.content:
response += chunk.choices[<span class="hljs-number">0</span>].delta.content
<span class="hljs-keyword">yield</span> response <span class="hljs-comment"># Yield partial response for streaming</span>
<span class="hljs-comment"># Create streaming interface</span>
demo = gr.ChatInterface(
fn=chat_with_streaming,
<span class="hljs-built_in">type</span>=<span class="hljs-string">&quot;messages&quot;</span>,
title=<span class="hljs-string">&quot;Streaming Chat with Inference Endpoints&quot;</span>,
)
demo.launch()
`,wrap:!1}}),{c(){b(l.$$.fragment),y=J(),s=T("p"),s.innerHTML=c,u=J(),b(r.$$.fragment)},l(e){B(l.$$.fragment,e),y=j(e),s=U(e,"P",{"data-svelte-h":!0}),w(s)!=="svelte-21doe5"&&(s.innerHTML=c),u=j(e),B(r.$$.fragment,e)},m(e,M){Z(l,e,M),a(e,y,M),a(e,s,M),a(e,u,M),Z(r,e,M),o=!0},p:ft,i(e){o||(f(l.$$.fragment,e),f(r.$$.fragment,e),o=!0)},o(e){g(l.$$.fragment,e),g(r.$$.fragment,e),o=!1},d(e){e&&(n(y),n(s),n(u)),G(l,e),G(r,e)}}}function zl(h){let l,y,s,c="For requests, you can use the streaming approach with the Messages API by setting <code>stream=True</code>:",u,r,o;return l=new E({props:{title:"Requests Library Streaming",local:"requests-library-streaming",headingTag:"h3"}}),r=new W({props:{code:"aW1wb3J0JTIwb3MlMEFpbXBvcnQlMjBncmFkaW8lMjBhcyUyMGdyJTBBaW1wb3J0JTIwcmVxdWVzdHMlMEFpbXBvcnQlMjBqc29uJTBBJTBBQVBJX1VSTCUyMCUzRCUyMCUyMmh0dHBzJTNBJTJGJTJGJTNDaWQlM0UuJTNDcmVnaW9uJTNFLiUzQ2Nsb3VkJTNFLmVuZHBvaW50cy5odWdnaW5nZmFjZS5jbG91ZCUyRnYxJTJGY2hhdCUyRmNvbXBsZXRpb25zJTIyJTBBJTBBaGVhZGVycyUyMCUzRCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMkFjY2VwdCUyMiUzQSUyMCUyMmFwcGxpY2F0aW9uJTJGanNvbiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMkNvbnRlbnQtVHlwZSUyMiUzQSUyMCUyMmFwcGxpY2F0aW9uJTJGanNvbiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMkF1dGhvcml6YXRpb24lMjIlM0ElMjBmJTIyQmVhcmVyJTIwJTdCb3MuZ2V0ZW52KCdIRl9UT0tFTicpJTdEJTIyJTJDJTBBJTdEJTBBJTBBJTBBZGVmJTIwY2hhdF93aXRoX3JlcXVlc3RzX3N0cmVhbWluZyhtZXNzYWdlJTJDJTIwaGlzdG9yeSklM0ElMEElMjAlMjAlMjAlMjAlMjMlMjBDb252ZXJ0JTIwR3JhZGlvJTIwaGlzdG9yeSUyMHRvJTIwbWVzc2FnZXMlMjBmb3JtYXQlMEElMjAlMjAlMjAlMjBtZXNzYWdlcyUyMCUzRCUyMCU1QiU3QiUyMnJvbGUlMjIlM0ElMjBtc2clNUIlMjJyb2xlJTIyJTVEJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMG1zZyU1QiUyMmNvbnRlbnQlMjIlNUQlN0QlMjBmb3IlMjBtc2clMjBpbiUyMGhpc3RvcnklNUQlMEElMjAlMjAlMjAlMjBtZXNzYWdlcy5hcHBlbmQoJTdCJTIycm9sZSUyMiUzQSUyMCUyMnVzZXIlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwbWVzc2FnZSU3RCklMEElMEElMjAlMjAlMjAlMjAlMjMlMjBQcmVwYXJlJTIwcGF5bG9hZCUyMHVzaW5nJTIwTWVzc2FnZXMlMjBBUEklMjBmb3JtYXQlMEElMjAlMjAlMjAlMjBwYXlsb2FkJTIwJTNEJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIybW9kZWwlMjIlM0ElMjAlMjJzbW9sbG0yLTEtN2ItaW5zdHJ1Y3QtbGpuJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIybWVzc2FnZXMlMjIlM0ElMjBtZXNzYWdlcyUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm1heF90b2tlbnMlMjIlM0ElMjAxNTAlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ0ZW1wZXJhdHVyZSUyMiUzQSUyMDAuNyUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnN0cmVhbSUyMiUzQSUyMFRydWUlMkMlMjAlMjAlMjMlMjBFbmFibGUlMjBzdHJlYW1pbmclMEElMjAlMjAlMjAlMjAlN0QlMEElMEElMjAlMjAlMjAlMjByZXNwb25zZSUyMCUzRCUyMHJlcXVlc3RzLnBvc3QoQVBJX1VSTCUyQyUyMGhlYWRlcnMlM0RoZWFkZXJzJTJDJTIwanNvbiUzRHBheWxvYWQlMkMlMjBzdHJlYW0lM0RUcnVlKSUwQSUwQSUyMCUyMCUyMCUyMGNvbnRlbnQlMjAlM0QlMjAlMjIlMjIlMEElMEElMjAlMjAlMjAlMjBmb3IlMjBsaW5lJTIwaW4lMjByZXNwb25zZS5pdGVyX2xpbmVzKCklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsaW5lJTIwJTNEJTIwbGluZS5kZWNvZGUoJTIydXRmLTglMjIpJTBBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaWYlMjBsaW5lLnN0YXJ0c3dpdGgoJTIyZGF0YSUzQSUyMCUyMiklMjBhbmQlMjBub3QlMjBsaW5lLmVuZHN3aXRoKCUyMiU1QkRPTkUlNUQlMjIpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZGF0YSUyMCUzRCUyMGpzb24ubG9hZHMobGluZSU1QmxlbiglMjJkYXRhJTNBJTIwJTIyKSUyMCUzQSU1RCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBjaHVuayUyMCUzRCUyMGRhdGElNUIlMjJjaG9pY2VzJTIyJTVEJTVCMCU1RCU1QiUyMmRlbHRhJTIyJTVELmdldCglMjJjb250ZW50JTIyJTJDJTIwJTIyJTIyKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGNvbnRlbnQlMjAlMkIlM0QlMjBjaHVuayUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHlpZWxkJTIwY29udGVudCUwQSUwQSUwQSUyMyUyMENyZWF0ZSUyMHN0cmVhbWluZyUyMGludGVyZmFjZSUwQWRlbW8lMjAlM0QlMjBnci5DaGF0SW50ZXJmYWNlKCUwQSUyMCUyMCUyMCUyMGZuJTNEY2hhdF93aXRoX3JlcXVlc3RzX3N0cmVhbWluZyUyQyUwQSUyMCUyMCUyMCUyMHR5cGUlM0QlMjJtZXNzYWdlcyUyMiUyQyUwQSUyMCUyMCUyMCUyMHRpdGxlJTNEJTIyU3RyZWFtaW5nJTIwQ2hhdCUyMHdpdGglMjBJbmZlcmVuY2UlMjBFbmRwb2ludHMlMjIlMkMlMEEpJTBBJTBBZGVtby5sYXVuY2goKSUwQQ==",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">import</span> gradio <span class="hljs-keyword">as</span> gr
<span class="hljs-keyword">import</span> requests
<span class="hljs-keyword">import</span> json
API_URL = <span class="hljs-string">&quot;https://&lt;id&gt;.&lt;region&gt;.&lt;cloud&gt;.endpoints.huggingface.cloud/v1/chat/completions&quot;</span>
headers = {
<span class="hljs-string">&quot;Accept&quot;</span>: <span class="hljs-string">&quot;application/json&quot;</span>,
<span class="hljs-string">&quot;Content-Type&quot;</span>: <span class="hljs-string">&quot;application/json&quot;</span>,
<span class="hljs-string">&quot;Authorization&quot;</span>: <span class="hljs-string">f&quot;Bearer <span class="hljs-subst">{os.getenv(<span class="hljs-string">&#x27;HF_TOKEN&#x27;</span>)}</span>&quot;</span>,
}
<span class="hljs-keyword">def</span> <span class="hljs-title function_">chat_with_requests_streaming</span>(<span class="hljs-params">message, history</span>):
<span class="hljs-comment"># Convert Gradio history to messages format</span>
messages = [{<span class="hljs-string">&quot;role&quot;</span>: msg[<span class="hljs-string">&quot;role&quot;</span>], <span class="hljs-string">&quot;content&quot;</span>: msg[<span class="hljs-string">&quot;content&quot;</span>]} <span class="hljs-keyword">for</span> msg <span class="hljs-keyword">in</span> history]
messages.append({<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: message})
<span class="hljs-comment"># Prepare payload using Messages API format</span>
payload = {
<span class="hljs-string">&quot;model&quot;</span>: <span class="hljs-string">&quot;smollm2-1-7b-instruct-ljn&quot;</span>,
<span class="hljs-string">&quot;messages&quot;</span>: messages,
<span class="hljs-string">&quot;max_tokens&quot;</span>: <span class="hljs-number">150</span>,
<span class="hljs-string">&quot;temperature&quot;</span>: <span class="hljs-number">0.7</span>,
<span class="hljs-string">&quot;stream&quot;</span>: <span class="hljs-literal">True</span>, <span class="hljs-comment"># Enable streaming</span>
}
response = requests.post(API_URL, headers=headers, json=payload, stream=<span class="hljs-literal">True</span>)
content = <span class="hljs-string">&quot;&quot;</span>
<span class="hljs-keyword">for</span> line <span class="hljs-keyword">in</span> response.iter_lines():
line = line.decode(<span class="hljs-string">&quot;utf-8&quot;</span>)
<span class="hljs-keyword">if</span> line.startswith(<span class="hljs-string">&quot;data: &quot;</span>) <span class="hljs-keyword">and</span> <span class="hljs-keyword">not</span> line.endswith(<span class="hljs-string">&quot;[DONE]&quot;</span>):
data = json.loads(line[<span class="hljs-built_in">len</span>(<span class="hljs-string">&quot;data: &quot;</span>) :])
chunk = data[<span class="hljs-string">&quot;choices&quot;</span>][<span class="hljs-number">0</span>][<span class="hljs-string">&quot;delta&quot;</span>].get(<span class="hljs-string">&quot;content&quot;</span>, <span class="hljs-string">&quot;&quot;</span>)
content += chunk
<span class="hljs-keyword">yield</span> content
<span class="hljs-comment"># Create streaming interface</span>
demo = gr.ChatInterface(
fn=chat_with_requests_streaming,
<span class="hljs-built_in">type</span>=<span class="hljs-string">&quot;messages&quot;</span>,
title=<span class="hljs-string">&quot;Streaming Chat with Inference Endpoints&quot;</span>,
)
demo.launch()
`,wrap:!1}}),{c(){b(l.$$.fragment),y=J(),s=T("p"),s.innerHTML=c,u=J(),b(r.$$.fragment)},l(e){B(l.$$.fragment,e),y=j(e),s=U(e,"P",{"data-svelte-h":!0}),w(s)!=="svelte-1yg3qym"&&(s.innerHTML=c),u=j(e),B(r.$$.fragment,e)},m(e,M){Z(l,e,M),a(e,y,M),a(e,s,M),a(e,u,M),Z(r,e,M),o=!0},p:ft,i(e){o||(f(l.$$.fragment,e),f(r.$$.fragment,e),o=!0)},o(e){g(l.$$.fragment,e),g(r.$$.fragment,e),o=!1},d(e){e&&(n(y),n(s),n(u)),G(l,e),G(r,e)}}}function Yl(h){let l,y,s,c,u,r;return l=new gt({props:{id:"streaming-implementation",option:"hf-client",$$slots:{default:[Rl]},$$scope:{ctx:h}}}),s=new gt({props:{id:"streaming-implementation",option:"openai-client",$$slots:{default:[Xl]},$$scope:{ctx:h}}}),u=new gt({props:{id:"streaming-implementation",option:"requests",$$slots:{default:[zl]},$$scope:{ctx:h}}}),{c(){b(l.$$.fragment),y=J(),b(s.$$.fragment),c=J(),b(u.$$.fragment)},l(o){B(l.$$.fragment,o),y=j(o),B(s.$$.fragment,o),c=j(o),B(u.$$.fragment,o)},m(o,e){Z(l,o,e),a(o,y,e),Z(s,o,e),a(o,c,e),Z(u,o,e),r=!0},p(o,e){const M={};e&2&&(M.$$scope={dirty:e,ctx:o}),l.$set(M);const m={};e&2&&(m.$$scope={dirty:e,ctx:o}),s.$set(m);const d={};e&2&&(d.$$scope={dirty:e,ctx:o}),u.$set(d)},i(o){r||(f(l.$$.fragment,o),f(s.$$.fragment,o),f(u.$$.fragment,o),r=!0)},o(o){g(l.$$.fragment,o),g(s.$$.fragment,o),g(u.$$.fragment,o),r=!1},d(o){o&&(n(y),n(c)),G(l,o),G(s,o),G(u,o)}}}function Fl(h){let l,y,s,c,u,r,o,e,M,m="This tutorial will guide you from end to end on how to deploy your own chat application using Hugging Face Inference Endpoints. We will use Gradio to create a chat interface and an OpenAI client to connect to the Inference Endpoint.",d,I,A,p,C,v,Ce="First, we need to create an Inference Endpoint for a model that can chat.",Gt,k,be=`Start by navigating to the Inference Endpoints UI, and once you have logged in you should see a button for creating a new Inference
Endpoint. Click the “New” button.`,At,Q,Be='<img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/quick_start/1-new-button.png" alt="new-button"/>',Et,N,Ze=`From there you’ll be directed to the catalog. The Model Catalog consists of popular models which have tuned configurations to work just as one-click
deploys. You can filter by name, task, price of the hardware and much more.`,Wt,V,Ge='<img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/quick_start/2-catalog.png" alt="catalog"/>',$t,S,Ae=`In this example let’s deploy the <a href="https://huggingface.co/Qwen/Qwen3-1.7B" rel="nofollow">Qwen/Qwen3-1.7B</a> model. You can find
it by searching for <code>qwen3 1.7b</code> in the search field and deploy it by clicking the card.`,qt,_,Ee='<img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/tutorials/chatbot/qwen-search.png" alt="qwen"/>',vt,R,We=`Next we’ll choose which hardware and deployment settings we’ll go for. Since this is a catalog model, all of the pre-selected options are very good
defaults. So in this case we don’t need to change anything. In case you want a deeper dive on what the different settings mean you can check out
the <a href="./guides/configuration">configuration guide</a>.`,kt,X,$e=`For this model the Nvidia L4 is the recommended choice. It will be perfect for our testing. Performant but still reasonably priced. Also note that by
default the endpoint will scale down to zero, meaning it will become idle after 1h of inactivity.`,Qt,z,qe="Now all you need to do is click click “Create Endpoint” 🚀",Nt,Y,ve='<img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/tutorials/chatbot/config.png" alt="config"/>',Vt,F,ke=`Now our Inference Endpoint is initializing, which usually takes about 3-5 minutes. If you want to can allow browser notifications which will give you a
ping once the endpoint reaches a running state.`,St,x,Qe='<img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/tutorials/chatbot/init.png" alt="init"/>',_t,H,Rt,L,Ne="Now that we’ve created our Inference Endpoint, we can test it in the playground section.",Xt,P,Ve='<img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/tutorials/chatbot/playground.png" alt="playground"/>',zt,D,Se="You can use the model through a chat interface or copy code snippets to use it in your own application.",Yt,K,Ft,O,_e='We need to grab details of our Inference Endpoint, which we can find in the Endpoint’s <a href="https://endpoints.huggingface.co/" rel="nofollow">Overview</a>. We will need the following details:',xt,tt,Re="<li>The base URL of the endpoint plus the version of the OpenAI API (e.g. <code>https://&lt;id&gt;.&lt;region&gt;.&lt;cloud&gt;.endpoints.huggingface.cloud/v1/</code>)</li> <li>The name of the endpoint to use (e.g. <code>qwen3-1-7b-xll</code>)</li> <li>The token to use for authentication (e.g. <code>hf_&lt;token&gt;</code>)</li>",Ht,et,Xe='<img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/tutorials/chatbot/endpoint-page.png" alt="endpoint-details"/>',Lt,lt,ze='We can find the token in your <a href="https://huggingface.co/settings/tokens" rel="nofollow">account settings</a> which is accessible from the top dropdown and clicking on your account name.',Pt,st,Dt,nt,Ye='The easiest way to deploy a chat application with <a href="https://gradio.app/" rel="nofollow">Gradio</a> is to use the convenient <code>load_chat</code> method. This abstracts everything away and you can have a working chat application quickly.',Kt,at,Ot,ot,Fe="The <code>load_chat</code> method won’t cater for your production needs, but it’s a great way to get started and test your application.",te,it,ee,Mt,xe="If you want more control over your chat application, you can build your own custom chat interface with Gradio. This gives you more flexibility to customize the behavior, add features, and handle errors.",le,pt,He="Choose your preferred method for connecting to Inference Endpoints:",se,$,ne,ct,ae,rt,Le="For a better user experience, you can implement streaming responses. This will require us to handle the messages and <code>yield</code> them to the client.",oe,yt,Pe="Here’s how to add streaming to each client:",ie,q,Me,ut,pe,Jt,De="Our app will run on port 7860 and look like this:",ce,jt,Ke='<img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/tutorials/chatbot/app.png" alt="Gradio app"/>',re,ht,Oe="To deploy, we’ll need to create a new Space and upload our files.",ye,mt,tl='<li><strong>Create a new Space</strong>: Go to <a href="https://huggingface.co/new-space" rel="nofollow">huggingface.co/new-space</a></li> <li><strong>Choose Gradio SDK</strong> and make it public</li> <li><strong>Upload your files</strong>: Upload <code>app.py</code></li> <li><strong>Add your token</strong>: In Space settings, add <code>HF_TOKEN</code> as a secret (get it from <a href="https://huggingface.co/settings/tokens" rel="nofollow">your settings</a>)</li> <li><strong>Launch</strong>: Your app will be live at <code>https://huggingface.co/spaces/your-username/your-space-name</code></li>',ue,Tt,el="<p><strong>Note</strong>: While we used CLI authentication locally, Spaces requires the token as a secret for the deployment environment.</p>",Je,Ut,je,wt,ll="That’s it! You now have a chat application running on Hugging Face Spaces powered by Inference Endpoints.",he,dt,sl='Why not level up and try out the <a href="./transcription">next guide</a> to build a Text-to-Speech application?',me,It,Te,bt,Ue;return u=new Zl({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),o=new E({props:{title:"Build and deploy your own chat application",local:"build-and-deploy-your-own-chat-application",headingTag:"h1"}}),I=new Bl({props:{$$slots:{default:[Ql]},$$scope:{ctx:h}}}),p=new E({props:{title:"Create your Inference Endpoint",local:"create-your-inference-endpoint",headingTag:"h2"}}),H=new E({props:{title:"Test your Inference Endpoint in the browser",local:"test-your-inference-endpoint-in-the-browser",headingTag:"h2"}}),K=new E({props:{title:"Get your Inference Endpoint details",local:"get-your-inference-endpoint-details",headingTag:"h2"}}),st=new E({props:{title:"Deploy in a few lines of code",local:"deploy-in-a-few-lines-of-code",headingTag:"h2"}}),at=new W({props:{code:"aW1wb3J0JTIwb3MlMEElMEFpbXBvcnQlMjBncmFkaW8lMjBhcyUyMGdyJTBBJTBBZ3IubG9hZF9jaGF0KCUwQSUyMCUyMCUyMCUyMGJhc2VfdXJsJTNEJTIyJTNDZW5kcG9pbnQtdXJsJTNFJTJGdjElMkYlMjIlMkMlMjAlMjMlMjBSZXBsYWNlJTIwd2l0aCUyMHlvdXIlMjBlbmRwb2ludCUyMFVSTCUyMCUyQiUyMHZlcnNpb24lMEElMjAlMjAlMjAlMjBtb2RlbCUzRCUyMmVuZHBvaW50LW5hbWUlMjIlMkMlMjAlMjMlMjBSZXBsYWNlJTIwd2l0aCUyMHlvdXIlMjBlbmRwb2ludCUyMG5hbWUlMEElMjAlMjAlMjAlMjB0b2tlbiUzRG9zLmdldGVudiglMjJIRl9UT0tFTiUyMiklMkMlMjAlMjMlMjBSZXBsYWNlJTIwd2l0aCUyMHlvdXIlMjB0b2tlbiUwQSkubGF1bmNoKCk=",highlighted:`<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">import</span> gradio <span class="hljs-keyword">as</span> gr
gr.load_chat(
base_url=<span class="hljs-string">&quot;&lt;endpoint-url&gt;/v1/&quot;</span>, <span class="hljs-comment"># Replace with your endpoint URL + version</span>
model=<span class="hljs-string">&quot;endpoint-name&quot;</span>, <span class="hljs-comment"># Replace with your endpoint name</span>
token=os.getenv(<span class="hljs-string">&quot;HF_TOKEN&quot;</span>), <span class="hljs-comment"># Replace with your token</span>
).launch()`,wrap:!1}}),it=new E({props:{title:"Build your own custom chat application",local:"build-your-own-custom-chat-application",headingTag:"h2"}}),$=new rl({props:{id:"chat-implementation",options:["hf-client","openai-client","requests"],$$slots:{default:[_l]},$$scope:{ctx:h}}}),ct=new E({props:{title:"Adding Streaming Support",local:"adding-streaming-support",headingTag:"h2"}}),q=new rl({props:{id:"streaming-implementation",options:["hf-client","openai-client","requests"],$$slots:{default:[Yl]},$$scope:{ctx:h}}}),ut=new E({props:{title:"Deploy your chat application",local:"deploy-your-chat-application",headingTag:"h2"}}),Ut=new E({props:{title:"Next steps",local:"next-steps",headingTag:"h2"}}),It=new Gl({props:{source:"https://github.com/huggingface/hf-endpoints-documentation/blob/main/docs/source/tutorials/chat_bot.md"}}),{c(){l=T("meta"),y=J(),s=T("p"),c=J(),b(u.$$.fragment),r=J(),b(o.$$.fragment),e=J(),M=T("p"),M.textContent=m,d=J(),b(I.$$.fragment),A=J(),b(p.$$.fragment),C=J(),v=T("p"),v.textContent=Ce,Gt=J(),k=T("p"),k.textContent=be,At=J(),Q=T("p"),Q.innerHTML=Be,Et=J(),N=T("p"),N.textContent=Ze,Wt=J(),V=T("p"),V.innerHTML=Ge,$t=J(),S=T("p"),S.innerHTML=Ae,qt=J(),_=T("p"),_.innerHTML=Ee,vt=J(),R=T("p"),R.innerHTML=We,kt=J(),X=T("p"),X.textContent=$e,Qt=J(),z=T("p"),z.textContent=qe,Nt=J(),Y=T("p"),Y.innerHTML=ve,Vt=J(),F=T("p"),F.textContent=ke,St=J(),x=T("p"),x.innerHTML=Qe,_t=J(),b(H.$$.fragment),Rt=J(),L=T("p"),L.textContent=Ne,Xt=J(),P=T("p"),P.innerHTML=Ve,zt=J(),D=T("p"),D.textContent=Se,Yt=J(),b(K.$$.fragment),Ft=J(),O=T("p"),O.innerHTML=_e,xt=J(),tt=T("ul"),tt.innerHTML=Re,Ht=J(),et=T("p"),et.innerHTML=Xe,Lt=J(),lt=T("p"),lt.innerHTML=ze,Pt=J(),b(st.$$.fragment),Dt=J(),nt=T("p"),nt.innerHTML=Ye,Kt=J(),b(at.$$.fragment),Ot=J(),ot=T("p"),ot.innerHTML=Fe,te=J(),b(it.$$.fragment),ee=J(),Mt=T("p"),Mt.textContent=xe,le=J(),pt=T("p"),pt.textContent=He,se=J(),b($.$$.fragment),ne=J(),b(ct.$$.fragment),ae=J(),rt=T("p"),rt.innerHTML=Le,oe=J(),yt=T("p"),yt.textContent=Pe,ie=J(),b(q.$$.fragment),Me=J(),b(ut.$$.fragment),pe=J(),Jt=T("p"),Jt.textContent=De,ce=J(),jt=T("p"),jt.innerHTML=Ke,re=J(),ht=T("p"),ht.textContent=Oe,ye=J(),mt=T("ol"),mt.innerHTML=tl,ue=J(),Tt=T("blockquote"),Tt.innerHTML=el,Je=J(),b(Ut.$$.fragment),je=J(),wt=T("p"),wt.textContent=ll,he=J(),dt=T("p"),dt.innerHTML=sl,me=J(),b(It.$$.fragment),Te=J(),bt=T("p"),this.h()},l(t){const i=bl("svelte-u9bgzb",document.head);l=U(i,"META",{name:!0,content:!0}),i.forEach(n),y=j(t),s=U(t,"P",{}),Zt(s).forEach(n),c=j(t),B(u.$$.fragment,t),r=j(t),B(o.$$.fragment,t),e=j(t),M=U(t,"P",{"data-svelte-h":!0}),w(M)!=="svelte-1k65zp7"&&(M.textContent=m),d=j(t),B(I.$$.fragment,t),A=j(t),B(p.$$.fragment,t),C=j(t),v=U(t,"P",{"data-svelte-h":!0}),w(v)!=="svelte-15thrvo"&&(v.textContent=Ce),Gt=j(t),k=U(t,"P",{"data-svelte-h":!0}),w(k)!=="svelte-147h3qd"&&(k.textContent=be),At=j(t),Q=U(t,"P",{"data-svelte-h":!0}),w(Q)!=="svelte-dnyg4"&&(Q.innerHTML=Be),Et=j(t),N=U(t,"P",{"data-svelte-h":!0}),w(N)!=="svelte-11utja6"&&(N.textContent=Ze),Wt=j(t),V=U(t,"P",{"data-svelte-h":!0}),w(V)!=="svelte-fxecmn"&&(V.innerHTML=Ge),$t=j(t),S=U(t,"P",{"data-svelte-h":!0}),w(S)!=="svelte-1hfg5l1"&&(S.innerHTML=Ae),qt=j(t),_=U(t,"P",{"data-svelte-h":!0}),w(_)!=="svelte-lar0em"&&(_.innerHTML=Ee),vt=j(t),R=U(t,"P",{"data-svelte-h":!0}),w(R)!=="svelte-1ekjdqo"&&(R.innerHTML=We),kt=j(t),X=U(t,"P",{"data-svelte-h":!0}),w(X)!=="svelte-1baa6m9"&&(X.textContent=$e),Qt=j(t),z=U(t,"P",{"data-svelte-h":!0}),w(z)!=="svelte-uhpblt"&&(z.textContent=qe),Nt=j(t),Y=U(t,"P",{"data-svelte-h":!0}),w(Y)!=="svelte-13bbzr1"&&(Y.innerHTML=ve),Vt=j(t),F=U(t,"P",{"data-svelte-h":!0}),w(F)!=="svelte-1ujocpz"&&(F.textContent=ke),St=j(t),x=U(t,"P",{"data-svelte-h":!0}),w(x)!=="svelte-1vjlki5"&&(x.innerHTML=Qe),_t=j(t),B(H.$$.fragment,t),Rt=j(t),L=U(t,"P",{"data-svelte-h":!0}),w(L)!=="svelte-19rlqt8"&&(L.textContent=Ne),Xt=j(t),P=U(t,"P",{"data-svelte-h":!0}),w(P)!=="svelte-1ka9eqz"&&(P.innerHTML=Ve),zt=j(t),D=U(t,"P",{"data-svelte-h":!0}),w(D)!=="svelte-1pdktab"&&(D.textContent=Se),Yt=j(t),B(K.$$.fragment,t),Ft=j(t),O=U(t,"P",{"data-svelte-h":!0}),w(O)!=="svelte-1lccfab"&&(O.innerHTML=_e),xt=j(t),tt=U(t,"UL",{"data-svelte-h":!0}),w(tt)!=="svelte-zmd562"&&(tt.innerHTML=Re),Ht=j(t),et=U(t,"P",{"data-svelte-h":!0}),w(et)!=="svelte-2v5kr8"&&(et.innerHTML=Xe),Lt=j(t),lt=U(t,"P",{"data-svelte-h":!0}),w(lt)!=="svelte-y2qnlj"&&(lt.innerHTML=ze),Pt=j(t),B(st.$$.fragment,t),Dt=j(t),nt=U(t,"P",{"data-svelte-h":!0}),w(nt)!=="svelte-bfky2a"&&(nt.innerHTML=Ye),Kt=j(t),B(at.$$.fragment,t),Ot=j(t),ot=U(t,"P",{"data-svelte-h":!0}),w(ot)!=="svelte-1gpt8m1"&&(ot.innerHTML=Fe),te=j(t),B(it.$$.fragment,t),ee=j(t),Mt=U(t,"P",{"data-svelte-h":!0}),w(Mt)!=="svelte-1krh3b"&&(Mt.textContent=xe),le=j(t),pt=U(t,"P",{"data-svelte-h":!0}),w(pt)!=="svelte-4kkdzr"&&(pt.textContent=He),se=j(t),B($.$$.fragment,t),ne=j(t),B(ct.$$.fragment,t),ae=j(t),rt=U(t,"P",{"data-svelte-h":!0}),w(rt)!=="svelte-7z4ymg"&&(rt.innerHTML=Le),oe=j(t),yt=U(t,"P",{"data-svelte-h":!0}),w(yt)!=="svelte-ttzscb"&&(yt.textContent=Pe),ie=j(t),B(q.$$.fragment,t),Me=j(t),B(ut.$$.fragment,t),pe=j(t),Jt=U(t,"P",{"data-svelte-h":!0}),w(Jt)!=="svelte-1n5m4re"&&(Jt.textContent=De),ce=j(t),jt=U(t,"P",{"data-svelte-h":!0}),w(jt)!=="svelte-rhi1bh"&&(jt.innerHTML=Ke),re=j(t),ht=U(t,"P",{"data-svelte-h":!0}),w(ht)!=="svelte-13uvzr6"&&(ht.textContent=Oe),ye=j(t),mt=U(t,"OL",{"data-svelte-h":!0}),w(mt)!=="svelte-1yvydwn"&&(mt.innerHTML=tl),ue=j(t),Tt=U(t,"BLOCKQUOTE",{"data-svelte-h":!0}),w(Tt)!=="svelte-1gqrdse"&&(Tt.innerHTML=el),Je=j(t),B(Ut.$$.fragment,t),je=j(t),wt=U(t,"P",{"data-svelte-h":!0}),w(wt)!=="svelte-xl8t4e"&&(wt.textContent=ll),he=j(t),dt=U(t,"P",{"data-svelte-h":!0}),w(dt)!=="svelte-1l2lcqb"&&(dt.innerHTML=sl),me=j(t),B(It.$$.fragment,t),Te=j(t),bt=U(t,"P",{}),Zt(bt).forEach(n),this.h()},h(){Ct(l,"name","hf:doc:metadata"),Ct(l,"content",xl)},m(t,i){de(document.head,l),a(t,y,i),a(t,s,i),a(t,c,i),Z(u,t,i),a(t,r,i),Z(o,t,i),a(t,e,i),a(t,M,i),a(t,d,i),Z(I,t,i),a(t,A,i),Z(p,t,i),a(t,C,i),a(t,v,i),a(t,Gt,i),a(t,k,i),a(t,At,i),a(t,Q,i),a(t,Et,i),a(t,N,i),a(t,Wt,i),a(t,V,i),a(t,$t,i),a(t,S,i),a(t,qt,i),a(t,_,i),a(t,vt,i),a(t,R,i),a(t,kt,i),a(t,X,i),a(t,Qt,i),a(t,z,i),a(t,Nt,i),a(t,Y,i),a(t,Vt,i),a(t,F,i),a(t,St,i),a(t,x,i),a(t,_t,i),Z(H,t,i),a(t,Rt,i),a(t,L,i),a(t,Xt,i),a(t,P,i),a(t,zt,i),a(t,D,i),a(t,Yt,i),Z(K,t,i),a(t,Ft,i),a(t,O,i),a(t,xt,i),a(t,tt,i),a(t,Ht,i),a(t,et,i),a(t,Lt,i),a(t,lt,i),a(t,Pt,i),Z(st,t,i),a(t,Dt,i),a(t,nt,i),a(t,Kt,i),Z(at,t,i),a(t,Ot,i),a(t,ot,i),a(t,te,i),Z(it,t,i),a(t,ee,i),a(t,Mt,i),a(t,le,i),a(t,pt,i),a(t,se,i),Z($,t,i),a(t,ne,i),Z(ct,t,i),a(t,ae,i),a(t,rt,i),a(t,oe,i),a(t,yt,i),a(t,ie,i),Z(q,t,i),a(t,Me,i),Z(ut,t,i),a(t,pe,i),a(t,Jt,i),a(t,ce,i),a(t,jt,i),a(t,re,i),a(t,ht,i),a(t,ye,i),a(t,mt,i),a(t,ue,i),a(t,Tt,i),a(t,Je,i),Z(Ut,t,i),a(t,je,i),a(t,wt,i),a(t,he,i),a(t,dt,i),a(t,me,i),Z(It,t,i),a(t,Te,i),a(t,bt,i),Ue=!0},p(t,[i]){const nl={};i&2&&(nl.$$scope={dirty:i,ctx:t}),I.$set(nl);const al={};i&2&&(al.$$scope={dirty:i,ctx:t}),$.$set(al);const ol={};i&2&&(ol.$$scope={dirty:i,ctx:t}),q.$set(ol)},i(t){Ue||(f(u.$$.fragment,t),f(o.$$.fragment,t),f(I.$$.fragment,t),f(p.$$.fragment,t),f(H.$$.fragment,t),f(K.$$.fragment,t),f(st.$$.fragment,t),f(at.$$.fragment,t),f(it.$$.fragment,t),f($.$$.fragment,t),f(ct.$$.fragment,t),f(q.$$.fragment,t),f(ut.$$.fragment,t),f(Ut.$$.fragment,t),f(It.$$.fragment,t),Ue=!0)},o(t){g(u.$$.fragment,t),g(o.$$.fragment,t),g(I.$$.fragment,t),g(p.$$.fragment,t),g(H.$$.fragment,t),g(K.$$.fragment,t),g(st.$$.fragment,t),g(at.$$.fragment,t),g(it.$$.fragment,t),g($.$$.fragment,t),g(ct.$$.fragment,t),g(q.$$.fragment,t),g(ut.$$.fragment,t),g(Ut.$$.fragment,t),g(It.$$.fragment,t),Ue=!1},d(t){t&&(n(y),n(s),n(c),n(r),n(e),n(M),n(d),n(A),n(C),n(v),n(Gt),n(k),n(At),n(Q),n(Et),n(N),n(Wt),n(V),n($t),n(S),n(qt),n(_),n(vt),n(R),n(kt),n(X),n(Qt),n(z),n(Nt),n(Y),n(Vt),n(F),n(St),n(x),n(_t),n(Rt),n(L),n(Xt),n(P),n(zt),n(D),n(Yt),n(Ft),n(O),n(xt),n(tt),n(Ht),n(et),n(Lt),n(lt),n(Pt),n(Dt),n(nt),n(Kt),n(Ot),n(ot),n(te),n(ee),n(Mt),n(le),n(pt),n(se),n(ne),n(ae),n(rt),n(oe),n(yt),n(ie),n(Me),n(pe),n(Jt),n(ce),n(jt),n(re),n(ht),n(ye),n(mt),n(ue),n(Tt),n(Je),n(je),n(wt),n(he),n(dt),n(me),n(Te),n(bt)),n(l),G(u,t),G(o,t),G(I,t),G(p,t),G(H,t),G(K,t),G(st,t),G(at,t),G(it,t),G($,t),G(ct,t),G(q,t),G(ut,t),G(Ut,t),G(It,t)}}}const xl='{"title":"Build and deploy your own chat application","local":"build-and-deploy-your-own-chat-application","sections":[{"title":"Create your Inference Endpoint","local":"create-your-inference-endpoint","sections":[],"depth":2},{"title":"Test your Inference Endpoint in the browser","local":"test-your-inference-endpoint-in-the-browser","sections":[],"depth":2},{"title":"Get your Inference Endpoint details","local":"get-your-inference-endpoint-details","sections":[],"depth":2},{"title":"Deploy in a few lines of code","local":"deploy-in-a-few-lines-of-code","sections":[],"depth":2},{"title":"Build your own custom chat application","local":"build-your-own-custom-chat-application","sections":[],"depth":2},{"title":"Adding Streaming Support","local":"adding-streaming-support","sections":[{"title":"Hugging Face InferenceClient Streaming","local":"hugging-face-inferenceclient-streaming","sections":[],"depth":3},{"title":"OpenAI Client Streaming","local":"openai-client-streaming","sections":[],"depth":3},{"title":"Requests Library Streaming","local":"requests-library-streaming","sections":[],"depth":3}],"depth":2},{"title":"Deploy your chat application","local":"deploy-your-chat-application","sections":[],"depth":2},{"title":"Next steps","local":"next-steps","sections":[],"depth":2}],"depth":1}';function Hl(h){return Tl(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class ls extends fe{constructor(l){super(),ge(this,l,Hl,Fl,Ie,{})}}export{ls as component};

Xet Storage Details

Size:
63.2 kB
·
Xet hash:
a52ad058d8f7b30d914c39265f183f59f5b2f6939ae6c7be97f805b534b47720

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.