VcRlAgent committed on
Commit
54b83a6
·
1 Parent(s): 3aacf33

Starter LLM Inference Call

Browse files
Files changed (1) hide show
  1. app.py +8 -3
app.py CHANGED
@@ -3,10 +3,14 @@ import gradio as gr
3
  from openai import OpenAI
4
 
5
  # Initialize HF Router client using OpenAI SDK
 
6
  client = OpenAI(
7
  base_url="https://router.huggingface.co/v1",
8
  api_key=os.environ["HF_TOKEN"], # ensure HF_TOKEN is set
9
  )
 
 
 
10
 
11
  # LLM function
12
  def ask_llm(prompt):
@@ -16,7 +20,8 @@ def ask_llm(prompt):
16
  messages=[
17
  {"role": "user", "content": prompt}
18
  ],
19
- max_tokens=200
 
20
  )
21
  #return completion.choices[0].message["content"]
22
  return completion.choices[0].message.content
@@ -29,8 +34,8 @@ demo = gr.Interface(
29
  fn=ask_llm,
30
  inputs=gr.Textbox(lines=3, label="Ask the AI"),
31
  outputs=gr.Textbox(label="Response"),
32
- title="HF Router LLM Demo",
33
- description="Powered by HuggingFace Router + OpenAI SDK client."
34
  )
35
 
36
  demo.launch()
 
3
  from openai import OpenAI
4
 
5
  # Initialize HF Router client using OpenAI SDK
6
+ '''
7
  client = OpenAI(
8
  base_url="https://router.huggingface.co/v1",
9
  api_key=os.environ["HF_TOKEN"], # ensure HF_TOKEN is set
10
  )
11
+ '''
12
+
13
+ client = InferenceClient(api_key=os.environ["HF_TOKEN"])
14
 
15
  # LLM function
16
  def ask_llm(prompt):
 
20
  messages=[
21
  {"role": "user", "content": prompt}
22
  ],
23
+ max_tokens=200,
24
+ temperature=0.7
25
  )
26
  #return completion.choices[0].message["content"]
27
  return completion.choices[0].message.content
 
34
  fn=ask_llm,
35
  inputs=gr.Textbox(lines=3, label="Ask the AI"),
36
  outputs=gr.Textbox(label="Response"),
37
+ title="HF Inference Client LLM Demo",
38
+ description="Powered by HuggingFace InferenceClient SDK."
39
  )
40
 
41
  demo.launch()