Elieon committed on
Commit
cff74fc
·
verified ·
1 Parent(s): 26dfa1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -7
app.py CHANGED
@@ -5,21 +5,24 @@ import os
5
  # Load secrets
6
  system_message = os.environ["SYSTEM_MESSAGE"]
7
  HF_TOKEN = os.environ["HF_TOKEN"]
8
- MODEL_NAME = os.environ["MODEL_NAME"] # <-- Add this
9
 
10
  client = InferenceClient(token=HF_TOKEN)
11
 
12
- # Chat function — stripped down for speed
13
  def respond(message, history, max_tokens, temperature, top_p):
 
14
  prompt = [{"role": "system", "content": system_message}]
15
- for user, bot in history:
16
- if user: prompt.append({"role": "user", "content": user})
17
- if bot: prompt.append({"role": "assistant", "content": bot})
 
 
18
  prompt.append({"role": "user", "content": message})
19
 
20
  response = []
21
  stream = client.chat.completions.create(
22
- model=MODEL_NAME, # <-- use the secret here
23
  messages=prompt,
24
  max_tokens=max_tokens,
25
  temperature=temperature,
@@ -28,7 +31,10 @@ def respond(message, history, max_tokens, temperature, top_p):
28
  )
29
 
30
  for chunk in stream:
31
- token = chunk.choices[0].delta.content
 
 
 
32
  if token:
33
  response.append(token)
34
  yield "".join(response)
@@ -36,6 +42,7 @@ def respond(message, history, max_tokens, temperature, top_p):
36
  # UI
37
  app = gr.ChatInterface(
38
  fn=respond,
 
39
  additional_inputs=[
40
  gr.Slider(16, 2048, value=512, step=1, label="Max Tokens"),
41
  gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
 
5
  # Load secrets
6
  system_message = os.environ["SYSTEM_MESSAGE"]
7
  HF_TOKEN = os.environ["HF_TOKEN"]
8
+ MODEL_NAME = os.environ["MODEL_NAME"]
9
 
10
  client = InferenceClient(token=HF_TOKEN)
11
 
12
+ # Chat function
13
  def respond(message, history, max_tokens, temperature, top_p):
14
+ # Build prompt in OpenAI-style
15
  prompt = [{"role": "system", "content": system_message}]
16
+ for msg in history:
17
+ if msg["role"] == "user":
18
+ prompt.append({"role": "user", "content": msg["content"]})
19
+ elif msg["role"] == "assistant":
20
+ prompt.append({"role": "assistant", "content": msg["content"]})
21
  prompt.append({"role": "user", "content": message})
22
 
23
  response = []
24
  stream = client.chat.completions.create(
25
+ model=MODEL_NAME,
26
  messages=prompt,
27
  max_tokens=max_tokens,
28
  temperature=temperature,
 
31
  )
32
 
33
  for chunk in stream:
34
+ if not chunk.choices:
35
+ continue # skip empty chunks safely
36
+ delta = chunk.choices[0].delta
37
+ token = getattr(delta, "content", None)
38
  if token:
39
  response.append(token)
40
  yield "".join(response)
 
42
  # UI
43
  app = gr.ChatInterface(
44
  fn=respond,
45
+ type="messages", # ✅ fixes Gradio warning
46
  additional_inputs=[
47
  gr.Slider(16, 2048, value=512, step=1, label="Max Tokens"),
48
  gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),