Raemi committed on
Commit
436ee5f
·
verified ·
1 Parent(s): 800ea60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -17
app.py CHANGED
@@ -2,8 +2,8 @@ import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import os
4
 
5
- # 🔹 Load HF token from secret
6
- HF_TOKEN = os.environ.get('telemedpro') # store your HF token in Space Secrets
7
 
8
  # 🔹 Fixed persona system message
9
  PERSONA_MESSAGE = (
@@ -13,14 +13,15 @@ PERSONA_MESSAGE = (
13
  "You behave politely, patiently, and with care, like a trusted family doctor."
14
  )
15
 
16
- # 🔹 Create InferenceClient once
17
  client = InferenceClient(token=HF_TOKEN, model="m42-health/Llama3-Med42-70B")
18
 
19
- def respond(message, history, max_tokens, temperature, top_p):
 
20
  try:
21
  messages = [{"role": "system", "content": PERSONA_MESSAGE}]
22
 
23
- # Add previous conversation
24
  if history:
25
  for h in history:
26
  user_msg = h[0] if h[0] else ""
@@ -28,25 +29,21 @@ def respond(message, history, max_tokens, temperature, top_p):
28
  messages.append({"role": "user", "content": user_msg})
29
  messages.append({"role": "assistant", "content": ai_msg})
30
 
31
- # Add current user message
32
  messages.append({"role": "user", "content": message})
33
 
34
- response = ""
35
- # Stream model output token by token
36
- for msg in client.chat_completion(
37
  messages,
38
  max_tokens=max_tokens,
39
- stream=True,
40
  temperature=temperature,
41
- top_p=top_p,
42
- ):
43
- if msg.choices and msg.choices[0].delta.content:
44
- token = msg.choices[0].delta.content
45
- response += token
46
- yield response
47
 
48
  except Exception as e:
49
- yield f"⚠️ Space error: {e}"
50
 
51
  # 🔹 Gradio Chat Interface
52
  chatbot = gr.ChatInterface(
 
2
  from huggingface_hub import InferenceClient
3
  import os
4
 
5
# 🔹 Load HF token from Space Secrets (add it under Space Settings → Secrets)
# NOTE(review): the secret is named 'telemedpro' rather than e.g. HF_TOKEN —
# confirm this matches the secret name configured for the Space.
HF_TOKEN = os.getenv('telemedpro')
7
 
8
  # 🔹 Fixed persona system message
9
  PERSONA_MESSAGE = (
 
13
  "You behave politely, patiently, and with care, like a trusted family doctor."
14
  )
15
 
16
# 🔹 One shared InferenceClient for the Med42 model, created once at import time
client = InferenceClient(
    model="m42-health/Llama3-Med42-70B",
    token=HF_TOKEN,
)
18
 
19
# 🔹 Respond function (non-streaming for stability)
def respond(message, history, max_tokens=512, temperature=0.7, top_p=0.95):
    """Chat handler for the Gradio interface.

    Builds the message list — persona system prompt, prior turns, then the
    new user message — calls the hosted model once (non-streaming), and
    returns the assistant's reply.

    Parameters
    ----------
    message : str
        The current user message.
    history : list | None
        Prior (user, assistant) turn pairs as passed by gr.ChatInterface.
        ``None`` and ``[]`` are both accepted.
    max_tokens : int
        Generation cap forwarded to the model.
    temperature : float
        Sampling temperature forwarded to the model.
    top_p : float
        Nucleus-sampling parameter forwarded to the model.

    Returns
    -------
    str
        The model reply, or a "⚠️ Space error: ..." message on any failure
        so the chat UI never crashes.
    """
    try:
        messages = [{"role": "system", "content": PERSONA_MESSAGE}]

        # Append previous conversation safely. Iterating over
        # ``history or []`` handles None/empty without a separate guard;
        # ``or ""`` guards against None entries in a turn pair.
        for user_msg, ai_msg in history or []:
            messages.append({"role": "user", "content": user_msg or ""})
            messages.append({"role": "assistant", "content": ai_msg or ""})

        # Append current user message
        messages.append({"role": "user", "content": message})

        # 🔹 Non-streaming call for stability
        result = client.chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        return result.choices[0].message.content

    except Exception as e:
        # Surface the error in the chat UI instead of crashing the Space.
        return f"⚠️ Space error: {e}"
47
 
48
  # 🔹 Gradio Chat Interface
49
  chatbot = gr.ChatInterface(