asusf15 committed on
Commit
19b2a18
·
verified ·
1 Parent(s): 3bf3c5d

Fix: handle streaming response format correctly

Browse files
Files changed (1) hide show
  1. app.py +20 -29
app.py CHANGED
@@ -6,24 +6,9 @@ import gradio as gr
6
  from huggingface_hub import InferenceClient
7
 
8
  HF_TOKEN = os.environ.get("HF_TOKEN")
 
9
 
10
- # Try multiple providers in order of preference
11
- def get_client():
12
- models = [
13
- "Qwen/Qwen2.5-72B-Instruct",
14
- "Qwen/Qwen2.5-7B-Instruct",
15
- "meta-llama/Llama-3.1-8B-Instruct",
16
- ]
17
- for model in models:
18
- try:
19
- c = InferenceClient(model, token=HF_TOKEN)
20
- # Quick test
21
- return c, model
22
- except Exception:
23
- continue
24
- return InferenceClient(models[0], token=HF_TOKEN), models[0]
25
-
26
- client, MODEL_USED = get_client()
27
 
28
  SYSTEM_PROMPT = """You are DeepMed-R1, a medical reasoning AI trained with GRPO and multi-objective clinical rewards on AMD MI300X.
29
 
@@ -57,24 +42,30 @@ def respond(message, history):
57
 
58
  response = ""
59
  try:
60
- for token in client.chat_completion(messages=messages, max_tokens=3000, temperature=0.3, top_p=0.95, stream=True):
61
- delta = token.choices[0].delta.content or ""
62
- response += delta
63
- yield response
 
 
 
 
 
 
 
 
 
 
64
  except Exception as e:
65
  error_msg = str(e)
66
- if "api_key" in error_msg.lower() or "token" in error_msg.lower() or "401" in error_msg:
67
- yield ("⚠️ **Authentication Required**\n\n"
68
- "Please add your HF_TOKEN as a Space secret:\n"
69
- "1. Go to Space Settings → Repository secrets\n"
70
- "2. Add secret: Name=`HF_TOKEN`, Value=your token\n\n"
71
- f"Error: {error_msg}")
72
  else:
73
- yield f"Error: {error_msg}\n\nPlease try again."
74
 
75
 
76
  with gr.Blocks(title="DeepMed-R1", theme=gr.themes.Soft(primary_hue="blue")) as demo:
77
- gr.Markdown(f"""
78
  # 🏥 DeepMed-R1: Medical Reasoning AI
79
 
80
  **Systematic clinical reasoning powered by GRPO + Multi-Objective Clinical Rewards**
 
6
  from huggingface_hub import InferenceClient
7
 
8
  HF_TOKEN = os.environ.get("HF_TOKEN")
9
+ client = InferenceClient(token=HF_TOKEN)
10
 
11
+ MODEL = "Qwen/Qwen2.5-72B-Instruct"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  SYSTEM_PROMPT = """You are DeepMed-R1, a medical reasoning AI trained with GRPO and multi-objective clinical rewards on AMD MI300X.
14
 
 
42
 
43
  response = ""
44
  try:
45
+ stream = client.chat_completion(
46
+ model=MODEL,
47
+ messages=messages,
48
+ max_tokens=3000,
49
+ temperature=0.3,
50
+ top_p=0.95,
51
+ stream=True,
52
+ )
53
+ for chunk in stream:
54
+ if chunk.choices and len(chunk.choices) > 0:
55
+ delta = chunk.choices[0].delta
56
+ if hasattr(delta, "content") and delta.content:
57
+ response += delta.content
58
+ yield response
59
  except Exception as e:
60
  error_msg = str(e)
61
+ if not response:
62
+ yield f"⚠️ Error: {error_msg}\n\nPlease ensure HF_TOKEN is set in Space secrets."
 
 
 
 
63
  else:
64
+ yield response
65
 
66
 
67
  with gr.Blocks(title="DeepMed-R1", theme=gr.themes.Soft(primary_hue="blue")) as demo:
68
+ gr.Markdown("""
69
  # 🏥 DeepMed-R1: Medical Reasoning AI
70
 
71
  **Systematic clinical reasoning powered by GRPO + Multi-Objective Clinical Rewards**