File size: 2,340 Bytes
a5edc64
 
0eeeabc
a5edc64
4e1334f
0eeeabc
a5edc64
4e1334f
0eeeabc
 
 
 
 
 
a5edc64
4e1334f
0eeeabc
a5edc64
4e1334f
0eeeabc
 
 
 
a5edc64
4e1334f
0eeeabc
4e1334f
a5edc64
0eeeabc
 
a5edc64
0eeeabc
 
 
 
 
 
 
 
 
 
 
 
 
a5edc64
0eeeabc
 
a5edc64
0eeeabc
a5edc64
0eeeabc
a5edc64
 
0eeeabc
 
 
 
a5edc64
 
 
0eeeabc
a5edc64
0eeeabc
a5edc64
 
4e1334f
a5edc64
0eeeabc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import gradio as gr
from huggingface_hub import InferenceClient
import os

# 🔹 Hugging Face API token, read from the "telemedpro" environment variable
# (populated from the Space secrets; None when the variable is not set).
HF_TOKEN = os.getenv("telemedpro")

# 🔹 Default persona prompt, used as the system message unless the caller
# supplies a different one.
SYSTEM_MESSAGE = (
    "You are Dr. Alex, a highly knowledgeable yet empathetic doctor. "
    "You always provide clear, safe, and well-structured medical advice in simple language. "
    "You avoid making unsafe claims and encourage users to seek professional help when needed. "
    "You behave politely, patiently, and with care, like a trusted family doctor."
)

# 🔹 One shared inference client, created once at import time and reused
# for every request.
client = InferenceClient(model="m42-health/Llama3-Med42-70B", token=HF_TOKEN)

# 🔹 Respond function
def respond(message, history, system_message=SYSTEM_MESSAGE, max_tokens=512, temperature=0.7, top_p=0.95):
    """Stream the assistant's reply to ``message`` for the chat UI.

    The prompt is assembled as: system message, then the prior turns from
    ``history``, then the new user message. The completion is requested
    from the module-level ``client`` with ``stream=True``.

    Args:
        message: The user's latest message.
        history: Prior conversation turns; expected to be a list of
            ``{"role": ..., "content": ...}`` dicts. May be empty or None.
        system_message: System prompt placed first in the conversation.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The accumulated response text after each non-empty streamed token.
        If anything raises while building or streaming the request, a
        single "Space error" message describing the exception is yielded
        instead of propagating it.
    """
    try:
        messages = [{"role": "system", "content": system_message}]

        # History entries coming from the UI may carry extra, UI-only keys
        # (e.g. "metadata", "options"). Forward only the fields that make
        # up a chat message so the inference API never sees unknown keys.
        for turn in history or ():
            if isinstance(turn, dict):
                turn = {
                    key: value
                    for key, value in turn.items()
                    if key in ("role", "content")
                }
            messages.append(turn)

        messages.append({"role": "user", "content": message})

        response = ""
        stream = client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        )
        for chunk in stream:
            # Some chunks carry no choices or an empty delta; skip them.
            if not chunk.choices:
                continue
            token = getattr(chunk.choices[0].delta, "content", None)
            if token:
                response += token
                yield response

    except Exception as e:
        # UI boundary: surface the failure in the chat window rather than
        # letting the generator die with an unhandled exception.
        yield f"⚠️ Space error: {e}"

# 🔹 Gradio chat interface
# The additional inputs are listed in the same order as the extra
# parameters of `respond` (system_message, max_tokens, temperature, top_p).
chatbot = gr.ChatInterface(
    fn=respond,
    type="messages",
    additional_inputs=[
        gr.Textbox(value=SYSTEM_MESSAGE, label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

# 🔹 Layout: a heading followed by the chat interface defined above.
with gr.Blocks() as demo:
    gr.Markdown("## 🩺 AI Health Mentor — Dr. Alex")
    chatbot.render()

# 🔹 Launch only when executed as a script.
if __name__ == "__main__":
    demo.launch(show_error=True)