import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
import time
# =======================================================
# Global session state for multi-step questioning
# =======================================================
session_answers = {}
# =======================================================
# Load Model
# =======================================================
model_name = "augtoma/qCammel-13"
print("Loading tokenizer and model...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
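# Some causal-LM tokenizers (e.g. Llama-family) ship without a pad token; reuse EOS so padding works.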
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    low_cpu_mem_usage=True
)
model.eval()
print("Model loaded successfully!")
print(f"Device map: {model.hf_device_map}")
print(f"Model device: {next(model.parameters()).device}")
# =======================================================
# Generate Doctor Response
# =======================================================
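# Generator function: Gradio re-renders the chatbot on every yield, producing the typing effect.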
def generate_doctor_response(history):
    global session_answers
    user_message = history[-1]["content"]
    if not user_message.strip():
        history.append({"role": "assistant", "content": "⚠️ Please describe your symptoms or ask a question."})
        yield history
        return
    # Build prompt with recent conversation context
    prompt = """You are an experienced doctor. Ask **one question at a time** to understand the patient's condition. Provide advice only after gathering enough information. Be concise, caring, and professional.\n\n"""
    recent_history = history[-10:-1] if len(history) > 10 else history[:-1]
    for msg in recent_history:
        role = "Patient" if msg["role"] == "user" else "Doctor"
        content = msg['content'].replace("⚕️ *Note: This is AI-generated information*", "").strip()
        prompt += f"{role}: {content}\n"
    prompt += f"Patient: {user_message}\nDoctor:"
    # Tokenize input
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Generation configuration for concise, interactive answers
    gen_config = GenerationConfig(
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        max_new_tokens=80,  # keep answers short
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        repetition_penalty=1.2
    )
    input_len = inputs["input_ids"].shape[1]
    with torch.no_grad():
        output_ids = model.generate(**inputs, generation_config=gen_config)
    # Decode only the newly generated tokens, not the prompt
    generated_ids = output_ids[0][input_len:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    # Keep only the doctor's turn if the model starts writing the patient's next line
    response = response.split("Patient:")[0].strip()
    # Take only the first 2-3 sentences to keep it concise
    response = ". ".join(response.split(". ")[:3]).strip()
    if response.lower().startswith("doctor:"):
        response = response[7:].strip()
    if len(response) < 10:
        response = "I understand your concern. Could you please provide more details about your symptoms?"
    # Add assistant placeholder for streaming
    history.append({"role": "assistant", "content": ""})
    # Stream the response a few characters at a time with a typing cursor
    for i in range(0, len(response), 4):
        chunk = response[:i + 4]
        history[-1]["content"] = chunk + "▌"
        yield history.copy()
        time.sleep(0.015)
    # Final response without the cursor
    history[-1]["content"] = response
    yield history
# =======================================================
# Gradio Interface
# =======================================================
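# Chat history is exchanged in Gradio's "messages" format: a list of
# {"role": "user" | "assistant", "content": str} dicts, which is what
# generate_doctor_response() above consumes and extends.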
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🩺 AI Doctor Chat Assistant")
    chatbot = gr.Chatbot(
        label="💬 Doctor Consultation",
        type='messages',
        avatar_images=(
            "https://cdn-icons-png.flaticon.com/512/706/706830.png",   # Patient
            "https://cdn-icons-png.flaticon.com/512/3774/3774299.png"  # Doctor
        ),
        height=500
    )
    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Type your symptoms or question here...",
            label="🧍 Your Message",
            lines=2,
            scale=4
        )
    with gr.Row():
        send_btn = gr.Button("💬 Send", variant="primary", scale=1)
        clear_btn = gr.Button("🧹 Clear Chat", scale=1)
    gr.Examples(
        examples=[
            "I have a fever of 102°F since yesterday",
            "I've been having headaches for the past week",
            "I feel very tired all the time",
            "I have a sore throat and body aches",
        ],
        inputs=user_input,
        label="💡 Example Questions"
    )
    def respond(message, history):
        if history is None:
            history = []
        if not message.strip():
            # Yield (rather than return) so the outputs still update on an empty submit
            yield "", history
            return
        history.append({"role": "user", "content": message})
        for updated_history in generate_doctor_response(history):
            yield "", updated_history

    send_btn.click(respond, [user_input, chatbot], [user_input, chatbot])
    user_input.submit(respond, [user_input, chatbot], [user_input, chatbot])
    clear_btn.click(lambda: [], None, chatbot, queue=False)
# Launch
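# queue() enables request queuing so the generator's intermediate yields stream to the UI;
# share=True opens a temporary public Gradio link.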
if __name__ == "__main__":
    demo.queue()
    demo.launch(share=True)