import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
from huggingface_hub import login
# Login with the secret token
login(token=os.getenv("HF_TOKEN"))
# Use DialoGPT-medium for guaranteed compatibility
MODEL_NAME = "microsoft/DialoGPT-medium"
# Simple medical prompt
MEDICAL_PROMPT = """You are a friendly and smart medical assistant. Your job is to give short, clear, and helpful health information.
Your answers should:
- Stay focused. No long essays or extra fluff.
- Give basic helpful steps for common symptoms like fever, cough, or headache (e.g., rest, drink fluids, take paracetamol if needed).
- For any serious or unclear issues, remind the user to see a doctor, but do it briefly and naturally.
- Keep responses concise and under 4 sentences when possible.
Tone:
- Friendly, supportive, and calm.
- No robotic warnings unless needed. Keep it real and human.
- Use emojis like 😊 or 🙂 occasionally to appear friendly.
Important rules:
- NEVER include text in parentheses in your responses.
- NEVER include any meta-instructions in your responses.
- NEVER include reminders about what you should do in future responses.
- DO NOT include phrases like "We're here to help" or "I'm just an AI".
- DO NOT include any text that instructs you what to do or how to behave.
- DO NOT include any sentences that start with "If the user asks..." or "Remember..."
- DO NOT include "(smile)" - instead, use actual emojis like π or π when appropriate.
- DO NOT include numbered references like [1], [2], etc. in your responses.
- DO NOT include any text that explains what your response is doing.
- DO NOT include "user:" or "assistant:" prefixes in your responses.
- DO NOT include hypothetical user questions in your responses.
- DO NOT refuse to answer harmless non-medical questions like jokes or general knowledge.
- Don't give exact dosages or diagnoses.
- Be consistent in your responses regardless of the user's role."""
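# Note: this system prompt is prepended to every request, so it consumes part
# of DialoGPT's 1024-token context window; the tokenizer call in chat_response
# below truncates inputs to 800 tokens to leave room for the reply.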
# Global variables
model = None
tokenizer = None
def load_model():
    """Load the DialoGPT model and tokenizer."""
    global model, tokenizer
    try:
        print("🏥 Loading DialoGPT-medium for medical chat...")
        # Load tokenizer; DialoGPT ships without a pad token, so reuse EOS
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        # Load model
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,  # use float32 for CPU inference
            low_cpu_mem_usage=True
        )
        print("✅ DialoGPT-medium loaded successfully!")
        return True
    except Exception as e:
        print(f"❌ Failed to load model: {e}")
        return False
def chat_response(message, history):
    """Generate a response using DialoGPT."""
    global model, tokenizer
    if model is None or tokenizer is None:
        return "❌ Model not loaded. Please wait for initialization."
    if not message or not message.strip():
        return "Please enter a question! 😊"
    try:
        print(f"Processing: {message[:50]}{'...' if len(message) > 50 else ''}")
        # Build the prompt: system instructions plus the user's message
        conversation = f"{MEDICAL_PROMPT}\n\nHuman: {message}\nAssistant:"
        print(f"Prompt length: {len(conversation)} characters")
        # Tokenize; since pad and EOS share an id, pass the attention mask explicitly
        encoded = tokenizer(conversation, return_tensors="pt", max_length=800, truncation=True)
        print(f"Input tokens: {encoded['input_ids'].shape[1]}")
        # Generate a short, sampled reply
        with torch.no_grad():
            outputs = model.generate(
                encoded["input_ids"],
                attention_mask=encoded["attention_mask"],
                max_new_tokens=80,  # keep responses short
                temperature=0.8,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.3  # discourage DialoGPT's tendency to echo
            )
        # Decode the full sequence, then strip the prompt
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"Full response length: {len(full_response)}")
        # Extract the assistant's part of the response
        if "Assistant:" in full_response:
            response = full_response.split("Assistant:")[-1].strip()
        else:
            response = full_response[len(conversation):].strip()
        # Fall back to a friendly default for empty or trivial outputs
        if not response or len(response) < 5:
            response = "I'd be happy to help! Could you ask me about a specific health topic? 😊"
        print(f"✅ Final response length: {len(response)}")
        print(f"Response: {response}")
        return response
    except Exception as e:
        print(f"❌ Error: {e}")
        return "Sorry, I had a technical issue. Please try again! 😊"
# Load the model at startup
print("🏥 Initializing Medical Chatbot (Fallback Version)...")
model_loaded = load_model()
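# Optional smoke test for local runs (illustrative only; the sample question
# is hypothetical). Uncomment to verify generation before launching the UI:
# if model_loaded:
#     print(chat_response("I have a mild headache, what should I do?", []))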
# Create the chat interface
demo = gr.ChatInterface(
    chat_response,
    title="🏥 Medical Assistant (Fallback)",
    description="Simple medical chatbot using DialoGPT-medium. This version focuses on reliability and fast responses.",
    examples=[
        "What are the symptoms of diabetes?",
        "How can I treat a headache?",
        "What should I do for a fever?",
        "Tell me about healthy eating",
        "How can I improve sleep quality?"
    ]
)
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True,
        debug=True
    )