# medical_model / app_fallback.py
# Author: Deva1211 · commit 1f37230 ("Fixed parameters")
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
from huggingface_hub import login
# Authenticate with the Hugging Face Hub using the Space's HF_TOKEN secret.
# NOTE(review): if HF_TOKEN is unset this passes token=None — presumably the
# secret is always configured on the Space; confirm.
login(token=os.getenv("HF_TOKEN"))
# Use DialoGPT-medium for guaranteed compatibility
MODEL_NAME = "microsoft/DialoGPT-medium"
# Simple medical prompt
MEDICAL_PROMPT = """You are a friendly and smart medical assistant. Your job is to give short, clear, and helpful health information.
Your answers should:
- Stay focused. No long essays or extra fluff.
- Give basic helpful steps for common symptoms like fever, cough, or headache (e.g., rest, drink fluids, take paracetamol if needed).
- For any serious or unclear issues, remind the user to see a doctor β€” but do it briefly and naturally.
- Keep responses concise and under 4 sentences when possible.
Tone:
- Friendly, supportive, and calm.
- No robotic warnings unless needed. Keep it real and human.
- Use emojis like 😊 or πŸ‘ occasionally to appear friendly.
Important rules:
- NEVER include text in parentheses in your responses.
- NEVER include any meta-instructions in your responses.
- NEVER include reminders about what you should do in future responses.
- DO NOT include phrases like "We're here to help" or "I'm just an AI".
- DO NOT include any text that instructs you what to do or how to behave.
- DO NOT include any sentences that start with "If the user asks..." or "Remember..."
- DO NOT include "(smile)" - instead, use actual emojis like 😊 or πŸ‘ when appropriate.
- DO NOT include numbered references like [1], [2], etc. in your responses.
- DO NOT include any text that explains what your response is doing.
- DO NOT include "user:" or "assistant:" prefixes in your responses.
- DO NOT include hypothetical user questions in your responses.
- DO NOT refuse to answer harmless non-medical questions like jokes or general knowledge.
- Don't give exact dosages or diagnoses.
- Be consistent in your responses regardless of the user's role."""
# Module-level model state: populated by load_model(); chat_response()
# checks both for None before attempting generation.
model = None
tokenizer = None
def load_model():
    """Fetch DialoGPT-medium and its tokenizer into the module globals.

    Returns:
        bool: True when both components loaded, False if anything raised.
    """
    global model, tokenizer
    try:
        print("🏥 Loading DialoGPT-medium for medical chat...")
        # Tokenizer first; DialoGPT ships without a pad token, so alias it
        # to EOS so padding-aware code paths work.
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        # float32 on purpose: this fallback Space runs on CPU.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME, torch_dtype=torch.float32, low_cpu_mem_usage=True
        )
        print("✅ DialoGPT-medium loaded successfully!")
        return True
    except Exception as e:
        print("❌ Failed to load model: " + str(e))
        return False
def chat_response(message, history):
    """Generate a medical-assistant reply for one chat turn.

    Args:
        message: The user's latest message (str).
        history: Prior chat turns supplied by Gradio (unused here; the
            fallback model answers each turn independently).

    Returns:
        str: The assistant's reply, or a friendly error/fallback message.
    """
    global model, tokenizer
    if model is None or tokenizer is None:
        return "❌ Model not loaded. Please wait for initialization."
    if not message or not message.strip():
        return "Please enter a question! 😊"
    try:
        print(f"🔄 Processing: {message[:50]}{'...' if len(message) > 50 else ''}")
        # DialoGPT has no chat template; rely on plain Human:/Assistant: framing.
        conversation = f"{MEDICAL_PROMPT}\n\nHuman: {message}\nAssistant:"
        print(f"📝 Prompt length: {len(conversation)} characters")
        # Tokenize via __call__ so we also get an attention mask: with
        # pad_token == eos_token, generate() cannot infer the mask reliably
        # and warns when it is omitted.
        # NOTE(review): truncation drops tokens from the end, which can cut
        # the user's message while keeping the long system prompt — confirm
        # 800 tokens is enough headroom in practice.
        encoded = tokenizer(conversation, return_tensors="pt", max_length=800, truncation=True)
        inputs = encoded["input_ids"]
        print(f"🔢 Input tokens: {inputs.shape[1]}")
        # Generate a short sampled continuation.
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                attention_mask=encoded["attention_mask"],
                max_new_tokens=80,  # keep replies short
                temperature=0.8,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.3
                # early_stopping was dropped: it applies only to beam search
                # and is ignored (with a warning) under sampling.
            )
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"🔍 Full response length: {len(full_response)}")
        # Everything after the last "Assistant:" marker is the new reply.
        if "Assistant:" in full_response:
            response = full_response.split("Assistant:")[-1].strip()
        else:
            response = full_response[len(conversation):].strip()
        # Guard against empty or degenerate generations.
        if not response or len(response) < 5:
            response = "I'd be happy to help! Could you ask me about a specific health topic? 😊"
        print(f"✅ Final response length: {len(response)}")
        print(f"📄 Response: {response}")
        return response
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        return "Sorry, I had a technical issue. Please try again! 😊"
# Load model
print("πŸ₯ Initializing Medical Chatbot (Fallback Version)...")
model_loaded = load_model()
# Create interface
demo = gr.ChatInterface(
chat_response,
title="πŸ₯ Medical Assistant (Fallback)",
description="Simple medical chatbot using DialoGPT-medium. This version focuses on reliability and fast responses.",
examples=[
"What are the symptoms of diabetes?",
"How can I treat a headache?",
"What should I do for a fever?",
"Tell me about healthy eating",
"How to improve sleep quality?"
]
)
if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard binding for Hugging Face Spaces.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,  # NOTE(review): share links are unnecessary/ignored when running on Spaces — confirm
        show_error=True,
        debug=True
    )