"""Fallback medical chatbot: a Gradio ChatInterface over DialoGPT-medium.

Single-file app intended for a Hugging Face Space. It loads
microsoft/DialoGPT-medium on CPU, prepends a fixed system prompt to each
user message, and serves short conversational health answers.
"""

import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Authenticate only when a token is actually configured. DialoGPT-medium is a
# public model, so a missing HF_TOKEN must not crash startup — and
# login(token=None) raises instead of no-opping.
_HF_TOKEN = os.getenv("HF_TOKEN")
if _HF_TOKEN:
    login(token=_HF_TOKEN)

# Use DialoGPT-medium for guaranteed compatibility
MODEL_NAME = "microsoft/DialoGPT-medium"

# System prompt prepended to every user message (content unchanged).
MEDICAL_PROMPT = """You are a friendly and smart medical assistant. Your job is to give short, clear, and helpful health information.

Your answers should:
- Stay focused. No long essays or extra fluff.
- Give basic helpful steps for common symptoms like fever, cough, or headache (e.g., rest, drink fluids, take paracetamol if needed).
- For any serious or unclear issues, remind the user to see a doctor — but do it briefly and naturally.
- Keep responses concise and under 4 sentences when possible.

Tone:
- Friendly, supportive, and calm.
- No robotic warnings unless needed. Keep it real and human.
- Use emojis like 😊 or 👍 occasionally to appear friendly.

Important rules:
- NEVER include text in parentheses in your responses.
- NEVER include any meta-instructions in your responses.
- NEVER include reminders about what you should do in future responses.
- DO NOT include phrases like "We're here to help" or "I'm just an AI".
- DO NOT include any text that instructs you what to do or how to behave.
- DO NOT include any sentences that start with "If the user asks..." or "Remember..."
- DO NOT include "(smile)" - instead, use actual emojis like 😊 or 👍 when appropriate.
- DO NOT include numbered references like [1], [2], etc. in your responses.
- DO NOT include any text that explains what your response is doing.
- DO NOT include "user:" or "assistant:" prefixes in your responses.
- DO NOT include hypothetical user questions in your responses.
- DO NOT refuse to answer harmless non-medical questions like jokes or general knowledge.
- Don't give exact dosages or diagnoses.
- Be consistent in your responses regardless of the user's role."""

# Globals populated by load_model(); None until loading succeeds.
model = None
tokenizer = None


def load_model():
    """Load the DialoGPT model and tokenizer into module globals.

    Returns:
        bool: True on success, False if loading raised (error is printed,
        not re-raised, so the UI can still start and report the failure).
    """
    global model, tokenizer
    try:
        print("🏥 Loading DialoGPT-medium for medical chat...")

        # Load tokenizer; DialoGPT ships without a pad token, so reuse EOS
        # (generate() below is told the same via pad_token_id).
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Load model
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,  # Use float32 for CPU
            low_cpu_mem_usage=True,
        )
        model.eval()  # inference only — disable dropout etc.

        print("✅ DialoGPT-medium loaded successfully!")
        return True
    except Exception as e:
        print(f"❌ Failed to load model: {str(e)}")
        return False


def chat_response(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Args:
        message: The user's latest message (str).
        history: Prior turns supplied by Gradio (unused — each turn is
            answered independently against the system prompt).

    Returns:
        str: The assistant's reply, or a friendly error/fallback message.
    """
    global model, tokenizer

    if model is None or tokenizer is None:
        return "❌ Model not loaded. Please wait for initialization."

    if not message or not message.strip():
        return "Please enter a question! 😊"

    try:
        print(f"🔄 Processing: {message[:50]}{'...' if len(message) > 50 else ''}")

        # Create conversation format
        conversation = f"{MEDICAL_PROMPT}\n\nHuman: {message}\nAssistant:"
        print(f"📝 Prompt length: {len(conversation)} characters")

        # Tokenize via __call__ so we also get an attention mask; with
        # pad_token == eos_token, generate() cannot infer the mask itself.
        encoded = tokenizer(
            conversation,
            return_tensors="pt",
            max_length=800,
            truncation=True,
        )
        print(f"🔢 Input tokens: {encoded['input_ids'].shape[1]}")

        # Generate. NOTE: early_stopping was dropped — it only applies to
        # beam search and is ignored (with a warning) under sampling.
        with torch.no_grad():
            outputs = model.generate(
                encoded["input_ids"],
                attention_mask=encoded["attention_mask"],
                max_new_tokens=80,  # Short responses
                temperature=0.8,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.3,
            )

        # Decode
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"🔍 Full response length: {len(full_response)}")

        # Extract the text after the final "Assistant:" marker; fall back to
        # stripping the prompt prefix if the marker was truncated away.
        if "Assistant:" in full_response:
            response = full_response.split("Assistant:")[-1].strip()
        else:
            response = full_response[len(conversation):].strip()

        # Clean up: replace empty/degenerate generations with a nudge.
        if not response or len(response) < 5:
            response = "I'd be happy to help! Could you ask me about a specific health topic? 😊"

        print(f"✅ Final response length: {len(response)}")
        print(f"📄 Response: {response}")
        return response

    except Exception as e:
        print(f"❌ Error: {str(e)}")
        return "Sorry, I had a technical issue. Please try again! 😊"


# Load model at import time so the Space is ready when the UI comes up.
print("🏥 Initializing Medical Chatbot (Fallback Version)...")
model_loaded = load_model()

# Create interface
demo = gr.ChatInterface(
    chat_response,
    title="🏥 Medical Assistant (Fallback)",
    description="Simple medical chatbot using DialoGPT-medium. This version focuses on reliability and fast responses.",
    examples=[
        "What are the symptoms of diabetes?",
        "How can I treat a headache?",
        "What should I do for a fever?",
        "Tell me about healthy eating",
        "How to improve sleep quality?",
    ],
)

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,  # ignored (with a warning) when running inside a Space
        show_error=True,
        debug=True,
    )