import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Log in with the secret token, if one is configured
# (an unguarded login() raises when HF_TOKEN is unset)
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)
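# HF_TOKEN is expected as a Space secret; DialoGPT-medium is a public model,
# so the login is optional and only needed for gated or private assets.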
# Use DialoGPT-medium for guaranteed compatibility
MODEL_NAME = "microsoft/DialoGPT-medium"

# Simple medical prompt
MEDICAL_PROMPT = """You are a friendly and smart medical assistant. Your job is to give short, clear, and helpful health information.

Your answers should:
- Stay focused. No long essays or extra fluff.
- Give basic helpful steps for common symptoms like fever, cough, or headache (e.g., rest, drink fluids, take paracetamol if needed).
- For any serious or unclear issues, remind the user to see a doctor, but do it briefly and naturally.
- Keep responses concise and under 4 sentences when possible.

Tone:
- Friendly, supportive, and calm.
- No robotic warnings unless needed. Keep it real and human.
- Use emojis like 😊 or 🙂 occasionally to appear friendly.

Important rules:
- NEVER include text in parentheses in your responses.
- NEVER include any meta-instructions in your responses.
- NEVER include reminders about what you should do in future responses.
- DO NOT include phrases like "We're here to help" or "I'm just an AI".
- DO NOT include any text that instructs you what to do or how to behave.
- DO NOT include any sentences that start with "If the user asks..." or "Remember...".
- DO NOT include "(smile)"; instead, use actual emojis like 😊 or 🙂 when appropriate.
- DO NOT include numbered references like [1], [2], etc. in your responses.
- DO NOT include any text that explains what your response is doing.
- DO NOT include "user:" or "assistant:" prefixes in your responses.
- DO NOT include hypothetical user questions in your responses.
- DO NOT refuse to answer harmless non-medical questions like jokes or general knowledge.
- Don't give exact dosages or diagnoses.
- Be consistent in your responses regardless of the user's role."""
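# Note: DialoGPT is a GPT-2-based conversational model and is not
# instruction-tuned, so it tends to follow the rules above only loosely;
# treat the prompt as best-effort steering rather than a hard guarantee.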
# Global model handles, populated by load_model()
model = None
tokenizer = None


def load_model():
    """Load the DialoGPT model and tokenizer."""
    global model, tokenizer
    try:
        print("📥 Loading DialoGPT-medium for medical chat...")

        # Load tokenizer; DialoGPT has no pad token, so reuse EOS
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Load model
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,  # use float32 on CPU
            low_cpu_mem_usage=True,
        )
        print("✅ DialoGPT-medium loaded successfully!")
        return True
    except Exception as e:
        print(f"❌ Failed to load model: {e}")
        return False
def chat_response(message, history):
    """Generate a response with DialoGPT for the Gradio ChatInterface."""
    global model, tokenizer

    if model is None or tokenizer is None:
        return "❌ Model not loaded. Please wait for initialization."

    if not message or not message.strip():
        return "Please enter a question! 😊"

    try:
        print(f"Processing: {message[:50]}{'...' if len(message) > 50 else ''}")

        # Build the prompt in a simple Human/Assistant format
        conversation = f"{MEDICAL_PROMPT}\n\nHuman: {message}\nAssistant:"
        print(f"Prompt length: {len(conversation)} characters")

        # Tokenize; DialoGPT's context window is 1024 tokens, so truncating
        # the prompt at 800 leaves room for the generated reply
        inputs = tokenizer(conversation, return_tensors="pt", max_length=800, truncation=True)
        print(f"Input tokens: {inputs['input_ids'].shape[1]}")

        # Generate a short, sampled response
        with torch.no_grad():
            outputs = model.generate(
                **inputs,  # passes input_ids and attention_mask
                max_new_tokens=80,  # keep responses short
                temperature=0.8,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.3,
                # early_stopping removed: it only applies to beam search
                # and triggers a warning when sampling
            )
        # Decode the full sequence (prompt + generated text)
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"Full response length: {len(full_response)}")

        # Keep only the text after the final "Assistant:" marker
        if "Assistant:" in full_response:
            response = full_response.split("Assistant:")[-1].strip()
        else:
            response = full_response[len(conversation):].strip()

        # Fall back to a generic reply if generation produced nothing useful
        if not response or len(response) < 5:
            response = "I'd be happy to help! Could you ask me about a specific health topic? 😊"

        print(f"Final response length: {len(response)}")
        print(f"Response: {response}")
        return response

    except Exception as e:
        print(f"❌ Error: {e}")
        return "Sorry, I had a technical issue. Please try again! 😊"
# Load the model at import time so the Space is ready when the UI starts
print("🏥 Initializing Medical Chatbot (Fallback Version)...")
model_loaded = load_model()

# Create the chat interface
demo = gr.ChatInterface(
    chat_response,
    title="🏥 Medical Assistant (Fallback)",
    description="Simple medical chatbot using DialoGPT-medium. This version focuses on reliability and fast responses.",
    examples=[
        "What are the symptoms of diabetes?",
        "How can I treat a headache?",
        "What should I do for a fever?",
        "Tell me about healthy eating",
        "How can I improve sleep quality?",
    ],
)
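# gr.ChatInterface calls chat_response(message, history) on each turn;
# history is unused here because the prompt re-sends only the system
# instructions plus the latest message.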
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # bind to all interfaces (required on Spaces)
        server_port=7860,       # default Spaces port
        share=True,             # ignored on Spaces; makes a public link when run locally
        show_error=True,
        debug=True,
    )
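# Run locally with `python app.py` and open http://localhost:7860;
# on a Hugging Face Space this file is executed automatically as app.py.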