File size: 7,058 Bytes
b09dbcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import logging
import gc
import warnings
import os
from huggingface_hub import login

# Authenticate with the Hugging Face Hub using the HF_TOKEN secret
# (e.g. a Spaces repository secret), required for gated/private assets.
# NOTE(review): if HF_TOKEN is unset this passes token=None -- confirm
# login() tolerates that in this deployment.
login(token=os.getenv("HF_TOKEN"))

# Silence library chatter so the application logs stay readable.
warnings.filterwarnings("ignore")
logging.getLogger("transformers").setLevel(logging.ERROR)

# Generation configuration tuned for responsive CPU-only inference.
MODEL_NAME = "microsoft/DialoGPT-medium"  # conversational GPT-2 variant from Microsoft
MAX_NEW_TOKENS = 150  # cap on newly generated tokens per reply
TEMPERATURE = 0.8     # sampling temperature (higher = more varied output)
TOP_P = 0.9           # nucleus-sampling probability cutoff

# System prompt prepended to every user message; defines the assistant's
# scope, tone, and output rules. Kept verbatim -- it is runtime data.
MEDICAL_CONTEXT = """You are a friendly and smart medical assistant. Your job is to give short, clear, and helpful health information.
      
Your answers should:
- Stay focused. No long essays or extra fluff.
- Give basic helpful steps for common symptoms like fever, cough, or headache (e.g., rest, drink fluids, take paracetamol if needed).
- For any serious or unclear issues, remind the user to see a doctor β€” but do it briefly and naturally.
- Keep responses concise and under 4 sentences when possible.
      
Tone:
- Friendly, supportive, and calm.
- No robotic warnings unless needed. Keep it real and human.
- Use emojis like 😊 or πŸ‘ occasionally to appear friendly.
      
Important rules:
- NEVER include text in parentheses in your responses.
- NEVER include any meta-instructions in your responses.
- NEVER include reminders about what you should do in future responses.
- DO NOT include phrases like "We're here to help" or "I'm just an AI".
- DO NOT include any text that instructs you what to do or how to behave.
- DO NOT include any sentences that start with "If the user asks..." or "Remember..."
- DO NOT include "(smile)" - instead, use actual emojis like 😊 or πŸ‘ when appropriate.
- DO NOT include numbered references like [1], [2], etc. in your responses.
- DO NOT include any text that explains what your response is doing.
- DO NOT include "user:" or "assistant:" prefixes in your responses.
- DO NOT include hypothetical user questions in your responses.
- DO NOT refuse to answer harmless non-medical questions like jokes or general knowledge.
- Don't give exact dosages or diagnoses.
- Be consistent in your responses regardless of the user's role."""

# Module-level model handles; populated by load_model() at startup and
# read by generate_medical_response(). None until loading succeeds.
model = None
tokenizer = None

def load_model():
    """Load the DialoGPT tokenizer and model for CPU inference.

    Populates the module-level ``model`` and ``tokenizer`` globals as a
    side effect.

    Returns:
        bool: True when both tokenizer and model loaded, False otherwise.
    """
    global model, tokenizer

    try:
        print(f"πŸ₯ Loading medical chatbot model: {MODEL_NAME}")

        # Left padding so generated tokens are appended directly after the
        # prompt when inputs are padded.
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left")
        # DialoGPT ships without a pad token; reuse EOS so generate() can pad.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # float32 + low_cpu_mem_usage keeps the CPU-only load small and stable.
        # NOTE: trust_remote_code is deliberately NOT enabled -- this is a
        # stock Hugging Face architecture, and executing repo-supplied code
        # would be an unnecessary security risk.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,  # use float32 for CPU
            low_cpu_mem_usage=True,
        )
        model.eval()  # inference only: disables dropout etc.

        print("βœ… Model loaded successfully!")
        return True

    except Exception as e:
        # Best-effort startup: the caller checks the boolean and degrades
        # gracefully instead of crashing the whole app.
        print(f"❌ Failed to load model: {e}")
        return False

def generate_medical_response(prompt):
    """Generate a short medical-assistant reply for *prompt*.

    Prepends the CareConnect system prompt, samples a completion from the
    global DialoGPT model, and returns only the assistant's portion of the
    decoded text.

    Args:
        prompt: The user's question (a non-empty string).

    Returns:
        str: The generated reply, or a friendly fallback/error message.
    """
    global model, tokenizer

    if model is None or tokenizer is None:
        return "❌ Model not loaded. Please wait for initialization."

    try:
        # Frame the user's question with the medical system prompt.
        medical_prompt = f"{MEDICAL_CONTEXT}\n\nUser: {prompt}\nAssistant:"

        print(f"πŸ”„ Processing: {prompt[:50]}{'...' if len(prompt) > 50 else ''}")

        # Tokenize via __call__ so we also get an attention mask; without
        # one, generate() cannot tell padding from content because
        # pad_token == eos_token here.
        encoded = tokenizer(
            medical_prompt,
            return_tensors="pt",
            max_length=400,
            truncation=True,
        )

        # Sample a completion. early_stopping is intentionally omitted: it
        # applies only to beam search and is a no-op (with a warning) when
        # do_sample=True.
        with torch.no_grad():
            outputs = model.generate(
                encoded["input_ids"],
                attention_mask=encoded["attention_mask"],
                max_new_tokens=MAX_NEW_TOKENS,
                temperature=TEMPERATURE,
                top_p=TOP_P,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,
                num_return_sequences=1
            )

        # Decode the whole sequence, then keep only the text after the
        # final "Assistant:" marker (the model's reply).
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        if "Assistant:" in full_response:
            response = full_response.split("Assistant:")[-1].strip()
        else:
            response = full_response[len(medical_prompt):].strip()

        # Guard against empty or degenerate generations.
        if not response or len(response) < 10:
            response = "I'd be happy to help with your medical question. Could you please provide more specific details? 😊"

        print(f"βœ… Response generated: {len(response)} characters")

        # Free tensors promptly -- this runs on a small CPU instance.
        del encoded, outputs
        gc.collect()

        return response

    except Exception as e:
        print(f"❌ Generation error: {str(e)}")
        return "I encountered a technical issue. Please try rephrasing your question. For immediate medical concerns, please consult a healthcare professional."

def chat_interface(message, history):
    """Gradio chat entry point: validate the message, then delegate.

    Args:
        message: Raw user text from the chat box (may be None or blank).
        history: Prior conversation turns supplied by Gradio (unused).

    Returns:
        str: The assistant's reply, or a prompt asking for a question.
    """
    text = (message or "").strip()
    if not text:
        return "Please enter a medical question."
    return generate_medical_response(text)

# Load the model at import time so the first chat request is not delayed.
print("πŸ₯ Initializing Medical Chatbot...")
model_loaded = load_model()

# The app still starts on failure; generate_medical_response() returns a
# "not loaded" message instead of crashing.
if not model_loaded:
    print("⚠️ WARNING: Model failed to load. Responses may be limited.")

# Build the Gradio chat UI around chat_interface(); purely declarative
# configuration -- no model work happens here.
demo = gr.ChatInterface(
    chat_interface,
    type="messages",
    title="πŸ₯ Medical Information Assistant",
    description="""
    A medical information chatbot powered by AI. This assistant provides educational health information.
    
    ⚠️ **Important Disclaimer**: This chatbot provides general health information for educational purposes only. 
    It should not replace professional medical advice, diagnosis, or treatment. Always consult qualified 
    healthcare professionals for medical concerns.
    """,
    # Clickable starter questions shown beneath the chat box.
    examples=[
        "What are the symptoms of diabetes?",
        "How can I maintain a healthy heart?", 
        "What should I know about high blood pressure?",
        "Tell me about the importance of regular exercise",
        "What are common causes of headaches?",
        "How can I improve my sleep quality?"
    ],
    # Do not pre-run examples at startup -- each would invoke the model.
    cache_examples=False,
    theme=gr.themes.Soft(),
    # Narrow, centered layout with rounded chat bubbles.
    css="""
    .gradio-container {
        max-width: 800px !important;
        margin: auto !important;
    }
    .message {
        border-radius: 10px !important;
    }
    """
)

if __name__ == "__main__":
    # NOTE(review): share=True is ignored (with a warning) when running
    # inside Hugging Face Spaces, and debug=True blocks with verbose
    # logging -- confirm both are intended for this deployment.
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces (container-friendly)
        server_port=7860,       # conventional Gradio/Spaces port
        share=True,
        show_error=True,
        debug=True
    )