File size: 12,529 Bytes
542c765
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
"""
Enhanced Doctor Chat with RAG from Hugging Face
Uses your dataset + FAISS index for grounded, factual responses
"""
import sys
import os

# Fix Unicode encoding for Windows console.
# PYTHONIOENCODING is only read when an interpreter starts, so assigning it
# here cannot change the streams of the current process; it is kept so that
# child Python processes inherit UTF-8. The already-open stdout/stderr are
# switched explicitly so the emoji status messages printed below do not raise
# UnicodeEncodeError on legacy code pages.
if sys.platform == 'win32':
    os.environ['PYTHONIOENCODING'] = 'utf-8'
    for _stream in (sys.stdout, sys.stderr):
        # Streams may be None (pythonw) or replaced by objects without
        # reconfigure(); in that case leave them alone.
        _reconfigure = getattr(_stream, 'reconfigure', None)
        if callable(_reconfigure):
            try:
                _reconfigure(encoding='utf-8')
            except (ValueError, OSError):
                # Best effort only: never fail the import over console setup.
                pass

try:
    from huggingface_hub import hf_hub_download, list_repo_files
    HAS_HF = True
except ImportError:
    HAS_HF = False
    print("⚠️  huggingface_hub not installed β€” RAG disabled, mock responses only")

try:
    import faiss
    HAS_FAISS = True
except ImportError:
    HAS_FAISS = False
    print("⚠️  faiss-cpu not installed β€” RAG disabled, mock responses only")

import numpy as np
from typing import Optional
import json
from dotenv import load_dotenv

# Read a .env file (when one is present) into the process environment before
# any of the settings below are looked up.
load_dotenv()

# Dataset repository queried for the FAISS index and document metadata;
# the HF_INDEX_REPO environment variable overrides the default.
HF_REPO = os.environ.get("HF_INDEX_REPO", "CaffeinatedCoding/reportraahat-indexes")
# Hugging Face access token; an empty string means none was configured.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Account name that owns the default repository above.
HF_USER = "CaffeinatedCoding"

class RAGDocumentRetriever:
    """Retrieve relevant documents from HF using FAISS.

    On construction the retriever tries to download a FAISS index and a JSON
    document file from the ``HF_REPO`` dataset repository. Loading is
    best-effort: every failure is reported on stdout and leaves the retriever
    disabled (``loaded`` is False) instead of raising.
    """

    # Candidate file names, tried in order; the first entry is the expected
    # name and the rest are fallbacks.
    _INDEX_FILENAMES = ("index.faiss", "faiss.index", "knn.index", "vec.index", "index")
    # Only JSON candidates are listed: this class has no parquet reader, so a
    # parquet file could be downloaded but never turned into documents.
    _DOCUMENT_FILENAMES = ("documents.json", "docs.json", "embeddings.json")

    def __init__(self):
        self.index = None  # FAISS index, or None when none could be loaded
        self.documents = []  # parsed JSON documents, addressed by FAISS result id
        self.embeddings_model = None  # placeholder; never assigned by this class
        self.loaded = False  # True only once an index has been read
        self._load_from_hf()

    @staticmethod
    def _token():
        """Return the configured HF token, or None when it is empty.

        None lets huggingface_hub use its default credential lookup instead
        of sending an empty token string.
        """
        return HF_TOKEN or None

    @classmethod
    def _download(cls, filename: str) -> str:
        """Download *filename* from the HF dataset repo and return its local path."""
        return hf_hub_download(
            repo_id=HF_REPO,
            filename=filename,
            repo_type="dataset",
            token=cls._token(),
        )

    @staticmethod
    def _read_json(path: str):
        """Parse the UTF-8 JSON file at *path* and return its content."""
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _list_repo_files(self) -> None:
        """Print the files available in the repo (diagnostic only)."""
        try:
            print(f"   Checking files in {HF_REPO}...")
            files = list_repo_files(
                repo_id=HF_REPO,
                repo_type="dataset",
                token=self._token(),
            )
            print(f"   Available files: {files}")
        except Exception as e:
            print(f"   ⚠️  Could not list files: {e}")

    def _load_index(self) -> None:
        """Set ``self.index`` from the first index candidate that loads."""
        primary, *alternatives = self._INDEX_FILENAMES
        try:
            self.index = faiss.read_index(self._download(primary))
            print("✅ FAISS index loaded")
            return
        except Exception as e:
            print(f"   ⚠️  Could not load {primary}: {e}")
            print("   Trying alternative names...")

        for alt_name in alternatives:
            try:
                self.index = faiss.read_index(self._download(alt_name))
            except Exception:
                # A missing or unreadable candidate is expected here; move on
                # to the next name. (Exception, not a bare except, so that
                # Ctrl-C still interrupts the download.)
                continue
            print(f"✅ FAISS index loaded from {alt_name}")
            return

    def _load_documents(self) -> None:
        """Set ``self.documents`` from the first document candidate that parses."""
        primary, *alternatives = self._DOCUMENT_FILENAMES
        try:
            self.documents = self._read_json(self._download(primary))
            print(f"✅ Loaded {len(self.documents)} documents")
            return
        except Exception as e:
            print(f"   ⚠️  Could not load {primary}: {e}")

        for alt_doc in alternatives:
            try:
                self.documents = self._read_json(self._download(alt_doc))
            except Exception:
                # Same best-effort policy as for the index candidates.
                continue
            print(f"✅ Loaded documents from {alt_doc}")
            return

    def _load_from_hf(self):
        """Download and load FAISS index + documents from HF.

        Does nothing (beyond a notice) when huggingface_hub or faiss could
        not be imported. Never raises: any unexpected error disables the
        retriever.
        """
        if not HAS_HF or not HAS_FAISS:
            print("⚠️  Skipping RAG loading (missing dependencies)")
            self.loaded = False
            return
        try:
            print("📥 Loading FAISS index from HF...")
            self._list_repo_files()
            self._load_index()
            self._load_documents()
            # Retrieval needs the index; documents alone are not enough.
            self.loaded = self.index is not None
        except Exception as e:
            print(f"⚠️  Could not load RAG from HF: {e}")
            self.loaded = False

    def retrieve(self, query_embedding: list, k: int = 3) -> list:
        """Retrieve top-k similar documents.

        Args:
            query_embedding: A single embedding vector (sequence of numbers).
            k: Maximum number of documents to return.

        Returns:
            The documents whose positions the index returned, in the order
            returned. Empty when the retriever is not loaded, when ``k`` or
            the index size is not positive, or when the search fails.
        """
        if not self.loaded or self.index is None:
            return []

        try:
            top_k = min(k, self.index.ntotal)
            if top_k <= 0:
                # Nothing to search for / nothing to search in.
                return []

            query = np.array([query_embedding]).astype('float32')
            _distances, indices = self.index.search(query, top_k)

            # Ids outside the document list (including negative ids, which
            # FAISS documents as its "no result" marker) are dropped.
            return [
                self.documents[int(idx)]
                for idx in indices[0]
                if 0 <= idx < len(self.documents)
            ]
        except Exception as e:
            print(f"   ⚠️  Retrieval failed: {e}")
            return []


def _format_retrieved_docs(retrieved_docs: Optional[list], limit: int = 2, max_chars: int = 150) -> str:
    """Render up to *limit* retrieved documents as a markdown section.

    Returns an empty string when no document yields any text, so the caller
    never shows a "Relevant medical information" header with nothing under it.
    """
    entries = []
    for doc in (retrieved_docs or [])[:limit]:
        if not isinstance(doc, dict):
            continue
        title = doc.get('title') or 'Medical Information'
        # Fall back to 'text' when 'content' is missing, None or empty.
        text = str(doc.get('content') or doc.get('text') or '')
        if not text:
            continue
        snippet = text[:max_chars]
        if len(text) > max_chars:
            # Mark only snippets that were actually cut short.
            snippet += "..."
        entries.append(f"*{title}*: {snippet}")

    if not entries:
        return ""

    section = "\n\n**Relevant medical information:**"
    for i, entry in enumerate(entries, 1):
        section += f"\n{i}. {entry}"
    section += "\n\n📚 *Note: This information is sourced from verified medical databases.*"
    return section


def get_enhanced_mock_response(message: str, guc: dict, retrieved_docs: Optional[list] = None) -> str:
    """Generate response with RAG grounding.

    Picks one canned "Dr. Raahat" reply from keywords in *message* and from
    the parameters named in the report findings, then appends snippets from
    *retrieved_docs* when any are available.

    Args:
        message: The user's chat message; matched case-insensitively by
            substring against keyword lists.
        guc: Context dict. Reads ``name`` and ``latestReport`` (a dict whose
            ``findings`` is a list of dicts with a ``parameter`` key).
            Missing or ``None`` values are tolerated.
        retrieved_docs: Optional list of document dicts with ``title`` and
            ``content`` or ``text`` keys; only the first two are considered.

    Returns:
        The markdown-formatted reply text.
    """

    name = guc.get("name") or "Patient"
    report = guc.get("latestReport")
    if not isinstance(report, dict):
        report = {}
    findings = report.get("findings") or []
    message_lower = (message or "").lower()

    # Lower-cased parameter names of the well-formed findings.
    parameters = [
        str(f.get('parameter', '')).lower()
        for f in findings
        if isinstance(f, dict)
    ]

    # Check for specific findings
    anemia_found = any('hemoglobin' in p for p in parameters)
    iron_found = any('iron' in p for p in parameters)
    b12_found = any('b12' in p for p in parameters)

    # Build response with RAG context
    response = ""

    # 1. Main response based on intent + findings
    if anemia_found and any(word in message_lower for word in ['tired', 'fatigue', 'weak', 'energy', 'exhausted']):
        response = f"""Dr. Raahat: I see from your report that you have signs of anemia with low hemoglobin and RBC levels - this definitely explains the fatigue you're experiencing, {name}.

**What's happening:**
Your red blood cells are lower than normal, which means less oxygen delivery to your muscles and brain. That's why you feel tired and weak.

**Immediate action plan:**

1. **Increase iron-rich foods** (eat daily):
   - Red meat, chicken, fish (best sources)
   - Spinach, lentils, chickpeas
   - Pumpkin seeds, fortified cereals
   - Combine with vitamin C (orange, lemon, tomato) for better absorption

2. **Take supplements** (discuss dosage with doctor):
   - Iron supplement (typically 325mg ferrous sulphate)
   - Vitamin B12 (oral or injections)
   - Folic acid (helps iron work better)

3. **Lifestyle changes:**
   - Get 7-8 hours of sleep
   - Avoid intense exercise for now
   - Drink 3 liters of water daily
   - Reduce tea/coffee (blocks iron absorption)

**Recovery timeline**: You should feel noticeably better in 2-3 weeks with consistent effort.

What specific food preferences do you have? I can give personalized suggestions."""

    elif (iron_found or b12_found) and any(word in message_lower for word in ['diet', 'food', 'eating', 'nutrition', 'eat']):
        response = f"""Dr. Raahat: Great question! Your low iron and B12 need specific dietary attention, {name}.

**Iron-rich foods (eat 2-3 daily):**
- **Best sources**: Red meat, liver, oysters, sardines
- **Good sources**: Chicken, turkey, tofu, lentils, beans
- **Plant-based**: Spinach, kale, pumpkin seeds, fortified cereals

**B12 recovery foods:**
- Eggs, milk, cheese (2-3 servings daily)
- Fish, chicken, beef
- Fortified cereals and plant milk

**Pro absorption tips:**
✓ Always pair iron with vitamin C (increases absorption by 3x)
- Breakfast: Iron cereal + orange juice
- Lunch: Spinach with lemon juice
- Dinner: Lentils with tomato curry

✗ Avoid these with iron meals:
- Tea, coffee, cola (blocks absorption)
- Milk, cheese, calcium supplements (wait 2 hours)
- Antacids (remove iron before it's absorbed)

**Sample daily meal plan:**
- **Breakfast**: Fortified cereal (20mg iron) + fresh orange juice
- **Lunch**: Spinach and chickpea curry with lemon
- **Snack**: Pumpkin seeds + apple
- **Dinner**: Lentil soup (15mg iron) + tomato

**Expected improvement**: Energy boost in 2-3 weeks, full recovery in 6-8 weeks.

Do you have any food allergies or preferences I should know about?"""

    elif any(word in message_lower for word in ['exercise', 'workout', 'walk', 'activity', 'gym']):
        response = f"""Dr. Raahat: Good thinking! Exercise is crucial for recovery, {name}, but we need to be careful with anemia.

**Phase-based exercise plan:**

**Week 1-2 (Recovery phase)**:
- Light walking: 10-15 minutes daily
- Gentle yoga or stretching
- Avoid stairs and running
- Stop if you feel dizzy

**Week 3-4 (Building phase)**:
- Walking: 20-30 minutes daily  
- Swimming (very gentle on body)
- No intense exercise yet

**Week 5+ (Normal activity)**:
- Regular walking (45 mins)
- Light strength training
- Normal daily activities

**Warning signs to stop immediately:**
🛑 Shortness of breath  
🛑 Chest pain or dizziness  
🛑 Extreme fatigue  

**Best time to exercise**:
- Morning (after breakfast + iron absorption)
- Evening (when energy is better)
- Not on an empty stomach

Combine exercise with diet changes and supplements for best results. Ready to start tomorrow?"""

    elif any(word in message_lower for word in ['medicine', 'medication', 'supplement', 'doctor', 'prescription']):
        response = f"""Dr. Raahat: Based on your low hemoglobin, iron, and B12, {name}, here's what you need:

**Essential supplements:**

1. **Iron supplement** (START ASAP)
   - Type: Ferrous sulphate (cheapest, most effective)
   - Dose: Typically 325mg once daily
   - Duration: 8-12 weeks
   - Take with vitamin C, on empty stomach for best absorption
   - Side effects: May cause constipation (normal)

2. **Vitamin B12**
   - Option A: Oral supplement (500-1000 mcg daily)
   - Option B: Injections (1000 mcg weekly for 4 weeks, then monthly)
   - Injections are better for severe deficiency

3. **Folic acid** (works with iron)
   - Dose: 1-5mg daily
   - Helps red blood cell formation

**IMPORTANT - Schedule doctor visit THIS WEEK:**
✓ Get proper dosage prescription
✓ Check for underlying absorption issues
✓ Get baseline blood test
✓ Schedule follow-up in 4 weeks

**What to avoid:**
✗ Don't self-medicate without doctor guidance
✗ High-dose iron needs monitoring
✗ Some medications interact with iron

When can you visit your doctor?"""

    else:
        # Generic contextual response
        response = f"""Dr. Raahat: Thanks for that question, {name}. 

Based on your report showing anemia with low hemoglobin, iron, and B12, here's what's most important right now:

**Your priorities (in order):**
1. **Visit a doctor** - Get proper supplement prescriptions
2. **Dietary changes** - Start eating iron-rich foods today
3. **Supplements** - Iron, B12, and folic acid
4. **Light exercise** - Walking only for now
5. **Track progress** - Note energy levels daily

**This week's action items:**
□ Book doctor appointment  
□ Stock up on spinach, lentils, and red meat  
□ Start morning walks  
□ Get 7-8 hours sleep  

Which of these do you want help with first?"""

    # 2. Add RAG-grounded information if available
    response += _format_retrieved_docs(retrieved_docs)

    return response


# Initialize RAG on module load.
# The retriever is built once at import time and shared module-wide. It stays
# None when construction raises, so users of this name must handle None.
rag_retriever = None
try:
    rag_retriever = RAGDocumentRetriever()
except Exception as e:
    # Defensive: a failure here must not break importing the module.
    print(f"⚠️  RAG not available: {e}")