""" Enhanced Doctor Chat with RAG from Hugging Face Uses your dataset + FAISS index for grounded, factual responses """ import sys import os # Fix Unicode encoding for Windows console if sys.platform == 'win32': os.environ['PYTHONIOENCODING'] = 'utf-8' try: from huggingface_hub import hf_hub_download, list_repo_files HAS_HF = True except ImportError: HAS_HF = False print("⚠️ huggingface_hub not installed — RAG disabled, mock responses only") try: import faiss HAS_FAISS = True except ImportError: HAS_FAISS = False print("⚠️ faiss-cpu not installed — RAG disabled, mock responses only") import numpy as np from typing import Optional import json from dotenv import load_dotenv # Load environment variables load_dotenv() HF_REPO = os.getenv("HF_INDEX_REPO", "CaffeinatedCoding/reportraahat-indexes") HF_TOKEN = os.getenv("HF_TOKEN", "") HF_USER = "CaffeinatedCoding" class RAGDocumentRetriever: """Retrieve relevant documents from HF using FAISS.""" def __init__(self): self.index = None self.documents = [] self.embeddings_model = None self.loaded = False self._load_from_hf() def _load_from_hf(self): """Download and load FAISS index + documents from HF.""" if not HAS_HF or not HAS_FAISS: print("⚠️ Skipping RAG loading (missing dependencies)") self.loaded = False return try: print("📥 Loading FAISS index from HF...") # First, list all files in the repo to see what's available try: print(f" Checking files in {HF_REPO}...") files = list_repo_files( repo_id=HF_REPO, repo_type="dataset", token=HF_TOKEN ) print(f" Available files: {files}") except Exception as e: print(f" ⚠️ Could not list files: {e}") # Try downloading FAISS index with token try: index_path = hf_hub_download( repo_id=HF_REPO, filename="index.faiss", repo_type="dataset", token=HF_TOKEN ) # Load FAISS index self.index = faiss.read_index(index_path) print("✅ FAISS index loaded") except Exception as e: print(f" ⚠️ Could not load index.faiss: {e}") print(" Trying alternative names...") # Try alternative names for alt_name in ["faiss.index", "knn.index", "vec.index", "index"]: try: index_path = hf_hub_download( repo_id=HF_REPO, filename=alt_name, repo_type="dataset", token=HF_TOKEN ) self.index = faiss.read_index(index_path) print(f"✅ FAISS index loaded from {alt_name}") break except: pass # Download documents metadata try: docs_path = hf_hub_download( repo_id=HF_REPO, filename="documents.json", repo_type="dataset", token=HF_TOKEN ) with open(docs_path, 'r', encoding='utf-8') as f: self.documents = json.load(f) print(f"✅ Loaded {len(self.documents)} documents") except Exception as e: print(f" ⚠️ Could not load documents.json: {e}") # Try alternative document formats for alt_doc in ["documents.parquet", "docs.json", "embeddings.json"]: try: docs_path = hf_hub_download( repo_id=HF_REPO, filename=alt_doc, repo_type="dataset", token=HF_TOKEN ) if alt_doc.endswith('.json'): with open(docs_path, 'r', encoding='utf-8') as f: self.documents = json.load(f) print(f"✅ Loaded documents from {alt_doc}") break except: pass self.loaded = True if self.index is not None else False except Exception as e: print(f"⚠️ Could not load RAG from HF: {e}") self.loaded = False def retrieve(self, query_embedding: list, k: int = 3) -> list: """Retrieve top-k similar documents.""" if not self.loaded or self.index is None: return [] try: query = np.array([query_embedding]).astype('float32') distances, indices = self.index.search(query, min(k, self.index.ntotal)) results = [] for idx in indices[0]: if 0 <= idx < len(self.documents): results.append(self.documents[int(idx)]) return results except: return [] def get_enhanced_mock_response(message: str, guc: dict, retrieved_docs: list = None) -> str: """Generate response with RAG grounding.""" name = guc.get("name", "Patient") report = guc.get("latestReport", {}) findings = report.get("findings", []) affected_organs = report.get("affected_organs", []) message_lower = message.lower() # Check for specific findings anemia_found = any('hemoglobin' in str(f.get('parameter', '')).lower() for f in findings) iron_found = any('iron' in str(f.get('parameter', '')).lower() for f in findings) b12_found = any('b12' in str(f.get('parameter', '')).lower() for f in findings) # Build response with RAG context response = "" # 1. Main response based on intent + findings if anemia_found and any(word in message_lower for word in ['tired', 'fatigue', 'weak', 'energy', 'exhausted']): response = f"""Dr. Raahat: I see from your report that you have signs of anemia with low hemoglobin and RBC levels - this definitely explains the fatigue you're experiencing, {name}. **What's happening:** Your red blood cells are lower than normal, which means less oxygen delivery to your muscles and brain. That's why you feel tired and weak. **Immediate action plan:** 1. **Increase iron-rich foods** (eat daily): - Red meat, chicken, fish (best sources) - Spinach, lentils, chickpeas - Pumpkin seeds, fortified cereals - Combine with vitamin C (orange, lemon, tomato) for better absorption 2. **Take supplements** (discuss dosage with doctor): - Iron supplement (typically 325mg ferrous sulphate) - Vitamin B12 (oral or injections) - Folic acid (helps iron work better) 3. **Lifestyle changes:** - Get 7-8 hours of sleep - Avoid intense exercise for now - Drink 3 liters of water daily - Reduce tea/coffee (blocks iron absorption) **Recovery timeline**: You should feel noticeably better in 2-3 weeks with consistent effort. What specific food preferences do you have? I can give personalized suggestions.""" elif (iron_found or b12_found) and any(word in message_lower for word in ['diet', 'food', 'eating', 'nutrition', 'eat']): response = f"""Dr. Raahat: Great question! Your low iron and B12 need specific dietary attention, {name}. **Iron-rich foods (eat 2-3 daily):** - **Best sources**: Red meat, liver, oysters, sardines - **Good sources**: Chicken, turkey, tofu, lentils, beans - **Plant-based**: Spinach, kale, pumpkin seeds, fortified cereals **B12 recovery foods:** - Eggs, milk, cheese (2-3 servings daily) - Fish, chicken, beef - Fortified cereals and plant milk **Pro absorption tips:** ✓ Always pair iron with vitamin C (increases absorption by 3x) - Breakfast: Iron cereal + orange juice - Lunch: Spinach with lemon juice - Dinner: Lentils with tomato curry ✗ Avoid these with iron meals: - Tea, coffee, cola (blocks absorption) - Milk, cheese, calcium supplements (wait 2 hours) - Antacids (remove iron before it's absorbed) **Sample daily meal plan:** - **Breakfast**: Fortified cereal (20mg iron) + fresh orange juice - **Lunch**: Spinach and chickpea curry with lemon - **Snack**: Pumpkin seeds + apple - **Dinner**: Lentil soup (15mg iron) + tomato **Expected improvement**: Energy boost in 2-3 weeks, full recovery in 6-8 weeks. Do you have any food allergies or preferences I should know about?""" elif any(word in message_lower for word in ['exercise', 'workout', 'walk', 'activity', 'gym']): response = f"""Dr. Raahat: Good thinking! Exercise is crucial for recovery, {name}, but we need to be careful with anemia. **Phase-based exercise plan:** **Week 1-2 (Recovery phase)**: - Light walking: 10-15 minutes daily - Gentle yoga or stretching - Avoid stairs and running - Stop if you feel dizzy **Week 3-4 (Building phase)**: - Walking: 20-30 minutes daily - Swimming (very gentle on body) - No intense exercise yet **Week 5+ (Normal activity)**: - Regular walking (45 mins) - Light strength training - Normal daily activities **Warning signs to stop immediately:** 🛑 Shortness of breath 🛑 Chest pain or dizziness 🛑 Extreme fatigue **Best time to exercise**: - Morning (after breakfast + iron absorption) - Evening (when energy is better) - Not on an empty stomach Combine exercise with diet changes and supplements for best results. Ready to start tomorrow?""" elif any(word in message_lower for word in ['medicine', 'medication', 'supplement', 'doctor', 'prescription']): response = f"""Dr. Raahat: Based on your low hemoglobin, iron, and B12, {name}, here's what you need: **Essential supplements:** 1. **Iron supplement** (START ASAP) - Type: Ferrous sulphate (cheapest, most effective) - Dose: Typically 325mg once daily - Duration: 8-12 weeks - Take with vitamin C, on empty stomach for best absorption - Side effects: May cause constipation (normal) 2. **Vitamin B12** - Option A: Oral supplement (500-1000 mcg daily) - Option B: Injections (1000 mcg weekly for 4 weeks, then monthly) - Injections are better for severe deficiency 3. **Folic acid** (works with iron) - Dose: 1-5mg daily - Helps red blood cell formation **IMPORTANT - Schedule doctor visit THIS WEEK:** ✓ Get proper dosage prescription ✓ Check for underlying absorption issues ✓ Get baseline blood test ✓ Schedule follow-up in 4 weeks **What to avoid:** ✗ Don't self-medicate without doctor guidance ✗ High-dose iron needs monitoring ✗ Some medications interact with iron When can you visit your doctor?""" else: # Generic contextual response response = f"""Dr. Raahat: Thanks for that question, {name}. Based on your report showing anemia with low hemoglobin, iron, and B12, here's what's most important right now: **Your priorities (in order):** 1. **Visit a doctor** - Get proper supplement prescriptions 2. **Dietary changes** - Start eating iron-rich foods today 3. **Supplements** - Iron, B12, and folic acid 4. **Light exercise** - Walking only for now 5. **Track progress** - Note energy levels daily **This week's action items:** □ Book doctor appointment □ Stock up on spinach, lentils, and red meat □ Start morning walks □ Get 7-8 hours sleep Which of these do you want help with first?""" # 2. Add RAG-grounded information if available if retrieved_docs: response += f"\n\n**Relevant medical information:**" for i, doc in enumerate(retrieved_docs[:2], 1): doc_title = doc.get('title', 'Medical Information') doc_snippet = doc.get('content', doc.get('text', ''))[:150] if doc_snippet: response += f"\n{i}. *{doc_title}*: {doc_snippet}..." response += "\n\n📚 *Note: This information is sourced from verified medical databases.*" return response # Initialize RAG on module load rag_retriever = None try: rag_retriever = RAGDocumentRetriever() except Exception as e: print(f"⚠️ RAG not available: {e}")