ReportRaahat / backend /app /ml /enhanced_chat.py
ReportRaahat CI
Deploy from GitHub: cbc36259c5ce4062cd4e64b876308f9378e3ebe2
542c765
"""
Enhanced Doctor Chat with RAG from Hugging Face
Uses your dataset + FAISS index for grounded, factual responses
"""
import sys
import os
# Fix Unicode encoding for Windows console.
# PYTHONIOENCODING is only consulted when an interpreter starts, so assigning
# it here helps child processes but not this one; the streams that are already
# open must be switched to UTF-8 explicitly for the emoji log lines to print.
if sys.platform == 'win32':
    os.environ['PYTHONIOENCODING'] = 'utf-8'
    for _stream in (sys.stdout, sys.stderr):
        # A stream may be None (no console attached) or a replacement object
        # without reconfigure(); skip those instead of failing at import time.
        _reconfigure = getattr(_stream, 'reconfigure', None)
        if _reconfigure is not None:
            _reconfigure(encoding='utf-8', errors='replace')
try:
from huggingface_hub import hf_hub_download, list_repo_files
HAS_HF = True
except ImportError:
HAS_HF = False
print("⚠️ huggingface_hub not installed β€” RAG disabled, mock responses only")
try:
import faiss
HAS_FAISS = True
except ImportError:
HAS_FAISS = False
print("⚠️ faiss-cpu not installed β€” RAG disabled, mock responses only")
import numpy as np
from typing import Optional
import json
from dotenv import load_dotenv
# Load environment variables (python-dotenv) before reading the settings below
load_dotenv()

# Hub repo that holds the index files; overridable through HF_INDEX_REPO
HF_REPO = os.environ.get("HF_INDEX_REPO", "CaffeinatedCoding/reportraahat-indexes")
# Hub access token taken from HF_TOKEN; an empty string when it is not set
HF_TOKEN = os.environ.get("HF_TOKEN", "")
HF_USER = "CaffeinatedCoding"
class RAGDocumentRetriever:
    """Retrieve relevant documents from HF using FAISS.

    On construction the retriever tries to download a FAISS index and a JSON
    document list from the dataset repo named by ``HF_REPO``. Failures are
    reported on stdout and leave the instance disabled (``loaded`` is False)
    rather than raising, so callers can always fall back to ungrounded
    responses.
    """

    # Index file names tried in order; the first one that loads wins.
    _INDEX_FILENAMES = ("index.faiss", "faiss.index", "knn.index", "vec.index", "index")
    # Document file names tried in order; only ``.json`` candidates can be parsed.
    _DOCUMENT_FILENAMES = ("documents.json", "documents.parquet", "docs.json", "embeddings.json")

    def __init__(self):
        self.index = None
        self.documents = []
        self.embeddings_model = None
        self.loaded = False
        self._load_from_hf()

    @staticmethod
    def _download(filename):
        """Download ``filename`` from the HF dataset repo and return its local path."""
        return hf_hub_download(
            repo_id=HF_REPO,
            filename=filename,
            repo_type="dataset",
            # HF_TOKEN is "" when unset; pass None so no blank token is supplied.
            token=HF_TOKEN or None,
        )

    def _print_repo_files(self):
        """Print the files available in the repo (diagnostic only, never raises)."""
        try:
            print(f" Checking files in {HF_REPO}...")
            files = list_repo_files(
                repo_id=HF_REPO,
                repo_type="dataset",
                token=HF_TOKEN or None,
            )
            print(f" Available files: {files}")
        except Exception as e:
            print(f" ⚠️ Could not list files: {e}")

    def _load_index(self):
        """Return the first FAISS index that loads from the candidate names, else None."""
        for position, filename in enumerate(self._INDEX_FILENAMES):
            try:
                index = faiss.read_index(self._download(filename))
            except Exception as e:
                # Report every failed candidate instead of swallowing it silently.
                print(f" ⚠️ Could not load {filename}: {e}")
                if position == 0:
                    print(" Trying alternative names...")
                continue
            if position == 0:
                print("✅ FAISS index loaded")
            else:
                print(f"✅ FAISS index loaded from {filename}")
            return index
        return None

    def _load_documents(self):
        """Return the documents from the first readable JSON candidate, else []."""
        for position, filename in enumerate(self._DOCUMENT_FILENAMES):
            if not filename.endswith('.json'):
                # Nothing here can parse other formats; skipping keeps an
                # unparsed file from being reported as loaded and lets the
                # remaining JSON candidates be tried.
                print(f" ⚠️ Skipping {filename}: only JSON document files are supported")
                continue
            try:
                with open(self._download(filename), 'r', encoding='utf-8') as f:
                    documents = json.load(f)
            except Exception as e:
                print(f" ⚠️ Could not load {filename}: {e}")
                continue
            if position == 0:
                print(f"✅ Loaded {len(documents)} documents")
            else:
                print(f"✅ Loaded documents from {filename}")
            return documents
        return []

    def _load_from_hf(self):
        """Download and load FAISS index + documents from HF.

        Sets ``self.index``, ``self.documents`` and ``self.loaded``. The
        retriever counts as loaded as soon as an index is available, even if
        no document file could be read.
        """
        if not HAS_HF or not HAS_FAISS:
            print("⚠️ Skipping RAG loading (missing dependencies)")
            self.loaded = False
            return
        try:
            print("📥 Loading FAISS index from HF...")
            self._print_repo_files()
            self.index = self._load_index()
            self.documents = self._load_documents()
            self.loaded = self.index is not None
        except Exception as e:
            print(f"⚠️ Could not load RAG from HF: {e}")
            self.loaded = False

    def retrieve(self, query_embedding: list, k: int = 3) -> list:
        """Retrieve top-k similar documents.

        Args:
            query_embedding: A single query vector; it is wrapped into a
                one-row float32 array before searching.
            k: Maximum number of documents to return.

        Returns:
            The documents whose positions the index returned, in the order
            returned. Positions outside ``self.documents`` (including
            negative ones) are dropped. Returns ``[]`` when the retriever is
            not loaded, when nothing can be requested, or when the search
            fails.
        """
        if not self.loaded or self.index is None:
            return []
        try:
            top_k = min(k, self.index.ntotal)
            if top_k <= 0:
                # Empty index or non-positive k: do not issue a zero-size search.
                return []
            query = np.array([query_embedding]).astype('float32')
            _distances, indices = self.index.search(query, top_k)
            return [
                self.documents[int(idx)]
                for idx in indices[0]
                if 0 <= idx < len(self.documents)
            ]
        except Exception as e:
            # Retrieval is best-effort; report the reason and return nothing.
            print(f"⚠️ RAG retrieval failed: {e}")
            return []
def get_enhanced_mock_response(message: str, guc: dict, retrieved_docs: Optional[list] = None) -> str:
    """Generate response with RAG grounding.

    Picks one of several canned "Dr. Raahat" replies from keywords in
    ``message`` and from the parameter names found in the latest report, then
    appends snippets from ``retrieved_docs`` when any are usable.

    Args:
        message: The user's chat message; keywords are matched as lowercase
            substrings.
        guc: Context dict. Reads ``name`` and ``latestReport["findings"]``
            (a list of dicts with a ``parameter`` key). Missing, ``None`` or
            wrongly typed values are treated as absent.
        retrieved_docs: Optional list of dicts with ``title`` and ``content``
            or ``text``. Only the first two entries are considered.

    Returns:
        The reply text (markdown-style formatting).
    """
    guc = guc if isinstance(guc, dict) else {}
    name = guc.get("name") or "Patient"
    report = guc.get("latestReport")
    if not isinstance(report, dict):
        report = {}
    findings = report.get("findings")
    if not isinstance(findings, (list, tuple)):
        findings = []
    message_lower = (message or "").lower()

    # Lower-cased parameter names of well-formed findings; malformed entries are ignored
    parameters = [
        str(finding.get('parameter', '')).lower()
        for finding in findings
        if isinstance(finding, dict)
    ]
    # Check for specific findings
    anemia_found = any('hemoglobin' in parameter for parameter in parameters)
    iron_found = any('iron' in parameter for parameter in parameters)
    b12_found = any('b12' in parameter for parameter in parameters)

    # 1. Main response based on intent + findings
    if anemia_found and any(word in message_lower for word in ['tired', 'fatigue', 'weak', 'energy', 'exhausted']):
        response = f"""Dr. Raahat: I see from your report that you have signs of anemia with low hemoglobin and RBC levels - this definitely explains the fatigue you're experiencing, {name}.
**What's happening:**
Your red blood cells are lower than normal, which means less oxygen delivery to your muscles and brain. That's why you feel tired and weak.
**Immediate action plan:**
1. **Increase iron-rich foods** (eat daily):
- Red meat, chicken, fish (best sources)
- Spinach, lentils, chickpeas
- Pumpkin seeds, fortified cereals
- Combine with vitamin C (orange, lemon, tomato) for better absorption
2. **Take supplements** (discuss dosage with doctor):
- Iron supplement (typically 325mg ferrous sulphate)
- Vitamin B12 (oral or injections)
- Folic acid (helps iron work better)
3. **Lifestyle changes:**
- Get 7-8 hours of sleep
- Avoid intense exercise for now
- Drink 3 liters of water daily
- Reduce tea/coffee (blocks iron absorption)
**Recovery timeline**: You should feel noticeably better in 2-3 weeks with consistent effort.
What specific food preferences do you have? I can give personalized suggestions."""
    elif (iron_found or b12_found) and any(word in message_lower for word in ['diet', 'food', 'eating', 'nutrition', 'eat']):
        response = f"""Dr. Raahat: Great question! Your low iron and B12 need specific dietary attention, {name}.
**Iron-rich foods (eat 2-3 daily):**
- **Best sources**: Red meat, liver, oysters, sardines
- **Good sources**: Chicken, turkey, tofu, lentils, beans
- **Plant-based**: Spinach, kale, pumpkin seeds, fortified cereals
**B12 recovery foods:**
- Eggs, milk, cheese (2-3 servings daily)
- Fish, chicken, beef
- Fortified cereals and plant milk
**Pro absorption tips:**
✓ Always pair iron with vitamin C (increases absorption by 3x)
- Breakfast: Iron cereal + orange juice
- Lunch: Spinach with lemon juice
- Dinner: Lentils with tomato curry
✗ Avoid these with iron meals:
- Tea, coffee, cola (blocks absorption)
- Milk, cheese, calcium supplements (wait 2 hours)
- Antacids (remove iron before it's absorbed)
**Sample daily meal plan:**
- **Breakfast**: Fortified cereal (20mg iron) + fresh orange juice
- **Lunch**: Spinach and chickpea curry with lemon
- **Snack**: Pumpkin seeds + apple
- **Dinner**: Lentil soup (15mg iron) + tomato
**Expected improvement**: Energy boost in 2-3 weeks, full recovery in 6-8 weeks.
Do you have any food allergies or preferences I should know about?"""
    elif any(word in message_lower for word in ['exercise', 'workout', 'walk', 'activity', 'gym']):
        response = f"""Dr. Raahat: Good thinking! Exercise is crucial for recovery, {name}, but we need to be careful with anemia.
**Phase-based exercise plan:**
**Week 1-2 (Recovery phase)**:
- Light walking: 10-15 minutes daily
- Gentle yoga or stretching
- Avoid stairs and running
- Stop if you feel dizzy
**Week 3-4 (Building phase)**:
- Walking: 20-30 minutes daily
- Swimming (very gentle on body)
- No intense exercise yet
**Week 5+ (Normal activity)**:
- Regular walking (45 mins)
- Light strength training
- Normal daily activities
**Warning signs to stop immediately:**
🛑 Shortness of breath
🛑 Chest pain or dizziness
🛑 Extreme fatigue
**Best time to exercise**:
- Morning (after breakfast + iron absorption)
- Evening (when energy is better)
- Not on an empty stomach
Combine exercise with diet changes and supplements for best results. Ready to start tomorrow?"""
    elif any(word in message_lower for word in ['medicine', 'medication', 'supplement', 'doctor', 'prescription']):
        response = f"""Dr. Raahat: Based on your low hemoglobin, iron, and B12, {name}, here's what you need:
**Essential supplements:**
1. **Iron supplement** (START ASAP)
- Type: Ferrous sulphate (cheapest, most effective)
- Dose: Typically 325mg once daily
- Duration: 8-12 weeks
- Take with vitamin C, on empty stomach for best absorption
- Side effects: May cause constipation (normal)
2. **Vitamin B12**
- Option A: Oral supplement (500-1000 mcg daily)
- Option B: Injections (1000 mcg weekly for 4 weeks, then monthly)
- Injections are better for severe deficiency
3. **Folic acid** (works with iron)
- Dose: 1-5mg daily
- Helps red blood cell formation
**IMPORTANT - Schedule doctor visit THIS WEEK:**
✓ Get proper dosage prescription
✓ Check for underlying absorption issues
✓ Get baseline blood test
✓ Schedule follow-up in 4 weeks
**What to avoid:**
✗ Don't self-medicate without doctor guidance
✗ High-dose iron needs monitoring
✗ Some medications interact with iron
When can you visit your doctor?"""
    else:
        # Generic contextual response
        response = f"""Dr. Raahat: Thanks for that question, {name}.
Based on your report showing anemia with low hemoglobin, iron, and B12, here's what's most important right now:
**Your priorities (in order):**
1. **Visit a doctor** - Get proper supplement prescriptions
2. **Dietary changes** - Start eating iron-rich foods today
3. **Supplements** - Iron, B12, and folic acid
4. **Light exercise** - Walking only for now
5. **Track progress** - Note energy levels daily
**This week's action items:**
░ Book doctor appointment
░ Stock up on spinach, lentils, and red meat
░ Start morning walks
░ Get 7-8 hours sleep
Which of these do you want help with first?"""

    # 2. Add RAG-grounded information if available
    if retrieved_docs:
        response += _format_retrieved_docs(retrieved_docs)
    return response


def _format_retrieved_docs(retrieved_docs, limit=2, snippet_chars=150):
    """Render the "Relevant medical information" section, or "" if nothing is usable."""
    entries = []
    for doc in retrieved_docs[:limit]:
        if not isinstance(doc, dict):
            continue
        # Prefer 'content' and fall back to 'text' when it is missing or empty
        snippet = str(doc.get('content') or doc.get('text') or '')[:snippet_chars]
        if not snippet:
            continue
        title = doc.get('title') or 'Medical Information'
        # Number by rendered entries so skipped docs leave no gaps
        entries.append(f"\n{len(entries) + 1}. *{title}*: {snippet}...")
    if not entries:
        # No snippet rendered: omit the header and the sourcing note as well
        return ""
    return (
        "\n\n**Relevant medical information:**"
        + "".join(entries)
        + "\n\n📚 *Note: This information is sourced from verified medical databases.*"
    )
# Build the shared retriever once at import time; if construction raises,
# the module-level handle stays None.
try:
    rag_retriever = RAGDocumentRetriever()
except Exception as exc:
    rag_retriever = None
    print(f"⚠️ RAG not available: {exc}")