therandomuser03 commited on
Commit
bae0f63
·
1 Parent(s): 0ab1c3b

update backend

Browse files
.env.example ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PsyPredict v2.0 Environment Configuration
2
+ # Copy this file to .env and fill in any overrides needed.
3
+ # All values below are production defaults.
4
+
5
+ # ── Ollama / LLaMA 3 (Local Inference) ──────────────────────────────────────
6
+ OLLAMA_BASE_URL=http://localhost:11434
7
+ OLLAMA_MODEL=llama3
8
+ OLLAMA_TIMEOUT_S=90
9
+ OLLAMA_RETRIES=3
10
+ OLLAMA_RETRY_DELAY_S=2.0
11
+
12
+ # ── DistilBERT Text Emotion Model ────────────────────────────────────────────
13
+ DISTILBERT_MODEL=bhadresh-savani/distilbert-base-uncased-emotion
14
+
15
+ # ── Crisis Detection ─────────────────────────────────────────────────────────
16
+ CRISIS_THRESHOLD=0.65
17
+
18
+ # ── Multimodal Fusion Weights (TEXT + FACE must sum to 1.0) ─────────────────
19
+ TEXT_WEIGHT=0.65
20
+ FACE_WEIGHT=0.35
21
+
22
+ # ── Context Window ───────────────────────────────────────────────────────────
23
+ MAX_CONTEXT_TURNS=10
24
+
25
+ # ── Logging ──────────────────────────────────────────────────────────────────
26
+ LOG_LEVEL=INFO
27
+
28
+ # ── Rate Limiting ─────────────────────────────────────────────────────────────
29
+ RATE_LIMIT=30/minute
30
+
31
+ # ── Input Limits ─────────────────────────────────────────────────────────────
32
+ MAX_INPUT_CHARS=2000
33
+
34
+ # ── Frontend URL (for reference) ─────────────────────────────────────────────
35
+ VITE_BACKEND_URL=http://localhost:7860
36
+
37
+ # ── Deprecated (no longer used - kept for reference) ────────────────────────
38
+ # GOOGLE_API_KEY=your_key_here
Dockerfile CHANGED
@@ -4,22 +4,29 @@ FROM python:3.10-slim
4
  # 2. Set working directory
5
  WORKDIR /app
6
 
7
- # 3. Install system dependencies
8
- RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 && rm -rf /var/lib/apt/lists/*
 
 
 
 
 
9
 
10
  # 4. Install Python dependencies
11
  COPY requirements.txt .
12
  RUN pip install --no-cache-dir -r requirements.txt
13
 
14
- # 5. Copy your code (and datasets) into the container
15
  COPY . .
16
 
 
 
17
  RUN python download_models.py
18
 
 
19
  ENV PYTHONPATH=/app
 
 
20
 
21
- # 6. Expose the port (5000 is standard for Flask)
22
- EXPOSE 5000
23
-
24
- # 7. Run the app
25
- CMD ["python", "app/main.py"]
 
4
  # 2. Set working directory
5
  WORKDIR /app
6
 
7
+ # 3. Install system dependencies (including build tools for llama-cpp-python if needed)
8
+ RUN apt-get update && apt-get install -y \
9
+ libgl1 \
10
+ libglib2.0-0 \
11
+ build-essential \
12
+ python3-dev \
13
+ && rm -rf /var/lib/apt/lists/*
14
 
15
  # 4. Install Python dependencies
16
  COPY requirements.txt .
17
  RUN pip install --no-cache-dir -r requirements.txt
18
 
19
+ # 5. Copy your code
20
  COPY . .
21
 
22
+ # 6. Download all ML models (Face, Text, LLaMA 3 GGUF) during build
23
+ # This ensures a "batteries included" image for HF Spaces
24
  RUN python download_models.py
25
 
26
+ # 7. Environment & Port settings (7860 is HF Spaces standard)
27
  ENV PYTHONPATH=/app
28
+ ENV USE_EMBEDDED_LLM=True
29
+ EXPOSE 7860
30
 
31
+ # 8. Run the app with Uvicorn
32
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: PsyPredict Backend
3
  emoji: 🧠
4
  colorFrom: indigo
5
  colorTo: purple
@@ -7,4 +7,64 @@ sdk: docker
7
  pinned: false
8
  ---
9
 
10
- FastAPI backend for **PsyPredict**, providing emotion detection, therapy recommendations, and ML-powered mental health support.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: PsyPredict Backend v2.0
3
  emoji: 🧠
4
  colorFrom: indigo
5
  colorTo: purple
 
7
  pinned: false
8
  ---
9
 
10
+ # PsyPredict Backend v2.0
11
+
12
+ **FastAPI** backend for PsyPredict — production-grade multimodal clinical AI system.
13
+
14
+ ## What Runs Here
15
+
16
+ | Service | Technology |
17
+ |---------|-----------|
18
+ | API Framework | FastAPI + Uvicorn |
19
+ | LLM Inference | Ollama / LLaMA 3 (local) |
20
+ | Text Emotion | DistilBERT (`bhadresh-savani/distilbert-base-uncased-emotion`) |
21
+ | Crisis Detection | Zero-shot NLI (MiniLM) |
22
+ | Face Emotion | Keras CNN (custom trained, `emotion_model_trained.h5`) |
23
+ | Remedies | CSV lookup (`MEDICATION.csv`) |
24
+
25
+ ## Endpoints
26
+
27
+ | Method | Path | Description |
28
+ |--------|------|-------------|
29
+ | `POST` | `/api/chat` | Main therapist — returns `PsychReport` |
30
+ | `POST` | `/api/predict/emotion` | Facial emotion detection |
31
+ | `GET` | `/api/get_advice` | Remedy/condition lookup |
32
+ | `POST` | `/api/analyze/text` | Text emotion + crisis score |
33
+ | `GET` | `/api/health` | System health check |
34
+
35
+ ## Running Locally
36
+
37
+ ```bash
38
+ # 1. Install Ollama + LLaMA 3 (one-time)
39
+ winget install Ollama.Ollama
40
+ ollama pull llama3
41
+
42
+ # 2. Install dependencies
43
+ pip install -r requirements.txt
44
+
45
+ # 3. Start server
46
+ uvicorn app.main:app --host 0.0.0.0 --port 7860 --reload
47
+ ```
48
+
49
+ Swagger docs: http://localhost:7860/docs
50
+
51
+ ## Key Files
52
+
53
+ ```
54
+ app/
55
+ ├── main.py # FastAPI app factory
56
+ ├── config.py # Pydantic Settings
57
+ ├── schemas.py # All request/response models (PsychReport etc.)
58
+ ├── services/
59
+ │ ├── ollama_engine.py # LLaMA 3 async client
60
+ │ ├── text_emotion_engine.py # DistilBERT classifier
61
+ │ ├── crisis_engine.py # Zero-shot NLI crisis detection
62
+ │ ├── fusion_engine.py # Multimodal weighted fusion
63
+ │ ├── emotion_engine.py # Keras CNN face emotion (preserved)
64
+ │ └── remedy_engine.py # CSV remedy lookup (preserved)
65
+ └── api/endpoints/
66
+ ├── therapist.py # POST /api/chat
67
+ ├── facial.py # POST /api/predict/emotion
68
+ ├── remedies.py # GET /api/get_advice
69
+ └── analysis.py # POST /api/analyze/text + GET /api/health
70
+ ```
app/api/__init__.py ADDED
File without changes
app/api/endpoints/__init__.py ADDED
File without changes
app/api/endpoints/analysis.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ analysis.py — PsyPredict Text Analysis & Health Endpoints (FastAPI)
3
+ New endpoints:
4
+ POST /api/analyze/text — standalone DistilBERT text emotion + crisis scoring
5
+ GET /api/health — system health check (Ollama, DistilBERT status)
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+
11
+ from fastapi import APIRouter
12
+
13
+ from app.schemas import (
14
+ HealthResponse,
15
+ TextAnalysisRequest,
16
+ TextAnalysisResponse,
17
+ )
18
+ from app.services.crisis_engine import crisis_engine
19
+ from app.services.ollama_engine import ollama_engine
20
+ from app.services.text_emotion_engine import text_emotion_engine
21
+ from app.config import get_settings
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+ router = APIRouter()
26
+
27
+ settings = get_settings()
28
+
29
+
30
+ # ---------------------------------------------------------------------------
31
+ # POST /api/analyze/text
32
+ # ---------------------------------------------------------------------------
33
+
34
@router.post("/analyze/text", response_model=TextAnalysisResponse)
async def analyze_text(req: TextAnalysisRequest):
    """
    Standalone text emotion analysis pipeline (no LLM, no history needed).

    Runs the DistilBERT classifier and the crisis scorer over ``req.text``
    and returns both results — a cheap pre-screen before full chat inference.
    """
    # Multi-label emotion scores; the first entry is treated as dominant
    # (assumes the engine returns labels ordered by score — TODO confirm).
    emotion_scores = await text_emotion_engine.classify(req.text)
    if emotion_scores:
        top_label = emotion_scores[0].label
    else:
        top_label = "neutral"

    # Zero-shot crisis risk on the same text.
    risk, triggered = await crisis_engine.evaluate(req.text)

    return TextAnalysisResponse(
        emotions=emotion_scores,
        dominant=top_label,
        crisis_risk=round(float(risk), 4),
        crisis_triggered=triggered,
    )
54
+
55
+
56
+ # ---------------------------------------------------------------------------
57
+ # GET /api/health
58
+ # ---------------------------------------------------------------------------
59
+
60
@router.get("/health", response_model=HealthResponse)
async def health():
    """
    System health check.

    Reports whether Ollama is reachable, which model is configured, and
    whether the DistilBERT classifier loaded successfully.
    """
    ollama_up = await ollama_engine.is_reachable()
    text_model_up = text_emotion_engine.is_loaded

    # Any failed component downgrades the overall status.
    if ollama_up and text_model_up:
        overall_status = "ok"
    else:
        overall_status = "degraded"

    if not ollama_up:
        logger.warning("Health check: Ollama unreachable at %s", settings.OLLAMA_BASE_URL)
    if not text_model_up:
        logger.warning("Health check: DistilBERT not loaded. Error: %s", text_emotion_engine.load_error)

    return HealthResponse(
        status=overall_status,
        ollama_reachable=ollama_up,
        ollama_model=settings.OLLAMA_MODEL,
        distilbert_loaded=text_model_up,
    )
app/api/endpoints/facial.py CHANGED
@@ -1,35 +1,75 @@
1
- from flask import Blueprint, request, jsonify
 
 
 
 
 
 
 
 
2
  import cv2
3
  import numpy as np
 
 
 
 
4
  from app.services.emotion_engine import emotion_detector
5
 
6
- # Create a Blueprint (a group of routes)
7
- facial_bp = Blueprint('facial', __name__)
 
8
 
9
- @facial_bp.route('/predict/emotion', methods=['POST'])
10
- def predict_emotion():
 
11
  """
12
- Endpoint to receive an image file and return the detected emotion.
13
- Expects 'form-data' with a key named 'file'.
 
14
  """
15
- if 'file' not in request.files:
16
- return jsonify({"error": "No file part in the request"}), 400
17
 
18
- file = request.files['file']
19
-
20
- if file.filename == '':
21
- return jsonify({"error": "No file selected"}), 400
 
 
22
 
23
  try:
24
- # Convert the uploaded file directly to a numpy array (OpenCV format)
25
- # This avoids saving the file to disk, which is faster and cleaner.
26
- file_bytes = np.frombuffer(file.read(), np.uint8)
 
 
 
 
 
 
 
 
 
 
 
 
27
  image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
28
 
29
- # Pass the image to our AI engine
 
 
 
30
  result = emotion_detector.detect_emotion(image)
31
-
32
- return jsonify(result)
33
 
34
- except Exception as e:
35
- return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ facial.py — PsyPredict Facial Emotion Detection Endpoint (FastAPI)
3
+ Preserved feature: Keras CNN face emotion model (emotion_engine.py unchanged).
4
+ Adapted from Flask Blueprint to FastAPI APIRouter with async file handling.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+
10
  import cv2
11
  import numpy as np
12
+ from fastapi import APIRouter, File, HTTPException, UploadFile
13
+ from fastapi.responses import JSONResponse
14
+
15
+ from app.schemas import EmotionResponse
16
  from app.services.emotion_engine import emotion_detector
17
 
18
+ logger = logging.getLogger(__name__)
19
+
20
+ router = APIRouter()
21
 
22
+
23
@router.post("/predict/emotion", response_model=EmotionResponse)
async def predict_emotion(file: UploadFile = File(...)):
    """
    Receive an uploaded image and return the detected face emotion + confidence.

    Wraps the preserved Keras CNN engine (emotion_engine.py) — model unchanged.
    Degrades gracefully: empty, corrupt, or faceless frames yield a neutral
    result instead of an error, so webcam polling never crashes the client.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file selected")

    # Only common web image formats are accepted from the browser webcam.
    if file.content_type not in {"image/jpeg", "image/jpg", "image/png", "image/webp"}:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid file type '{file.content_type}'. Accepted: JPEG, PNG, WEBP",
        )

    try:
        raw = await file.read()

        # Webcam not ready yet → tiny/empty payload; answer neutral quietly.
        if not raw or len(raw) < 100:
            return EmotionResponse(emotion="neutral", confidence=0.0, message="Empty frame skipped")

        # Hard cap on payload size.
        if len(raw) > 10 * 1024 * 1024:  # 10 MB limit
            raise HTTPException(status_code=413, detail="Image too large (max 10MB)")

        # Decode entirely in memory — no temp files on disk.
        pixel_buf = np.frombuffer(raw, np.uint8)
        if pixel_buf.size == 0:
            return EmotionResponse(emotion="neutral", confidence=0.0, message="Empty buffer")

        frame = cv2.imdecode(pixel_buf, cv2.IMREAD_COLOR)
        # Corrupted / blank frame — return neutral instead of crashing.
        if frame is None:
            return EmotionResponse(emotion="neutral", confidence=0.0, message="Camera frame not ready")

        result = emotion_detector.detect_emotion(frame)

        if "error" in result:
            # No face detected — still a neutral answer, not a failure.
            return EmotionResponse(emotion="neutral", confidence=0.0, message=result.get("error"))

        return EmotionResponse(**result)

    except HTTPException:
        raise
    except Exception as exc:
        # DEBUG level: webcam polling produces frequent benign failures.
        logger.debug("Facial emotion prediction skipped: %s", exc)
        return EmotionResponse(emotion="neutral", confidence=0.0, message="Frame processing error")
app/api/endpoints/remedies.py CHANGED
@@ -1,22 +1,45 @@
1
- from flask import Blueprint, request, jsonify
 
 
 
 
 
 
 
 
 
 
 
 
2
  from app.services.remedy_engine import remedy_engine
3
 
4
- remedies_bp = Blueprint('remedies', __name__)
 
 
5
 
6
- @remedies_bp.route('/get_advice', methods=['GET'])
7
- def get_advice():
 
8
  """
9
- Query Param: ?condition=Depression
10
- Returns: JSON with meds, treatments, and Gita story.
 
11
  """
12
- condition = request.args.get('condition')
13
-
14
  if not condition:
15
- return jsonify({"error": "Missing 'condition' parameter"}), 400
 
 
 
 
 
 
 
 
 
16
 
17
- result = remedy_engine.get_remedy(condition)
 
18
 
19
- if result:
20
- return jsonify(result)
21
- else:
22
- return jsonify({"message": "No specific remedy found for this condition."}), 404
 
1
+ """
2
+ remedies.py — PsyPredict Remedy Endpoint (FastAPI)
3
+ Preserved feature: CSV-based remedy lookup (remedy_engine.py unchanged).
4
+ Adapted from Flask Blueprint to FastAPI APIRouter with async wrapper.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import asyncio
9
+ import logging
10
+
11
+ from fastapi import APIRouter, HTTPException, Query
12
+
13
+ from app.schemas import RemedyResponse
14
  from app.services.remedy_engine import remedy_engine
15
 
16
+ logger = logging.getLogger(__name__)
17
+
18
+ router = APIRouter()
19
 
20
+
21
@router.get("/get_advice", response_model=RemedyResponse)
async def get_advice(condition: str = Query(..., min_length=1, max_length=100)):
    """
    Look up a remedy by condition name (case-insensitive partial match).

    Thin async wrapper around the preserved CSV engine (remedy_engine.py —
    unchanged). Example: GET /api/get_advice?condition=Anxiety
    """
    condition = condition.strip()
    if not condition:
        raise HTTPException(status_code=400, detail="Condition parameter cannot be empty")

    # The CSV lookup is synchronous; run it in a worker thread to keep
    # the event loop free.
    record = await asyncio.to_thread(remedy_engine.get_remedy, condition)

    if record is None:
        raise HTTPException(
            status_code=404,
            detail=f"No remedy found for condition: '{condition}'",
        )

    if "error" in record:
        raise HTTPException(status_code=500, detail=record["error"])

    return RemedyResponse(**record)
 
 
 
app/api/endpoints/therapist.py CHANGED
@@ -1,33 +1,141 @@
1
- from flask import Blueprint, request, jsonify
2
- from app.services.llm_engine import llm_therapist
 
 
 
 
 
 
 
 
 
 
3
 
4
- therapist_bp = Blueprint('therapist', __name__)
 
5
 
6
- @therapist_bp.route('/chat', methods=['POST'])
7
- def chat():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  """
9
- Expects JSON:
10
- {
11
- "message": "I feel anxious",
12
- "emotion": "fear",
13
- "history": [
14
- {"role": "user", "content": "Hi"},
15
- {"role": "assistant", "content": "Hello!"}
16
- ]
17
- }
18
  """
19
- data = request.get_json()
20
-
21
- user_message = data.get('message', '')
22
- current_emotion = data.get('emotion', None)
23
- history = data.get('history', [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- if not user_message:
26
- return jsonify({"error": "Message cannot be empty"}), 400
 
 
 
 
 
27
 
28
- # Generate response
29
- response_text = llm_therapist.generate_response(user_message, current_emotion, history)
 
 
 
 
 
 
 
 
 
30
 
31
- return jsonify({
32
- "response": response_text
33
- })
 
 
 
 
 
1
+ """
2
+ therapist.py — PsyPredict AI Therapist Endpoint (FastAPI)
3
+ Full inference pipeline:
4
+ 1. Input sanitization + validation (Pydantic)
5
+ 2. Text emotion classification (DistilBERT)
6
+ 3. Crisis evaluation (zero-shot NLI) — override if triggered
7
+ 4. Multimodal fusion (text + face)
8
+ 5. Ollama/LLaMA 3 structured report generation
9
+ 6. PsychReport JSON schema validation
10
+ 7. Streaming response option
11
+ """
12
+ from __future__ import annotations
13
 
14
+ import logging
15
+ from typing import AsyncIterator
16
 
17
+ from fastapi import APIRouter, HTTPException
18
+ from fastapi.responses import StreamingResponse
19
+
20
+ from app.schemas import ChatRequest, ChatResponse, PsychReport, RemedyResponse
21
+ from app.services.ollama_engine import ollama_engine
22
+ from app.services.text_emotion_engine import text_emotion_engine
23
+ from app.services.crisis_engine import crisis_engine
24
+ from app.services.fusion_engine import fusion_engine
25
+ from app.services.remedy_engine import remedy_engine
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+ router = APIRouter()
30
+
31
+ # Map risk levels / dominant emotions to CSV conditions
32
+ RISK_TO_CONDITION: dict[str, str] = {
33
+ "critical": "Suicidal Ideation",
34
+ "high": "Depression",
35
+ "moderate": "Anxiety",
36
+ "low": "Anxiety",
37
+ "minimal": "Anxiety",
38
+ }
39
+
40
+ EMOTION_TO_CONDITION: dict[str, str] = {
41
+ "sad": "Depression",
42
+ "fear": "Anxiety",
43
+ "angry": "Bipolar Disorder",
44
+ "disgust": "Anxiety",
45
+ "surprised": "Anxiety",
46
+ "neutral": "Anxiety",
47
+ "happy": "Anxiety",
48
+ }
49
+
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # POST /api/chat
53
+ # ---------------------------------------------------------------------------
54
+
55
@router.post("/chat", response_model=ChatResponse)
async def chat(req: ChatRequest):  # type: ignore[misc]
    """
    Main inference endpoint.

    Accepts user message + webcam emotion + history and runs the pipeline:
    text emotion classification → crisis override check → multimodal fusion →
    LLaMA 3 (Ollama) report generation (optionally streamed) → CSV remedy lookup.
    Returns a structured PsychReport + conversational reply + CSV remedy data.
    """
    import asyncio  # local import: used to off-load blocking CSV lookups

    user_text = req.message
    face_emotion = req.emotion or "neutral"
    history = req.history

    # ── Step 1: Text Emotion Classification ────────────────────────────────
    text_labels = await text_emotion_engine.classify(user_text)
    dominant_text_emotion = text_labels[0].label if text_labels else "neutral"
    text_emotion_summary = text_emotion_engine.summary_string(text_labels)

    logger.info(
        "Text emotion: %s | Face emotion: %s",
        text_emotion_summary,
        face_emotion,
    )

    # ── Step 2: Crisis Evaluation (OVERRIDE LAYER) ──────────────────────────
    crisis_score, crisis_triggered = await crisis_engine.evaluate(user_text)

    if crisis_triggered:
        reply, report = crisis_engine.build_crisis_report(crisis_score)
        # Fix: the CSV lookup is synchronous — run it off the event loop
        # (consistent with the remedies endpoint), falling back to "Anxiety"
        # when no "Suicidal Ideation" entry exists.
        remedy_data = await asyncio.to_thread(remedy_engine.get_remedy, "Suicidal Ideation")
        if not remedy_data:
            remedy_data = await asyncio.to_thread(remedy_engine.get_remedy, "Anxiety")
        remedy = RemedyResponse(**remedy_data) if remedy_data and "error" not in remedy_data else None
        return ChatResponse(
            response=reply,
            report=report,
            text_emotion=text_labels,
            fusion_risk_score=float(crisis_score),
            remedy=remedy,
        )

    # ── Step 3: Multimodal Fusion ────────────────────────────────────────────
    fusion = fusion_engine.compute(
        dominant_text_emotion=dominant_text_emotion,
        face_emotion=face_emotion,
    )
    logger.info("Fusion risk score: %.4f (dominant: %s)", fusion.final_risk_score, fusion.dominant_modality)

    # ── Step 4: Streaming Response ───────────────────────────────────────────
    if req.stream:
        # Fix: dropped the unused `import asyncio as _asyncio` and the dead
        # `accumulated` buffer that the original streaming branch carried.
        async def stream_generator() -> AsyncIterator[str]:
            async for token in ollama_engine.generate_stream(
                user_text=user_text,
                face_emotion=face_emotion,
                history=history,
                text_emotion_summary=text_emotion_summary,
            ):
                yield token

        return StreamingResponse(stream_generator(), media_type="text/plain")

    # ── Step 5: LLM Generation (non-streaming) ──────────────────────────────
    reply, report = await ollama_engine.generate(
        user_text=user_text,
        face_emotion=face_emotion,
        history=history,
        text_emotion_summary=text_emotion_summary,
    )

    # ── Step 6: Remedy Lookup from CSV ──────────────────────────────────────
    # Priority: risk level → dominant text emotion → "Anxiety" fallback.
    risk_key = report.risk_classification.value.lower()
    condition = RISK_TO_CONDITION.get(risk_key) or EMOTION_TO_CONDITION.get(dominant_text_emotion.lower(), "Anxiety")
    # Fix: off-load the blocking CSV read here too.
    remedy_raw = await asyncio.to_thread(remedy_engine.get_remedy, condition)
    remedy = None
    if remedy_raw and "error" not in remedy_raw:
        try:
            remedy = RemedyResponse(**remedy_raw)
        except Exception as e:
            logger.warning("Could not build RemedyResponse: %s", e)

    return ChatResponse(
        response=reply,
        report=report,
        text_emotion=text_labels,
        fusion_risk_score=float(fusion.final_risk_score),
        remedy=remedy,
    )
app/config.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ config.py — PsyPredict Production Configuration
3
+ All settings loaded from environment variables via Pydantic Settings.
4
+ """
5
+ from pydantic_settings import BaseSettings, SettingsConfigDict
6
+ from functools import lru_cache
7
+
8
+
9
class Settings(BaseSettings):
    """Central application settings, read from the environment / .env file."""

    # ── Ollama / LLM ─────────────────────────────────────────────────────
    OLLAMA_BASE_URL: str = "http://localhost:11434"
    OLLAMA_MODEL: str = "llama3"
    OLLAMA_TIMEOUT_S: int = 120
    OLLAMA_RETRIES: int = 3
    OLLAMA_RETRY_DELAY_S: float = 2.0

    # ── Embedded LLM (Docker / HF Spaces) ────────────────────────────────
    USE_EMBEDDED_LLM: bool = False  # set True via .env when running embedded
    GGUF_MODEL_PATH: str = "app/ml_assets/llama-3-8b-instruct.Q4_K_M.gguf"
    LLM_CONTEXT_SIZE: int = 2048

    # ── DistilBERT text emotion model ────────────────────────────────────
    DISTILBERT_MODEL: str = "bhadresh-savani/distilbert-base-uncased-emotion"

    # ── Crisis detection ─────────────────────────────────────────────────
    CRISIS_THRESHOLD: float = 0.65

    # ── Multimodal fusion weights (expected to sum to ~1.0) ──────────────
    TEXT_WEIGHT: float = 0.65
    FACE_WEIGHT: float = 0.35

    # ── Conversation context window ──────────────────────────────────────
    MAX_CONTEXT_TURNS: int = 10

    # ── Logging / rate limiting / input sanitization ─────────────────────
    LOG_LEVEL: str = "INFO"
    RATE_LIMIT: str = "30/minute"
    MAX_INPUT_CHARS: int = 2000

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",  # tolerate legacy env vars such as GOOGLE_API_KEY
    )
49
+
50
+
51
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the process-wide Settings instance (built once, then cached)."""
    cached = Settings()
    return cached
app/main.py CHANGED
@@ -1,35 +1,163 @@
1
- import os
2
- from flask import Flask
3
- from flask_cors import CORS
4
- from dotenv import load_dotenv
5
-
6
- # Load environment variables (API Keys) from .env
7
- load_dotenv()
8
-
9
- # Import the 3 Endpoints
10
- from app.api.endpoints.facial import facial_bp
11
- from app.api.endpoints.remedies import remedies_bp
12
- from app.api.endpoints.therapist import therapist_bp
13
-
14
- def create_app():
15
- app = Flask(__name__)
16
-
17
- # Enable CORS so Frontend (port 5173) can talk to Backend (port 5000)
18
- CORS(app)
19
-
20
- # Register the Blueprints (The 3 features)
21
- app.register_blueprint(facial_bp, url_prefix='/api')
22
- app.register_blueprint(remedies_bp, url_prefix='/api')
23
- app.register_blueprint(therapist_bp, url_prefix='/api')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  return app
26
 
27
- if __name__ == "__main__":
28
- app = create_app()
29
 
30
- print("🚀 PsyPredict Backend running on port 7860")
31
- print(" - /api/predict/emotion [POST]")
32
- print(" - /api/get_advice?condition=... [GET]")
33
- print(" - /api/chat [POST]")
 
34
 
35
- app.run(host="0.0.0.0", port=7860, debug=False)
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ main.py — PsyPredict FastAPI Application (Production)
3
+ Replaces Flask. Key features:
4
+ - Async request handling (FastAPI + Uvicorn)
5
+ - CORS middleware
6
+ - Rate limiting (SlowAPI)
7
+ - Structured logging (Python logging)
8
+ - Startup model pre-warming
9
+ - Graceful shutdown (Ollama client cleanup)
10
+ - FastAPI auto docs at /docs (Swagger) and /redoc
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import logging
15
+ import sys
16
+ from contextlib import asynccontextmanager
17
+
18
+ from fastapi import FastAPI, Request
19
+ from fastapi.middleware.cors import CORSMiddleware
20
+ from fastapi.responses import JSONResponse
21
+ from slowapi import Limiter, _rate_limit_exceeded_handler
22
+ from slowapi.errors import RateLimitExceeded
23
+ from slowapi.util import get_remote_address
24
+
25
+ from app.config import get_settings
26
+ from app.api.endpoints.facial import router as facial_router
27
+ from app.api.endpoints.remedies import router as remedies_router
28
+ from app.api.endpoints.therapist import router as therapist_router
29
+ from app.api.endpoints.analysis import router as analysis_router
30
+
31
+ settings = get_settings()
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # Logging
35
+ # ---------------------------------------------------------------------------
36
+
37
+ logging.basicConfig(
38
+ level=getattr(logging, settings.LOG_LEVEL, logging.INFO),
39
+ format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
40
+ handlers=[logging.StreamHandler(sys.stdout)],
41
+ )
42
+ logger = logging.getLogger(__name__)
43
+
44
+ # ---------------------------------------------------------------------------
45
+ # Rate Limiter
46
+ # ---------------------------------------------------------------------------
47
+
48
+ limiter = Limiter(key_func=get_remote_address, default_limits=[settings.RATE_LIMIT])
49
+
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Lifespan (startup / shutdown events)
53
+ # ---------------------------------------------------------------------------
54
+
55
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    FastAPI lifespan hook.

    Startup: pre-warm the DistilBERT and crisis classifiers and probe Ollama.
    Shutdown: close the shared Ollama async client.
    """
    banner = "═══════════════════════════════════════"
    logger.info(banner)
    logger.info("🚀 PsyPredict v2.0 — Production Backend")
    logger.info(banner)
    logger.info("Config: Ollama=%s model=%s", settings.OLLAMA_BASE_URL, settings.OLLAMA_MODEL)

    # Load the text-emotion model up front so the first request is fast.
    logger.info("Pre-warming DistilBERT text emotion model...")
    from app.services.text_emotion_engine import initialize as init_text
    init_text(settings.DISTILBERT_MODEL)

    # Same treatment for the zero-shot crisis classifier.
    logger.info("Pre-warming crisis detection classifier...")
    from app.services.crisis_engine import initialize_crisis_classifier
    initialize_crisis_classifier()

    # Probe Ollama; a miss only warns — chat degrades to fallback responses.
    from app.services.ollama_engine import ollama_engine
    if await ollama_engine.is_reachable():
        logger.info("✅ Ollama reachable at %s (model: %s)", settings.OLLAMA_BASE_URL, settings.OLLAMA_MODEL)
    else:
        logger.warning(
            "⚠️ Ollama NOT reachable at %s — chat will return fallback responses. "
            "Run: ollama serve && ollama pull %s",
            settings.OLLAMA_BASE_URL,
            settings.OLLAMA_MODEL,
        )

    logger.info("✅ Startup complete. Listening on port 7860.")
    logger.info(" Docs: http://localhost:7860/docs")
    logger.info(banner)

    yield  # ── application is serving ──

    logger.info("Shutting down PsyPredict backend...")
    await ollama_engine.close()
    logger.info("Goodbye.")
99
+
100
+ # ---------------------------------------------------------------------------
101
+ # FastAPI App
102
+ # ---------------------------------------------------------------------------
103
+
104
def create_app() -> FastAPI:
    """Build and configure the FastAPI application (docs, limits, CORS, routers)."""
    app = FastAPI(
        title="PsyPredict API",
        description=(
            "Production-grade multimodal mental health AI system. "
            "Powered by LLaMA 3 (Ollama) + DistilBERT + Keras CNN facial emotion model."
        ),
        version="2.0.0",
        lifespan=lifespan,
        docs_url="/docs",
        redoc_url="/redoc",
    )

    # SlowAPI rate limiting — default limits come from settings.RATE_LIMIT.
    app.state.limiter = limiter
    app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

    # CORS is wide open for now; restrict to the frontend origin in production.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Catch-all so unexpected errors never leak stack traces to clients.
    @app.exception_handler(Exception)
    async def global_exception_handler(request: Request, exc: Exception):
        logger.error("Unhandled exception: %s | path=%s", exc, request.url.path)
        return JSONResponse(
            status_code=500,
            content={"detail": "Internal server error. Please try again."},
        )

    # Every feature router is mounted under the /api prefix.
    for feature_router, tag in (
        (facial_router, "Facial Emotion"),
        (remedies_router, "Remedies"),
        (therapist_router, "AI Therapist"),
        (analysis_router, "Text Analysis & Health"),
    ):
        app.include_router(feature_router, prefix="/api", tags=[tag])

    return app
146
 
 
 
147
 
148
+ app = create_app()
149
+
150
+ # ---------------------------------------------------------------------------
151
+ # Entry point
152
+ # ---------------------------------------------------------------------------
153
 
154
+ if __name__ == "__main__":
155
+ import uvicorn
156
+ uvicorn.run(
157
+ "app.main:app",
158
+ host="0.0.0.0",
159
+ port=7860,
160
+ reload=False,
161
+ log_level=settings.LOG_LEVEL.lower(),
162
+ workers=1, # Keep at 1: models are singletons loaded in memory
163
+ )
app/schemas.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ schemas.py — PsyPredict Pydantic Data Models
3
+ All request/response bodies are validated via these schemas.
4
+ No unstructured dicts pass through the API layer.
5
+ """
6
+ from __future__ import annotations
7
+ from typing import List, Optional, Any, Dict
8
+ from enum import Enum
9
+ from pydantic import BaseModel, Field, field_validator
10
+ import re
11
+
12
+
13
+ # ---------------------------------------------------------------------------
14
+ # Enums
15
+ # ---------------------------------------------------------------------------
16
+
17
+ class RiskLevel(str, Enum):
18
+ MINIMAL = "MINIMAL"
19
+ LOW = "LOW"
20
+ MODERATE = "MODERATE"
21
+ HIGH = "HIGH"
22
+ CRITICAL = "CRITICAL"
23
+
24
+
25
+ class MessageRole(str, Enum):
26
+ USER = "user"
27
+ ASSISTANT = "assistant"
28
+
29
+
30
+ # ---------------------------------------------------------------------------
31
+ # Shared Sub-models
32
+ # ---------------------------------------------------------------------------
33
+
34
+ class ConversationMessage(BaseModel):
35
+ role: MessageRole
36
+ content: str
37
+
38
+
39
+ class EmotionLabel(BaseModel):
40
+ label: str
41
+ score: float = Field(ge=0.0, le=1.0)
42
+
43
+
44
+ class CrisisResource(BaseModel):
45
+ name: str
46
+ contact: str
47
+ available: str = "24/7"
48
+
49
+
50
+ # ---------------------------------------------------------------------------
51
+ # PsychReport — Core Structured Output
52
+ # ---------------------------------------------------------------------------
53
+
54
+ class PsychReport(BaseModel):
55
+ """
56
+ Structured psychological assessment output.
57
+ Produced by the LLM layer and validated against this schema.
58
+ """
59
+ risk_classification: RiskLevel = Field(
60
+ description="Overall risk level based on text + multimodal fusion"
61
+ )
62
+ emotional_state_summary: str = Field(
63
+ description="Concise summary of detected emotional state (1-2 sentences)"
64
+ )
65
+ behavioral_inference: str = Field(
66
+ description="Inferred behavioral patterns from the conversation"
67
+ )
68
+ cognitive_distortions: List[str] = Field(
69
+ default_factory=list,
70
+ description="List of detected cognitive distortions (e.g. catastrophizing, black-and-white thinking)"
71
+ )
72
+ suggested_interventions: List[str] = Field(
73
+ default_factory=list,
74
+ description="Clinical-style intervention suggestions"
75
+ )
76
+ confidence_score: float = Field(
77
+ ge=0.0, le=1.0,
78
+ description="Aggregate confidence of this assessment (0.0–1.0)"
79
+ )
80
+ crisis_triggered: bool = Field(
81
+ default=False,
82
+ description="True if crisis override layer activated"
83
+ )
84
+ crisis_resources: Optional[List[CrisisResource]] = Field(
85
+ default=None,
86
+ description="Emergency resources, populated only when crisis_triggered=True"
87
+ )
88
+ service_degraded: bool = Field(
89
+ default=False,
90
+ description="True if Ollama was unreachable and fallback was used"
91
+ )
92
+
93
+
94
+ # ---------------------------------------------------------------------------
95
+ # Fallback Report (used when Ollama is unavailable)
96
+ # ---------------------------------------------------------------------------
97
+
98
+ def fallback_report() -> PsychReport:
99
+ return PsychReport(
100
+ risk_classification=RiskLevel.MINIMAL,
101
+ emotional_state_summary="Assessment unavailable — inference service is currently offline.",
102
+ behavioral_inference="Unable to infer behavioral patterns at this time.",
103
+ cognitive_distortions=[],
104
+ suggested_interventions=["Please try again shortly."],
105
+ confidence_score=0.0,
106
+ crisis_triggered=False,
107
+ service_degraded=True,
108
+ )
109
+
110
+
111
+ # ---------------------------------------------------------------------------
112
+ # Chat Endpoint
113
+ # ---------------------------------------------------------------------------
114
+
115
+ class ChatRequest(BaseModel):
116
+ message: str = Field(min_length=1, max_length=2000)
117
+ emotion: Optional[str] = Field(default="neutral", description="Face emotion from webcam")
118
+ history: List[ConversationMessage] = Field(default_factory=list)
119
+ stream: bool = Field(default=False, description="Enable streaming response")
120
+
121
+ @field_validator("message")
122
+ @classmethod
123
+ def sanitize_message(cls, v: str) -> str:
124
+ # Strip HTML tags
125
+ v = re.sub(r"<[^>]+>", "", v)
126
+ # Collapse whitespace
127
+ v = " ".join(v.split())
128
+ return v.strip()
129
+
130
+ @field_validator("emotion")
131
+ @classmethod
132
+ def normalize_emotion(cls, v: str) -> str:
133
+ return v.lower().strip() if v else "neutral"
134
+
135
+
136
+ class ChatResponse(BaseModel):
137
+ response: str = Field(description="Conversational reply text")
138
+ report: PsychReport
139
+ text_emotion: Optional[List[EmotionLabel]] = None
140
+ fusion_risk_score: Optional[float] = None
141
+ remedy: Optional[RemedyResponse] = None # CSV-based remedy data, populated automatically
142
+
143
+
144
+ # ---------------------------------------------------------------------------
145
+ # Text Analysis Endpoint
146
+ # ---------------------------------------------------------------------------
147
+
148
+ class TextAnalysisRequest(BaseModel):
149
+ text: str = Field(min_length=1, max_length=2000)
150
+
151
+ @field_validator("text")
152
+ @classmethod
153
+ def sanitize_text(cls, v: str) -> str:
154
+ v = re.sub(r"<[^>]+>", "", v)
155
+ return " ".join(v.split()).strip()
156
+
157
+
158
+ class TextAnalysisResponse(BaseModel):
159
+ emotions: List[EmotionLabel]
160
+ dominant: str
161
+ crisis_risk: float = Field(ge=0.0, le=1.0)
162
+ crisis_triggered: bool
163
+
164
+
165
+ # ---------------------------------------------------------------------------
166
+ # Facial / Emotion Endpoint
167
+ # ---------------------------------------------------------------------------
168
+
169
+ class EmotionResponse(BaseModel):
170
+ emotion: Optional[str] = None
171
+ confidence: Optional[float] = None
172
+ face_box: Optional[List[int]] = None
173
+ message: Optional[str] = None
174
+ error: Optional[str] = None
175
+
176
+
177
+ # ---------------------------------------------------------------------------
178
+ # Remedy Endpoint
179
+ # ---------------------------------------------------------------------------
180
+
181
+ class RemedyResponse(BaseModel):
182
+ condition: str
183
+ symptoms: str
184
+ treatments: str
185
+ medications: str
186
+ dosage: str
187
+ gita_remedy: str
188
+
189
+
190
+ # ---------------------------------------------------------------------------
191
+ # Health Endpoint
192
+ # ---------------------------------------------------------------------------
193
+
194
+ class HealthResponse(BaseModel):
195
+ status: str
196
+ ollama_reachable: bool
197
+ ollama_model: str
198
+ distilbert_loaded: bool
199
+ version: str = "2.0.0"
app/services/__init__.py ADDED
File without changes
app/services/crisis_engine.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ crisis_engine.py — PsyPredict Crisis Detection Layer
3
+ Uses DistilBERT zero-shot classification (NOT keyword matching).
4
+ Weighted risk scoring across mental health risk dimensions.
5
+ Triggers override of LLM output when threshold exceeded.
6
+ This layer is the safety net — it runs BEFORE and OVERRIDES the LLM.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import logging
12
+ from typing import List, Optional
13
+
14
+ from app.schemas import CrisisResource, PsychReport, RiskLevel
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Risk Labels + Weights (tuned empirically)
20
+ # ---------------------------------------------------------------------------
21
+
22
+ RISK_LABELS: list[str] = [
23
+ "suicidal ideation",
24
+ "self-harm intent",
25
+ "immediate danger to self",
26
+ "severe mental breakdown",
27
+ "hopelessness and worthlessness",
28
+ ]
29
+
30
+ RISK_WEIGHTS: dict[str, float] = {
31
+ "suicidal ideation": 1.0,
32
+ "self-harm intent": 1.0,
33
+ "immediate danger to self": 0.95,
34
+ "severe mental breakdown": 0.60,
35
+ "hopelessness and worthlessness": 0.50,
36
+ }
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # Crisis Resources (India + International)
40
+ # ---------------------------------------------------------------------------
41
+
42
+ CRISIS_RESOURCES: List[CrisisResource] = [
43
+ CrisisResource(name="iCall (India)", contact="9152987821", available="Mon–Sat 8am–10pm"),
44
+ CrisisResource(name="Vandrevala Foundation (India)", contact="1860-2662-345", available="24/7"),
45
+ CrisisResource(name="AASRA (India)", contact="9820466627", available="24/7"),
46
+ CrisisResource(name="Befrienders Worldwide", contact="https://www.befrienders.org", available="24/7"),
47
+ CrisisResource(name="Crisis Text Line (US/UK)", contact="Text HOME to 741741", available="24/7"),
48
+ ]
49
+
50
+ # ---------------------------------------------------------------------------
51
+ # Zero-Shot Classifier
52
+ # ---------------------------------------------------------------------------
53
+
54
+ _zero_shot_pipeline = None
55
+ _load_error: Optional[str] = None
56
+
57
+
58
+ def initialize_crisis_classifier() -> None:
59
+ """
60
+ Load MiniLM zero-shot classifier at startup.
61
+ Uses cross-encoder/nli-MiniLM2-L6-H768 — lightweight, fast.
62
+ """
63
+ global _zero_shot_pipeline, _load_error
64
+ try:
65
+ from transformers import pipeline as hf_pipeline
66
+ logger.info("Loading crisis zero-shot classifier...")
67
+ _zero_shot_pipeline = hf_pipeline(
68
+ "zero-shot-classification",
69
+ model="cross-encoder/nli-MiniLM2-L6-H768",
70
+ device=-1, # CPU
71
+ )
72
+ logger.info("✅ Crisis classifier loaded.")
73
+ except Exception as exc:
74
+ _load_error = str(exc)
75
+ logger.error("❌ Crisis classifier load failed: %s", exc)
76
+
77
+
78
+ def _score_sync(text: str) -> float:
79
+ """
80
+ Synchronous zero-shot scoring. Runs in thread pool.
81
+ Returns weighted crisis risk score in [0, 1].
82
+ """
83
+ if _zero_shot_pipeline is None:
84
+ # Fallback: basic substring check for true emergencies only
85
+ return _fallback_score(text)
86
+
87
+ try:
88
+ result = _zero_shot_pipeline(
89
+ text[:512],
90
+ candidate_labels=RISK_LABELS,
91
+ multi_label=True,
92
+ )
93
+ label_scores: dict[str, float] = dict(
94
+ zip(result["labels"], result["scores"])
95
+ )
96
+ # Weighted sum, normalized to [0, 1]
97
+ total_weight = sum(RISK_WEIGHTS.values())
98
+ weighted_sum = sum(
99
+ label_scores.get(lbl, 0.0) * RISK_WEIGHTS[lbl]
100
+ for lbl in RISK_LABELS
101
+ )
102
+ return min(weighted_sum / total_weight, 1.0)
103
+ except Exception as exc:
104
+ logger.error("Crisis scoring error: %s", exc)
105
+ return _fallback_score(text)
106
+
107
+
108
+ def _fallback_score(text: str) -> float:
109
+ """
110
+ Hard fallback: only fires on unambiguous semantic signals.
111
+ This is distinct from keyword matching — uses phrase-level context.
112
+ """
113
+ HIGH_RISK_PHRASES = [
114
+ "want to die", "kill myself", "end my life", "hurt myself",
115
+ "suicide", "self harm", "self-harm", "no reason to live",
116
+ "don't want to exist", "cannot go on", "take my life",
117
+ ]
118
+ t = text.lower()
119
+ hits = sum(1 for phrase in HIGH_RISK_PHRASES if phrase in t)
120
+ return min(hits * 0.35, 1.0)
121
+
122
+
123
+ class CrisisEngine:
124
+ """
125
+ Evaluates crisis risk from user text.
126
+ Must be called before LLM generation.
127
+ If triggered, returns a deterministic PsychReport override.
128
+ """
129
+
130
+ def __init__(self, threshold: float = 0.65) -> None:
131
+ self.threshold = threshold
132
+
133
+ async def evaluate(self, text: str) -> tuple[float, bool]:
134
+ """
135
+ Returns (risk_score, crisis_triggered).
136
+ Runs synchronous model in thread pool.
137
+ """
138
+ score = await asyncio.to_thread(_score_sync, text)
139
+ triggered = score >= self.threshold
140
+ if triggered:
141
+ logger.warning(
142
+ "CRISIS TRIGGERED — risk_score=%.3f text=%r",
143
+ score,
144
+ text[:100],
145
+ )
146
+ return score, triggered
147
+
148
+ def build_crisis_report(self, risk_score: float) -> tuple[str, PsychReport]:
149
+ """
150
+ Returns deterministic crisis reply + PsychReport.
151
+ Does NOT involve the LLM.
152
+ """
153
+ reply = (
154
+ "I hear that you're going through something very serious right now. "
155
+ "Please reach out to a crisis support line immediately — "
156
+ "you don't have to face this alone."
157
+ )
158
+ report = PsychReport(
159
+ risk_classification=RiskLevel.CRITICAL,
160
+ emotional_state_summary=(
161
+ "Severe psychological distress detected. Indicators of self-harm "
162
+ "or suicidal ideation are present."
163
+ ),
164
+ behavioral_inference=(
165
+ "User's expressed content suggests acute crisis state. "
166
+ "Immediate professional intervention is warranted."
167
+ ),
168
+ cognitive_distortions=["Hopelessness", "All-or-nothing thinking"],
169
+ suggested_interventions=[
170
+ "Immediate contact with a mental health crisis line.",
171
+ "Notify a trusted person or emergency services if in immediate danger.",
172
+ "Seek in-person emergency psychiatric evaluation.",
173
+ ],
174
+ confidence_score=round(risk_score, 3),
175
+ crisis_triggered=True,
176
+ crisis_resources=CRISIS_RESOURCES,
177
+ service_degraded=False,
178
+ )
179
+ return reply, report
180
+
181
+ @property
182
+ def is_loaded(self) -> bool:
183
+ return _zero_shot_pipeline is not None
184
+
185
+
186
+ # Singleton
187
+ crisis_engine = CrisisEngine()
app/services/fusion_engine.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ fusion_engine.py — PsyPredict Multimodal Weighted Fusion Engine
3
+ Combines text emotion score + face emotion score → final risk score.
4
+ Weights are configurable via app config (TEXT_WEIGHT, FACE_WEIGHT).
5
+ Speech modality placeholder included for future expansion.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from dataclasses import dataclass
11
+ from typing import Optional
12
+
13
+ from app.config import get_settings
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Face emotion → distress score mapping
19
+ # Calibrated: fear/sadness = high distress, happy = minimal distress
20
+ # ---------------------------------------------------------------------------
21
+
22
+ FACE_DISTRESS_SCORES: dict[str, float] = {
23
+ "fear": 0.80,
24
+ "sad": 0.70,
25
+ "angry": 0.50,
26
+ "disgust": 0.40,
27
+ "surprised": 0.30,
28
+ "neutral": 0.20,
29
+ "happy": 0.05,
30
+ }
31
+
32
+ # DistilBERT emotion labels → distress scores
33
+ TEXT_EMOTION_DISTRESS_SCORES: dict[str, float] = {
34
+ "sadness": 0.85,
35
+ "fear": 0.80,
36
+ "anger": 0.60,
37
+ "disgust": 0.50,
38
+ "surprise": 0.30,
39
+ "joy": 0.05,
40
+ "love": 0.05,
41
+ "neutral": 0.20,
42
+ }
43
+
44
+
45
+ @dataclass
46
+ class FusionResult:
47
+ """Result of multimodal fusion scoring."""
48
+ final_risk_score: float # 0.0–1.0 weighted combined score
49
+ text_score: float # Raw text distress score
50
+ face_score: float # Raw face distress score
51
+ speech_score: Optional[float] # Placeholder — always None for now
52
+ dominant_modality: str # "text" | "face" | "balanced"
53
+ text_weight: float
54
+ face_weight: float
55
+
56
+
57
+ class FusionEngine:
58
+ """
59
+ Computes the weighted multimodal risk score.
60
+
61
+ Formula:
62
+ final_risk_score = (TEXT_WEIGHT * text_distress) + (FACE_WEIGHT * face_distress)
63
+
64
+ Weights are loaded from app config at runtime.
65
+ """
66
+
67
+ def __init__(self) -> None:
68
+ self.settings = get_settings()
69
+
70
+ def _text_distress(self, dominant_text_emotion: str) -> float:
71
+ """Map dominant text emotion label → distress score."""
72
+ return TEXT_EMOTION_DISTRESS_SCORES.get(
73
+ dominant_text_emotion.lower(), 0.20
74
+ )
75
+
76
+ def _face_distress(self, face_emotion: str) -> float:
77
+ """Map face emotion label → distress score."""
78
+ return FACE_DISTRESS_SCORES.get(face_emotion.lower(), 0.20)
79
+
80
+ def compute(
81
+ self,
82
+ dominant_text_emotion: str,
83
+ face_emotion: str,
84
+ speech_score: Optional[float] = None, # Future: speech sentiment
85
+ ) -> FusionResult:
86
+ """
87
+ Compute weighted fusion score from available modalities.
88
+
89
+ Args:
90
+ dominant_text_emotion: Top emotion from DistilBERT (e.g. "sadness")
91
+ face_emotion: Detected face emotion from Keras CNN (e.g. "sad")
92
+ speech_score: Optional speech distress score (0.0–1.0)
93
+
94
+ Returns:
95
+ FusionResult with final weighted score and per-modality breakdown
96
+ """
97
+ tw = self.settings.TEXT_WEIGHT
98
+ fw = self.settings.FACE_WEIGHT
99
+
100
+ text_score = self._text_distress(dominant_text_emotion)
101
+ face_score = self._face_distress(face_emotion)
102
+
103
+ # If speech is provided in future, re-normalize weights
104
+ if speech_score is not None:
105
+ speech_weight = 1.0 - tw - fw
106
+ if speech_weight > 0:
107
+ final = (tw * text_score) + (fw * face_score) + (speech_weight * speech_score)
108
+ else:
109
+ final = (tw * text_score) + (fw * face_score)
110
+ else:
111
+ # Normalize text + face weights to sum to 1.0
112
+ total = tw + fw
113
+ final = ((tw / total) * text_score) + ((fw / total) * face_score)
114
+
115
+ final = round(min(max(final, 0.0), 1.0), 4)
116
+
117
+ # Determine dominant modality
118
+ if abs(text_score - face_score) < 0.10:
119
+ dominant = "balanced"
120
+ elif text_score > face_score:
121
+ dominant = "text"
122
+ else:
123
+ dominant = "face"
124
+
125
+ logger.debug(
126
+ "Fusion: text_emotion=%s(%.2f) face_emotion=%s(%.2f) → final=%.4f dominant=%s",
127
+ dominant_text_emotion, text_score,
128
+ face_emotion, face_score,
129
+ final, dominant,
130
+ )
131
+
132
+ return FusionResult(
133
+ final_risk_score=final,
134
+ text_score=text_score,
135
+ face_score=face_score,
136
+ speech_score=speech_score,
137
+ dominant_modality=dominant,
138
+ text_weight=tw,
139
+ face_weight=fw,
140
+ )
141
+
142
+
143
+ # Singleton
144
+ fusion_engine = FusionEngine()
app/services/ollama_engine.py ADDED
@@ -0,0 +1,476 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ollama_engine.py — PsyPredict Local LLM Engine
3
+ Async Ollama client with:
4
+ - Structured JSON output enforced via schema-in-prompt + Ollama format param
5
+ - Context window trimming
6
+ - Retry with exponential backoff
7
+ - Graceful fallback on Ollama unreachability
8
+ - Streaming support
9
+ - Zero external API dependency
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import asyncio
14
+ import json
15
+ import logging
16
+ import os
17
+ import time
18
+ from typing import Any, AsyncIterator, List, Optional
19
+
20
+ import httpx
21
+
22
+ from app.config import get_settings
23
+ from app.schemas import (
24
+ ConversationMessage,
25
+ PsychReport,
26
+ RiskLevel,
27
+ fallback_report,
28
+ )
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # System Prompt — Deterministic, clinical, no filler
34
+ # ---------------------------------------------------------------------------
35
+
36
+ SYSTEM_PROMPT = """You are a compassionate clinical AI therapist integrated into PsyPredict, a mental health platform.
37
+ Your role is twofold:
38
+ 1. Respond as a warm, empathetic therapist — never robotic, never dismissive.
39
+ 2. Provide a structured backend psychological assessment in JSON format.
40
+
41
+ == CONVERSATIONAL RESPONSE RULES ==
42
+ - ALWAYS give a full, thoughtful, empathetic response FIRST (before the JSON block).
43
+ - Responses must be at least 3-5 sentences. Never one-liners.
44
+ - Validate the user's feelings. Reflect back what they shared. Show you truly listened.
45
+ - Do NOT start with "I'm here to help" or generic openers. Be specific to what they said.
46
+ - Use warm, humanizing language. Be like a therapist who genuinely cares, not a support chatbot.
47
+ - If the situation involves trauma, grief, betrayal, or crisis — respond with appropriate gravity and compassion.
48
+ - Suggest one concrete, actionable step at the end of your reply.
49
+ - Do NOT mention the JSON block, schema, or any technical terms in your reply.
50
+
51
+ == JSON ASSESSMENT RULES ==
52
+ After your conversational response, add the marker: ---JSON---
53
+ Then provide the PsychReport JSON.
54
+
55
+ 1. Output ONLY valid JSON conforming exactly to the PsychReport schema below.
56
+ 2. Do NOT fabricate clinical diagnoses. Infer only from the evidence provided.
57
+ 3. cognitive_distortions must reference recognized CBT distortion labels only.
58
+ 4. suggested_interventions must be concrete and clinically actionable.
59
+ 5. confidence_score reflects YOUR confidence in this assessment (0.0 to 1.0).
60
+ 6. crisis_triggered MUST be false — crisis detection is handled by a separate layer.
61
+ 7. service_degraded MUST be false.
62
+
63
+ PSYCH_REPORT_SCHEMA:
64
+ {
65
+ "risk_classification": "<MINIMAL|LOW|MODERATE|HIGH|CRITICAL>",
66
+ "emotional_state_summary": "<string>",
67
+ "behavioral_inference": "<string>",
68
+ "cognitive_distortions": ["<string>", ...],
69
+ "suggested_interventions": ["<string>", ...],
70
+ "confidence_score": <float 0.0-1.0>,
71
+ "crisis_triggered": false,
72
+ "crisis_resources": null,
73
+ "service_degraded": false
74
+ }
75
+
76
+ Output format:
77
+ <Your full, empathetic therapist response here — 3-5 sentences minimum>
78
+ ---JSON---
79
+ { ...psych report json... }
80
+ """
81
+
82
+
83
+ # ---------------------------------------------------------------------------
84
+ # FACE → DISTRESS SCORE mapping (calibrated, not heuristic)
85
+ # ---------------------------------------------------------------------------
86
+
87
+ FACE_DISTRESS_MAP: dict[str, float] = {
88
+ "fear": 0.80,
89
+ "sad": 0.70,
90
+ "angry": 0.50,
91
+ "disgust": 0.40,
92
+ "surprised": 0.30,
93
+ "neutral": 0.20,
94
+ "happy": 0.05,
95
+ }
96
+
97
+
98
+ class OllamaEngine:
99
+ """
100
+ Production async LLM engine backed by local Ollama/LLaMA 3.
101
+ """
102
+
103
+ def __init__(self) -> None:
104
+ self.settings = get_settings()
105
+ self._client: Optional[httpx.AsyncClient] = None
106
+ self._local_llm: Optional[Any] = None # llama_cpp.Llama instance
107
+
108
+ @property
109
+ def client(self) -> httpx.AsyncClient:
110
+ if self._client is None or self._client.is_closed:
111
+ self._client = httpx.AsyncClient(
112
+ base_url=self.settings.OLLAMA_BASE_URL,
113
+ timeout=httpx.Timeout(
114
+ connect=10.0,
115
+ read=self.settings.OLLAMA_TIMEOUT_S,
116
+ write=30.0,
117
+ pool=5.0,
118
+ ),
119
+ )
120
+ return self._client
121
+
122
+ def _get_local_llm(self):
123
+ """Lazy load llama-cpp-python model."""
124
+ if self._local_llm is None:
125
+ try:
126
+ from llama_cpp import Llama
127
+ logger.info("Loading local GGUF model from %s", self.settings.GGUF_MODEL_PATH)
128
+ self._local_llm = Llama(
129
+ model_path=self.settings.GGUF_MODEL_PATH,
130
+ n_ctx=self.settings.LLM_CONTEXT_SIZE,
131
+ n_threads=os.cpu_count() or 4,
132
+ verbose=False
133
+ )
134
+ except ImportError:
135
+ logger.error("llama-cpp-python not installed. Cannot use embedded LLM.")
136
+ raise RuntimeError("llama-cpp-python not installed")
137
+ except Exception as exc:
138
+ logger.error("Failed to load local GGUF model: %s", exc)
139
+ raise
140
+ return self._local_llm
141
+
142
+ async def close(self) -> None:
143
+ if self._client and not self._client.is_closed:
144
+ await self._client.aclose()
145
+
146
+ # ------------------------------------------------------------------
147
+ # Health Check
148
+ # ------------------------------------------------------------------
149
+
150
+ async def is_reachable(self) -> bool:
151
+ """Returns True if Ollama API is reachable."""
152
+ try:
153
+ resp = await self.client.get("/api/tags", timeout=5.0)
154
+ return resp.status_code == 200
155
+ except Exception:
156
+ return False
157
+
158
+ # ------------------------------------------------------------------
159
+ # Context Window Trimming
160
+ # ------------------------------------------------------------------
161
+
162
+ def _trim_history(
163
+ self, history: List[ConversationMessage]
164
+ ) -> List[ConversationMessage]:
165
+ """Keep the last MAX_CONTEXT_TURNS message pairs."""
166
+ max_turns = self.settings.MAX_CONTEXT_TURNS
167
+ if len(history) <= max_turns * 2:
168
+ return history
169
+ return history[-(max_turns * 2):]
170
+
171
+ # ------------------------------------------------------------------
172
+ # Prompt Builder
173
+ # ------------------------------------------------------------------
174
+
175
+ def _build_prompt(
176
+ self,
177
+ user_text: str,
178
+ face_emotion: str,
179
+ history: List[ConversationMessage],
180
+ text_emotion_summary: Optional[str] = None,
181
+ ) -> str:
182
+ trimmed = self._trim_history(history)
183
+ history_block = "\n".join(
184
+ f"[{msg.role.upper()}]: {msg.content}" for msg in trimmed
185
+ )
186
+
187
+ face_distress = FACE_DISTRESS_MAP.get(face_emotion.lower(), 0.20)
188
+ multimodal_ctx = (
189
+ f"MULTIMODAL CONTEXT:\n"
190
+ f" Face emotion (webcam): {face_emotion} (distress score: {face_distress:.2f})\n"
191
+ )
192
+ if text_emotion_summary:
193
+ multimodal_ctx += f" Text emotion (DistilBERT): {text_emotion_summary}\n"
194
+
195
+ return (
196
+ f"{SYSTEM_PROMPT}\n\n"
197
+ f"CONVERSATION HISTORY:\n{history_block}\n\n"
198
+ f"{multimodal_ctx}\n"
199
+ f"CURRENT USER INPUT:\n{user_text}\n\n"
200
+ "ASSISTANT:"
201
+ )
202
+
203
+ # ------------------------------------------------------------------
204
+ # Parse LLM Output → (reply_text, PsychReport)
205
+ # ------------------------------------------------------------------
206
+
207
+ def _parse_response(self, raw: str) -> tuple[str, PsychReport]:
208
+ """
209
+ Split on ---JSON--- marker and validate the JSON block.
210
+ Returns (conversational_reply, PsychReport).
211
+ """
212
+ marker = "---JSON---"
213
+ if marker in raw:
214
+ parts = raw.split(marker, 1)
215
+ reply_text = parts[0].strip()
216
+ json_block = parts[1].strip()
217
+ else:
218
+ # Try to find JSON object in the raw output
219
+ reply_text = ""
220
+ json_block = raw.strip()
221
+
222
+ # Extract JSON object (handle markdown code fences)
223
+ if json_block.startswith("```"):
224
+ lines = json_block.split("\n")
225
+ json_block = "\n".join(
226
+ l for l in lines if not l.startswith("```")
227
+ ).strip()
228
+
229
+ try:
230
+ data = json.loads(json_block)
231
+ report = PsychReport(**data)
232
+ except (json.JSONDecodeError, ValueError, KeyError) as exc:
233
+ logger.warning(
234
+ "Failed to parse PsychReport from LLM output: %s | raw=%r",
235
+ exc,
236
+ json_block[:500],
237
+ )
238
+ # Return partial fallback
239
+ report = fallback_report()
240
+ if not reply_text:
241
+ reply_text = raw.strip()
242
+
243
+ return reply_text, report
244
+
245
+ # ------------------------------------------------------------------
246
+ # Generate (non-streaming)
247
+ # ------------------------------------------------------------------
248
+
249
+ async def generate(
250
+ self,
251
+ user_text: str,
252
+ face_emotion: str = "neutral",
253
+ history: Optional[List[ConversationMessage]] = None,
254
+ text_emotion_summary: Optional[str] = None,
255
+ ) -> tuple[str, PsychReport]:
256
+ """
257
+ Calls either Ollama API or Embedded LLM based on settings,
258
+ with automatic fallback to local if Ollama is unreachable.
259
+ """
260
+ # If user explicitly wants embedded mode
261
+ if self.settings.USE_EMBEDDED_LLM:
262
+ return await self._generate_local(user_text, face_emotion, history, text_emotion_summary)
263
+
264
+ # Otherwise try Ollama, fallback to local if it fails and GGUF is available
265
+ try:
266
+ reply, report = await self._generate_ollama(user_text, face_emotion, history, text_emotion_summary)
267
+ # If _generate_ollama returned the hardcoded fallback string, it failed its retries
268
+ if "inference service is temporarily unavailable" in reply:
269
+ raise ConnectionError("Ollama service unreachable after retries.")
270
+ return reply, report
271
+ except Exception as exc:
272
+ import os
273
+ if os.path.exists(self.settings.GGUF_MODEL_PATH):
274
+ logger.info("Ollama failed, falling back to embedded GGUF model: %s", exc)
275
+ return await self._generate_local(user_text, face_emotion, history, text_emotion_summary)
276
+ else:
277
+ logger.error("Ollama failed and no GGUF model found for fallback at %s", self.settings.GGUF_MODEL_PATH)
278
+ return (
279
+ "The inference service is temporarily unavailable and no local fallback is configured.",
280
+ fallback_report(),
281
+ )
282
+
283
+ async def _generate_local(
284
+ self,
285
+ user_text: str,
286
+ face_emotion: str,
287
+ history: Optional[List[ConversationMessage]],
288
+ text_emotion_summary: Optional[str]
289
+ ) -> tuple[str, PsychReport]:
290
+ """Embedded generation via llama-cpp-python."""
291
+ if history is None: history = []
292
+ prompt = self._build_prompt(user_text, face_emotion, history, text_emotion_summary)
293
+
294
+ try:
295
+ llm = self._get_local_llm()
296
+ # Run blocking LLM call in a separate thread
297
+ response = await asyncio.to_thread(
298
+ llm,
299
+ prompt=prompt,
300
+ max_tokens=600,
301
+ temperature=0.2,
302
+ top_p=0.9,
303
+ stop=["USER:", "CURRENT USER INPUT:"]
304
+ )
305
+ raw_text = response["choices"][0]["text"]
306
+ return self._parse_response(raw_text)
307
+ except Exception as exc:
308
+ logger.error("Embedded local LLM failed: %s", exc)
309
+ return "The local inference service encountered an error.", fallback_report()
310
+
311
    async def _generate_ollama(
        self,
        user_text: str,
        face_emotion: str,
        history: Optional[List[ConversationMessage]],
        text_emotion_summary: Optional[str]
    ) -> tuple[str, PsychReport]:
        """Generate a reply via the Ollama HTTP API with retry + backoff.

        Args:
            user_text: Raw user message to respond to.
            face_emotion: Facial emotion label passed to the prompt builder.
            history: Prior conversation turns; None is treated as empty.
            text_emotion_summary: Optional text-emotion summary for the prompt.

        Returns:
            (reply_text, PsychReport). When every attempt fails, returns a
            hardcoded "temporarily unavailable" string plus fallback_report();
            the caller matches on that string to trigger its GGUF fallback.
        """
        if history is None: history = []

        prompt = self._build_prompt(user_text, face_emotion, history, text_emotion_summary)

        payload = {
            "model": self.settings.OLLAMA_MODEL,
            "prompt": prompt,
            "stream": False,  # single JSON response (no chunked streaming)
            "options": {
                "temperature": 0.2,  # Low temp for determinism
                "top_p": 0.9,
                "num_ctx": 4096,
                "stop": [],
            },
        }

        last_error: Optional[Exception] = None
        delay = self.settings.OLLAMA_RETRY_DELAY_S

        for attempt in range(1, self.settings.OLLAMA_RETRIES + 1):
            try:
                logger.info(
                    "Ollama generate attempt %d/%d",
                    attempt,
                    self.settings.OLLAMA_RETRIES,
                )
                resp = await self.client.post("/api/generate", json=payload)
                resp.raise_for_status()
                data = resp.json()
                raw_text: str = data.get("response", "")

                reply, report = self._parse_response(raw_text)
                return reply, report

            except httpx.TimeoutException as exc:
                # Timeouts are considered transient: retry with backoff.
                last_error = exc
                logger.warning("Ollama timeout on attempt %d: %s", attempt, exc)
            except httpx.HTTPStatusError as exc:
                last_error = exc
                logger.error("Ollama HTTP error %s: %s", exc.response.status_code, exc)
                break  # Non-retryable HTTP error
            except Exception as exc:
                last_error = exc
                logger.error("Ollama unexpected error: %s", exc)

            if attempt < self.settings.OLLAMA_RETRIES:
                await asyncio.sleep(delay)
                delay *= 2  # Exponential backoff

        logger.error(
            "All Ollama attempts failed. Returning fallback. Last error: %s",
            last_error,
        )
        return (
            "The inference service is temporarily unavailable. Please try again shortly.",
            fallback_report(),
        )
376
+
377
+ # ------------------------------------------------------------------
378
+ # Generate (streaming)
379
+ # ------------------------------------------------------------------
380
+
381
+ async def generate_stream(
382
+ self,
383
+ user_text: str,
384
+ face_emotion: str = "neutral",
385
+ history: Optional[List[ConversationMessage]] = None,
386
+ text_emotion_summary: Optional[str] = None,
387
+ ) -> AsyncIterator[str]:
388
+ """
389
+ Yields raw text chunks as they arrive from either Ollama or Embedded LLM.
390
+ """
391
+ if self.settings.USE_EMBEDDED_LLM:
392
+ async for chunk in self._generate_stream_local(user_text, face_emotion, history, text_emotion_summary):
393
+ yield chunk
394
+ else:
395
+ async for chunk in self._generate_stream_ollama(user_text, face_emotion, history, text_emotion_summary):
396
+ yield chunk
397
+
398
+ async def _generate_stream_local(
399
+ self,
400
+ user_text: str,
401
+ face_emotion: str,
402
+ history: Optional[List[ConversationMessage]],
403
+ text_emotion_summary: Optional[str]
404
+ ) -> AsyncIterator[str]:
405
+ """Embedded streaming via llama-cpp-python."""
406
+ if history is None: history = []
407
+ prompt = self._build_prompt(user_text, face_emotion, history, text_emotion_summary)
408
+
409
+ try:
410
+ llm = self._get_local_llm()
411
+ # llama-cpp-python streaming is synchronous, so we need to wrap it
412
+ stream = llm(
413
+ prompt=prompt,
414
+ max_tokens=600,
415
+ temperature=0.2,
416
+ top_p=0.9,
417
+ stream=True,
418
+ stop=["USER:", "CURRENT USER INPUT:"]
419
+ )
420
+ for chunk in stream:
421
+ token = chunk["choices"][0]["text"]
422
+ if token:
423
+ yield token
424
+ await asyncio.sleep(0) # Yield control
425
+ except Exception as exc:
426
+ logger.error("Embedded streaming failed: %s", exc)
427
+ yield "\n[Local inference error]"
428
+
429
    async def _generate_stream_ollama(
        self,
        user_text: str,
        face_emotion: str,
        history: Optional[List[ConversationMessage]],
        text_emotion_summary: Optional[str]
    ) -> AsyncIterator[str]:
        """
        Yields raw text chunks as they arrive from Ollama.
        The full accumulated response is NOT parsed into PsychReport here;
        caller must buffer and parse at end.

        Args:
            user_text: Raw user message.
            face_emotion: Facial emotion label passed to the prompt builder.
            history: Prior conversation turns; None is treated as empty.
            text_emotion_summary: Optional text-emotion summary for the prompt.
        """
        if history is None:
            history = []

        prompt = self._build_prompt(user_text, face_emotion, history, text_emotion_summary)

        payload = {
            "model": self.settings.OLLAMA_MODEL,
            "prompt": prompt,
            "stream": True,  # Ollama emits one JSON object per line
            "options": {"temperature": 0.2, "top_p": 0.9, "num_ctx": 4096},
        }

        try:
            async with self.client.stream("POST", "/api/generate", json=payload) as resp:
                resp.raise_for_status()
                async for line in resp.aiter_lines():
                    if not line.strip():
                        continue  # skip blank keep-alive lines
                    try:
                        chunk = json.loads(line)
                        token = chunk.get("response", "")
                        if token:
                            yield token
                        if chunk.get("done"):
                            break  # server signalled end of generation
                    except json.JSONDecodeError:
                        continue  # tolerate partial / malformed lines
        except Exception as exc:
            logger.error("Ollama streaming failed: %s", exc)
            yield "\n[Inference service error — please retry]\n"
471
+
472
+
473
# ---------------------------------------------------------------------------
# Singleton
# ---------------------------------------------------------------------------
# Module-level shared engine instance; importers use this rather than
# constructing their own OllamaEngine.
ollama_engine = OllamaEngine()
app/services/text_emotion_engine.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ text_emotion_engine.py — DistilBERT Multi-Label Text Emotion Classifier
3
+ Uses: bhadresh-savani/distilbert-base-uncased-emotion
4
+ Output: top-N emotions with calibrated confidence scores.
5
+ Runs inference in asyncio.to_thread to avoid blocking the event loop.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import logging
11
+ from typing import List, Optional
12
+
13
+ from app.schemas import EmotionLabel
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ _pipeline = None
18
+ _load_error: Optional[str] = None
19
+
20
+
21
def _load_pipeline(model_name: str) -> None:
    """Load the HuggingFace text-classification pipeline into module globals.

    On success `_pipeline` holds the ready pipeline; on failure `_load_error`
    records the message and `_pipeline` stays None.
    """
    global _pipeline, _load_error
    try:
        from transformers import pipeline as hf_pipeline

        logger.info("Loading DistilBERT text emotion model: %s", model_name)
        pipeline_kwargs = {
            "model": model_name,
            "top_k": None,       # return scores for every label
            "truncation": True,
            "max_length": 512,
        }
        _pipeline = hf_pipeline("text-classification", **pipeline_kwargs)
        logger.info("✅ DistilBERT emotion model loaded successfully.")
    except Exception as exc:
        _load_error = str(exc)
        logger.error("❌ Failed to load DistilBERT model: %s", exc)
38
+
39
+
40
def initialize(model_name: str) -> None:
    """Called at app startup to pre-warm the model.

    Args:
        model_name: HuggingFace model id, forwarded to _load_pipeline.
    """
    _load_pipeline(model_name)
43
+
44
+
45
class TextEmotionEngine:
    """
    Wraps the HuggingFace DistilBERT pipeline for async use in FastAPI.
    """

    def _classify_sync(self, text: str) -> List[EmotionLabel]:
        # Synchronous inference path; returns [] when the model is not
        # loaded or the pipeline raises.
        if _pipeline is None:
            return []
        try:
            raw_results = _pipeline(text[:512])
            if not raw_results:
                return []
            # With top_k=None the pipeline yields a list-of-lists.
            scored = raw_results[0] if isinstance(raw_results[0], list) else raw_results
            parsed = [
                EmotionLabel(label=entry["label"].lower(), score=round(entry["score"], 4))
                for entry in scored
            ]
            parsed.sort(key=lambda lbl: lbl.score, reverse=True)
            return parsed
        except Exception as exc:
            logger.error("DistilBERT inference error: %s", exc)
            return []

    async def classify(self, text: str) -> List[EmotionLabel]:
        """
        Async wrapper — runs CPU-bound inference in a thread pool.
        Returns list of EmotionLabel sorted by confidence desc.
        """
        return await asyncio.to_thread(self._classify_sync, text)

    async def top_emotion(self, text: str) -> str:
        """Returns the single dominant emotion label."""
        ranked = await self.classify(text)
        if not ranked:
            return "neutral"
        return ranked[0].label

    def summary_string(self, labels: List[EmotionLabel], top_k: int = 3) -> str:
        """
        Formats top-k labels as a string for LLM prompt injection.
        Example: "sadness(0.87), fear(0.08), anger(0.03)"
        """
        parts = []
        for lbl in labels[:top_k]:
            parts.append(f"{lbl.label}({lbl.score:.2f})")
        return ", ".join(parts)

    @property
    def is_loaded(self) -> bool:
        # True once _load_pipeline has populated the module-level pipeline.
        return _pipeline is not None

    @property
    def load_error(self) -> Optional[str]:
        # Error message captured by _load_pipeline on failure, else None.
        return _load_error
97
+
98
+
99
# Singleton
# Module-level shared instance used by the rest of the app.
text_emotion_engine = TextEmotionEngine()
download_models.py CHANGED
@@ -1,25 +1,58 @@
1
  import os
2
- import gdown # We will install this library
 
3
 
4
- # 👇 PASTE YOUR GOOGLE DRIVE IDs HERE
5
- MODEL_ID = "10GWSogJNKlPlTeWtJkDq_zc4roB1Vmnu"
6
- CSV_ID = "1bJ8C1BY0rvPNKuWcBgqiUtiSzHziZokH"
7
 
8
- # Define where they should go
9
- model_path = "app/ml_assets/emotion_model_trained.h5"
10
- csv_path = "app/ml_assets/MEDICATION.csv"
11
 
12
- def download_file(file_id, output_path):
 
 
 
 
 
 
13
  if not os.path.exists(output_path):
14
  os.makedirs(os.path.dirname(output_path), exist_ok=True)
15
  url = f'https://drive.google.com/uc?id={file_id}'
16
- print(f"⬇️ Downloading {output_path}...")
17
  gdown.download(url, output_path, quiet=False)
18
  else:
19
- print(f"✅ Found {output_path}, skipping download.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  if __name__ == "__main__":
22
- print("🚀 Starting Model Download...")
23
- download_file(MODEL_ID, model_path)
24
- download_file(CSV_ID, csv_path)
25
- print("✅ All models ready!")
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import gdown
3
+ from huggingface_hub import hf_hub_download
4
 
5
+ # --- Assets ---
6
+ MODEL_ID = "10GWSogJNKlPlTeWtJkDq_zc4roB1Vmnu" # Keras Face Emotion
7
+ CSV_ID = "1bJ8C1BY0rvPNKuWcBgqiUtiSzHziZokH" # Medication CSV
8
 
9
+ # Llama-3-8B-Instruct GGUF (Quantized for CPU/RAM efficiency)
10
+ LLAMA_REPO = "MaziyarPanahi/Llama-3-8B-Instruct-v0.1-GGUF"
11
+ LLAMA_FILE = "Llama-3-8B-Instruct-v0.1.Q4_K_M.gguf"
12
 
13
+ # Destinations
14
+ ML_ASSETS = "app/ml_assets"
15
+ FACE_MODEL_PATH = os.path.join(ML_ASSETS, "emotion_model_trained.h5")
16
+ MEDS_CSV_PATH = os.path.join(ML_ASSETS, "MEDICATION.csv")
17
+ LLAMA_GGUF_PATH = os.path.join(ML_ASSETS, "llama-3-8b-instruct.Q4_K_M.gguf")
18
+
19
def download_drive_file(file_id, output_path):
    """Fetch a Google Drive file by id unless it already exists on disk."""
    if os.path.exists(output_path):
        print(f"✅ Found {output_path}, skipping.")
        return
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    print(f"⬇️ Downloading Drive file to {output_path}...")
    gdown.download(f'https://drive.google.com/uc?id={file_id}', output_path, quiet=False)
27
+
28
def download_hf_model(repo_id, filename, output_path):
    """Download `filename` from the HF Hub repo `repo_id` to `output_path`.

    Skips the download when the destination file already exists. The file is
    fetched into the destination directory and then renamed so that it matches
    the path expected by the app configuration.
    """
    if not os.path.exists(output_path):
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        # BUG FIX: previously printed a literal "(unknown)" placeholder
        # instead of interpolating the actual file name.
        print(f"⬇️ Downloading HF model: {filename} from {repo_id}...")
        hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            local_dir=os.path.dirname(output_path),
            # NOTE(review): local_dir_use_symlinks is deprecated/ignored in
            # recent huggingface_hub releases; kept for older pinned versions.
            local_dir_use_symlinks=False
        )
        # hf_hub_download saves under the repo's own file name; align it
        # with the path our config expects.
        downloaded_path = os.path.join(os.path.dirname(output_path), filename)
        if downloaded_path != output_path:
            os.rename(downloaded_path, output_path)
    else:
        print(f"✅ Found {output_path}, skipping.")
44
 
45
# Entry point: synchronizes all model assets needed by the backend.
# Pure orchestration/I/O — downloads are skipped when files already exist.
if __name__ == "__main__":
    print("🚀 Starting Production Model Sync...")

    # 1. Drive Files (face-emotion Keras model + medication CSV)
    download_drive_file(MODEL_ID, FACE_MODEL_PATH)
    download_drive_file(CSV_ID, MEDS_CSV_PATH)

    # 2. HF Models (Llama 3) — best-effort: failure is tolerated so local
    # development without network access still completes the Drive step.
    try:
        download_hf_model(LLAMA_REPO, LLAMA_FILE, LLAMA_GGUF_PATH)
    except Exception as e:
        print(f"⚠️ HF Download failed (expected on local dev if no internet): {e}")

    print("✅ All models synchronized!")
requirements.txt CHANGED
@@ -1,17 +1,33 @@
1
- # --- Core Backend ---
2
- flask
3
- flask-cors
4
- python-dotenv
5
- google-generativeai
6
- gdown
 
 
 
 
7
 
8
- # --- AI & Vision (Version Locked for Stability) ---
 
 
 
9
  numpy<2.0
10
  opencv-python
11
  tensorflow
12
- pandas
13
  tensorflow-cpu
 
14
  pillow
 
 
 
 
 
 
 
 
15
 
16
  # --- Utilities ---
17
- requests
 
 
1
+ # --- Core Backend (FastAPI) ---
2
+ fastapi>=0.111.0
3
+ uvicorn[standard]>=0.30.0
4
+ python-dotenv>=1.0.0
5
+ pydantic>=2.0.0
6
+ pydantic-settings>=2.0.0
7
+
8
+ # --- HTTP + Async ---
9
+ httpx>=0.27.0
10
+ anyio>=4.0.0
11
 
12
+ # --- Rate Limiting ---
13
+ slowapi>=0.1.9
14
+
15
+ # --- AI & Vision (Preserved - Version Locked for Stability) ---
16
  numpy<2.0
17
  opencv-python
18
  tensorflow
 
19
  tensorflow-cpu
20
+ pandas
21
  pillow
22
+ gdown
23
+
24
+ # --- NLP (New) ---
25
+ transformers>=4.40.0
26
+ torch>=2.0.0
27
+ sentencepiece==0.1.99
28
+ llama-cpp-python>=0.2.77
29
+ huggingface-hub>=0.23.0
30
 
31
  # --- Utilities ---
32
+ requests
33
+ python-multipart