Spaces:

Param20h
/

PDF-Assit_RAG

Running

App Files Files Community

kishalll committed 5 days ago

Commit

d22e308

2 Parent(s): 0418a93 b1a5e35

Resolve merge conflict in dashboard/page.tsx wrt document rename and updated auth guard

Browse files

Files changed (33) hide show

.env.example +18 -1
.gitignore +2 -1
README.md +15 -1
backend/app/auth.py +17 -5
backend/app/celery_app.py +23 -0
backend/app/config.py +9 -0
backend/app/evaluation/__init__.py +2 -0
backend/app/evaluation/ragas_pipeline.py +292 -0
backend/app/routes/auth.py +206 -1
backend/app/routes/documents.py +16 -151
backend/app/schemas.py +1 -0
backend/app/services/document_ingestion.py +27 -3
backend/app/tasks.py +22 -0
backend/evaluation/ragas_sample_questions.jsonl +50 -0
backend/requirements.txt +2 -0
backend/scripts/run_ragas_eval.py +59 -0
backend/tests/test_auth.py +77 -0
backend/tests/test_document_upload_validation.py +7 -3
backend/tests/test_documents.py +5 -5
backend/tests/test_ragas_pipeline.py +76 -0
docker-compose.yml +46 -2
docs/ARCHITECTURE.md +8 -6
frontend/e2e/auth-and-chat.spec.ts +8 -2
frontend/package-lock.json +1 -0
frontend/src/app/dashboard/page.tsx +12 -9
frontend/src/app/login/page.tsx +24 -4
frontend/src/app/register/page.tsx +12 -3
frontend/src/components/auth/HuggingFaceSignInButton.tsx +58 -0
frontend/src/components/layout/Header.tsx +9 -1
frontend/src/lib/api.ts +6 -2
frontend/src/store/auth-store.ts +17 -9
grafana_dashboard.json +1025 -0
package-lock.json +0 -6

.env.example CHANGED Viewed

@@ -55,6 +55,16 @@ ALLOWED_ORIGINS=http://localhost:3000,http://localhost:7860
 # Optional — required only for Google sign-in.
 # NEXT_PUBLIC_GOOGLE_CLIENT_ID=your_google_oauth_client_id.apps.googleusercontent.com
 # ── File Upload ─────────────────────────────────────────────
 # Directory where uploaded documents (PDFs, DOCXs, etc.) are stored.
@@ -69,13 +79,20 @@ ALLOWED_ORIGINS=http://localhost:3000,http://localhost:7860
 # Optional — defaults to "pdf,docx,txt,md"
 # ALLOWED_EXTENSIONS=pdf,docx,txt,md
-# ── HuggingFace (Required for LLM inference) ────────────────
 # HuggingFace API token. Used to call the Inference API for LLM responses.
 # Get yours: https://huggingface.co/settings/tokens (free tier available)
 # Required (app won't generate answers without it)
 HF_TOKEN=your_huggingface_token_here
 # ── LLM Configuration ───────────────────────────────────────
 # HuggingFace model ID used for answer generation.

 # Optional — required only for Google sign-in.
 # NEXT_PUBLIC_GOOGLE_CLIENT_ID=your_google_oauth_client_id.apps.googleusercontent.com
+# ── Celery / Redis Background Processing ───────────────────
+# Redis URL used by FastAPI to enqueue PDF processing jobs.
+# Optional — defaults to redis://localhost:6379/0
+# CELERY_BROKER_URL=redis://localhost:6379/0
+# Redis URL used by Celery to store task results/status.
+# Optional — defaults to redis://localhost:6379/1
+# CELERY_RESULT_BACKEND=redis://localhost:6379/1
 # ── File Upload ─────────────────────────────────────────────
 # Directory where uploaded documents (PDFs, DOCXs, etc.) are stored.
 # Optional — defaults to "pdf,docx,txt,md"
 # ALLOWED_EXTENSIONS=pdf,docx,txt,md
+# ── HuggingFace (Required for LLM inference and OAuth) ───────
 # HuggingFace API token. Used to call the Inference API for LLM responses.
 # Get yours: https://huggingface.co/settings/tokens (free tier available)
 # Required (app won't generate answers without it)
 HF_TOKEN=your_huggingface_token_here
+# HuggingFace OAuth variables for native login support
+# Optional — required only for Hugging Face sign-in
+HF_CLIENT_ID=your_hf_oauth_client_id
+HF_CLIENT_SECRET=your_hf_oauth_client_secret
+HF_REDIRECT_URI=http://localhost:8000/api/v1/auth/callback/huggingface
+FRONTEND_URL=http://localhost:3000
 # ── LLM Configuration ───────────────────────────────────────
 # HuggingFace model ID used for answer generation.

.gitignore CHANGED Viewed

@@ -8,6 +8,7 @@ __pycache__/
 # Data (runtime generated)
 data/
 *.db
 # Environment
 .env
@@ -29,4 +30,4 @@ Thumbs.db
 # Misc
 *.log
 static/
-.planning/

 # Data (runtime generated)
 data/
 *.db
+backend/evaluation/ragas_results.json
 # Environment
 .env
 # Misc
 *.log
 static/
+.planning/

README.md CHANGED Viewed

@@ -362,6 +362,8 @@ DATABASE_URL=sqlite:///./data/app.db
 HF_TOKEN=hf_your_huggingface_token_here
 UPLOAD_DIR=./data/uploads
 CHROMA_PERSIST_DIR=./data/chroma_db
 ```
 > Get your free HuggingFace token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
@@ -410,7 +412,7 @@ npm run dev
 ```bash
 docker compose up --build
-# → Full stack at http://localhost:7860
 ```
 <br/>
@@ -491,6 +493,10 @@ docker compose up --build
 |---|---|---|---|---|
 | `SECRET_KEY` | ✅ | — | JWT signing & session secret. Use a strong random string. | Generate: `python -c "import secrets; print(secrets.token_urlsafe(32))"` |
 | `HF_TOKEN` | ✅ | — | HuggingFace API token for LLM inference via Inference API. | [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) (free) |
 | `ENVIRONMENT` | ❌ | `development` | Runtime mode. Set to `production` for deployment to lock CORS. | — |
 | `DEBUG` | ❌ | `False` | Enable debug mode with detailed error pages. Never enable in production. | — |
 | `ALLOWED_ORIGINS` | ❌ | `http://localhost:3000,http://localhost:7860` | Comma-separated CORS origins (only enforced in production). | Your deployed domain(s) |
@@ -499,6 +505,8 @@ docker compose up --build
 | `JWT_EXPIRY_HOURS` | ❌ | `72` | JWT token lifetime in hours before re-login is required. | — |
 | `GOOGLE_CLIENT_ID` | ❌ | — | Google OAuth web client ID used by FastAPI to verify ID tokens. | [Google Cloud Console](https://console.cloud.google.com/apis/credentials) |
 | `NEXT_PUBLIC_GOOGLE_CLIENT_ID` | ❌ | — | Google OAuth web client ID exposed to the Next.js Google sign-in button. | [Google Cloud Console](https://console.cloud.google.com/apis/credentials) |
 | `UPLOAD_DIR` | ❌ | `./data/uploads` | Local directory for storing uploaded documents. | — |
 | `MAX_FILE_SIZE_MB` | ❌ | `50` | Maximum allowed upload file size in MB. | — |
 | `ALLOWED_EXTENSIONS` | ❌ | `pdf,docx,txt,md` | Comma-separated list of permitted file extensions. | — |
@@ -524,6 +532,12 @@ docker compose up --build
 |---------|-------------|
 | `uvicorn app.main:app --reload` | Start FastAPI with hot reload |
 | `uvicorn app.main:app --port 8000` | Start FastAPI on port 8000 |
 ### Frontend (`frontend/`)

 HF_TOKEN=hf_your_huggingface_token_here
 UPLOAD_DIR=./data/uploads
 CHROMA_PERSIST_DIR=./data/chroma_db
+CELERY_BROKER_URL=redis://localhost:6379/0
+CELERY_RESULT_BACKEND=redis://localhost:6379/1
 ```
 > Get your free HuggingFace token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
 ```bash
 docker compose up --build
+# → Starts FastAPI, Redis, a Celery worker, and Postgres; the app is served at http://localhost:7860
 ```
 <br/>
 |---|---|---|---|---|
 | `SECRET_KEY` | ✅ | — | JWT signing & session secret. Use a strong random string. | Generate: `python -c "import secrets; print(secrets.token_urlsafe(32))"` |
 | `HF_TOKEN` | ✅ | — | HuggingFace API token for LLM inference via Inference API. | [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) (free) |
+| `HF_CLIENT_ID` | ❌ | — | HuggingFace OAuth client ID. Required only for Hugging Face sign-in. | [HuggingFace Developer Settings](https://huggingface.co/settings/connected-applications) |
+| `HF_CLIENT_SECRET` | ❌ | — | HuggingFace OAuth client secret. Required only for Hugging Face sign-in. | [HuggingFace Developer Settings](https://huggingface.co/settings/connected-applications) |
+| `HF_REDIRECT_URI` | ❌ | — | HuggingFace OAuth callback redirect URI. Required only for Hugging Face sign-in; no built-in default (local example: `http://localhost:8000/api/v1/auth/callback/huggingface`). | — |
+| `FRONTEND_URL` | ❌ | `http://localhost:3000` | Frontend URL to redirect to after OAuth callback finishes. | — |
 | `ENVIRONMENT` | ❌ | `development` | Runtime mode. Set to `production` for deployment to lock CORS. | — |
 | `DEBUG` | ❌ | `False` | Enable debug mode with detailed error pages. Never enable in production. | — |
 | `ALLOWED_ORIGINS` | ❌ | `http://localhost:3000,http://localhost:7860` | Comma-separated CORS origins (only enforced in production). | Your deployed domain(s) |
 | `JWT_EXPIRY_HOURS` | ❌ | `72` | JWT token lifetime in hours before re-login is required. | — |
 | `GOOGLE_CLIENT_ID` | ❌ | — | Google OAuth web client ID used by FastAPI to verify ID tokens. | [Google Cloud Console](https://console.cloud.google.com/apis/credentials) |
 | `NEXT_PUBLIC_GOOGLE_CLIENT_ID` | ❌ | — | Google OAuth web client ID exposed to the Next.js Google sign-in button. | [Google Cloud Console](https://console.cloud.google.com/apis/credentials) |
+| `CELERY_BROKER_URL` | ❌ | `redis://localhost:6379/0` | Redis broker URL used by FastAPI to queue document ingestion jobs. | Redis |
+| `CELERY_RESULT_BACKEND` | ❌ | `redis://localhost:6379/1` | Redis backend URL used by Celery to store task state/results. | Redis |
 | `UPLOAD_DIR` | ❌ | `./data/uploads` | Local directory for storing uploaded documents. | — |
 | `MAX_FILE_SIZE_MB` | ❌ | `50` | Maximum allowed upload file size in MB. | — |
 | `ALLOWED_EXTENSIONS` | ❌ | `pdf,docx,txt,md` | Comma-separated list of permitted file extensions. | — |
 |---------|-------------|
 | `uvicorn app.main:app --reload` | Start FastAPI with hot reload |
 | `uvicorn app.main:app --port 8000` | Start FastAPI on port 8000 |
+| `python scripts/run_ragas_eval.py --user-id <user-id>` | Run the 50-question RAGAS comparison for vector search vs GraphRAG |
+The RAGAS script reads `backend/evaluation/ragas_sample_questions.jsonl`,
+generates answers from standard vector contexts and vector-plus-GraphRAG
+contexts, then writes aggregate scores to `backend/evaluation/ragas_results.json`.
+Pass `--document-id <document-id>` to evaluate one indexed document.
 ### Frontend (`frontend/`)

backend/app/auth.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing import Optional, Any
 import jwt
 import bcrypt
-from fastapi import Depends, HTTPException, status
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from sqlalchemy.orm import Session
@@ -15,7 +15,7 @@ from app.database import get_db
 from app.models import User, UserRole
 settings = get_settings()
-security = HTTPBearer()
 # ── Password Hashing ─────────────────────────────────
@@ -96,11 +96,23 @@ def decode_invite_token(token: str) -> Optional[dict[str, Any]]:
 import hashlib
 def get_current_user(
-    credentials: HTTPAuthorizationCredentials = Depends(security),
     db: Session = Depends(get_db),
 ) -> User:
-    """Dependency: extract and validate user from JWT bearer token or API key."""
-    token = credentials.credentials
     # Check if token is an API key
     if token.startswith("pdf_rag_"):

 import jwt
 import bcrypt
+from fastapi import Depends, HTTPException, status, Cookie
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from sqlalchemy.orm import Session
 from app.models import User, UserRole
 settings = get_settings()
+security = HTTPBearer(auto_error=False)
 # ── Password Hashing ─────────────────────────────────
 import hashlib
 def get_current_user(
+    credentials: Optional[HTTPAuthorizationCredentials] = Depends(security),
+    access_token: Optional[str] = Cookie(None),
     db: Session = Depends(get_db),
 ) -> User:
+    """Dependency: extract and validate user from JWT bearer token, API key, or secure cookie."""
+    token = None
+    if credentials:
+        token = credentials.credentials
+    elif access_token:
+        token = access_token
+    if not token:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Invalid or expired token",
+            headers={"WWW-Authenticate": "Bearer"},
+        )
     # Check if token is an API key
     if token.startswith("pdf_rag_"):

backend/app/celery_app.py ADDED Viewed

	@@ -0,0 +1,23 @@

+"""Celery application configured for Redis-backed background jobs."""
+from celery import Celery
+from app.config import get_settings
+settings = get_settings()
# Broker and result-backend URLs are read from application settings;
# ``include`` lists the module that holds the task definitions.
celery_app = Celery(
    "pdf_assistant_rag",
    include=["app.tasks"],
    backend=settings.CELERY_RESULT_BACKEND,
    broker=settings.CELERY_BROKER_URL,
)

# JSON is the only accepted/produced payload format; timezone is UTC.
celery_app.conf.update(
    accept_content=["json"],
    task_serializer="json",
    result_serializer="json",
    timezone="UTC",
    task_track_started=settings.CELERY_TASK_TRACK_STARTED,
)

backend/app/config.py CHANGED Viewed

@@ -23,12 +23,21 @@ class Settings(BaseSettings):
     JWT_ACCESS_EXPIRY_MINUTES: int = 15
     JWT_REFRESH_EXPIRY_DAYS: int = 7
     GOOGLE_CLIENT_ID: str = ""
     # Google Drive background sync
     DRIVE_SYNC_ENABLED: bool = False
     DRIVE_SYNC_INTERVAL_MINUTES: int = 60
     GOOGLE_SERVICE_ACCOUNT_FILE: str = ""
     # ── File Upload ──────────────────────────────────────
     UPLOAD_DIR: str = "./data/uploads"
     MAX_UPLOAD_SIZE_MB: int = 20

     JWT_ACCESS_EXPIRY_MINUTES: int = 15
     JWT_REFRESH_EXPIRY_DAYS: int = 7
     GOOGLE_CLIENT_ID: str = ""
+    HF_CLIENT_ID: str = ""
+    HF_CLIENT_SECRET: str = ""
+    HF_REDIRECT_URI: str = ""
+    FRONTEND_URL: str = "http://localhost:3000"
     # Google Drive background sync
     DRIVE_SYNC_ENABLED: bool = False
     DRIVE_SYNC_INTERVAL_MINUTES: int = 60
     GOOGLE_SERVICE_ACCOUNT_FILE: str = ""
+    # Celery / Redis background processing
+    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
+    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
+    CELERY_TASK_TRACK_STARTED: bool = True
     # ── File Upload ──────────────────────────────────────
     UPLOAD_DIR: str = "./data/uploads"
     MAX_UPLOAD_SIZE_MB: int = 20

backend/app/evaluation/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ """Evaluation helpers for offline RAG quality checks."""
2	+

backend/app/evaluation/ragas_pipeline.py ADDED Viewed

	@@ -0,0 +1,292 @@

+"""RAGAS evaluation pipeline for vector search versus GraphRAG."""
+from __future__ import annotations
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from statistics import mean
+from typing import Any, Callable, Iterable, Optional
+from huggingface_hub import InferenceClient
+from app.config import get_settings
+from app.rag.embeddings import embed_query
+from app.rag.graph_retriever import get_entity_context
+from app.rag.vectorstore import query_chunks
+settings = get_settings()
+AnswerGenerator = Callable[[str, list[str]], str]
@dataclass(frozen=True)
class EvaluationQuestion:
    """One evaluation prompt together with its reference answer."""

    id: str  # dataset row identifier, stored as a string
    question: str  # question text with surrounding whitespace removed
    reference: str  # reference answer with surrounding whitespace removed


@dataclass(frozen=True)
class EvaluationRecord:
    """A generated answer and the contexts it was generated from."""

    id: str
    mode: str  # label of the retrieval mode that produced ``contexts``
    question: str
    reference: str
    response: str
    contexts: list[str]


def load_questions(dataset_path: Path, limit: int = 50) -> list[EvaluationQuestion]:
    """Load a JSONL RAGAS dataset and validate the required fields.

    Blank lines are skipped. Reading stops as soon as ``limit`` questions
    have been collected.

    Args:
        dataset_path: Path of the UTF-8 encoded JSONL file.
        limit: Number of questions that must be loaded.

    Returns:
        The first ``limit`` questions, in file order.

    Raises:
        ValueError: If a line is not valid JSON, is not a JSON object, lacks
            one of ``id`` / ``question`` / ``reference``, or the file holds
            fewer than ``limit`` questions.
    """
    required_fields = {"id", "question", "reference"}
    questions: list[EvaluationQuestion] = []
    with dataset_path.open("r", encoding="utf-8") as handle:
        for line_number, line in enumerate(handle, start=1):
            stripped = line.strip()
            if not stripped:
                continue
            try:
                row = json.loads(stripped)
            except json.JSONDecodeError as exc:
                raise ValueError(f"Invalid JSON on line {line_number}: {exc}") from exc
            # A bare number, string, or list is valid JSON but not a record;
            # reject it here instead of failing later with a TypeError.
            if not isinstance(row, dict):
                raise ValueError(
                    f"Line {line_number} must be a JSON object, got {type(row).__name__}"
                )
            missing = required_fields - row.keys()
            if missing:
                fields = ", ".join(sorted(missing))
                raise ValueError(f"Line {line_number} is missing required field(s): {fields}")
            questions.append(
                EvaluationQuestion(
                    id=str(row["id"]),
                    question=str(row["question"]).strip(),
                    reference=str(row["reference"]).strip(),
                )
            )
            if len(questions) >= limit:
                break
    if len(questions) < limit:
        raise ValueError(f"Expected {limit} evaluation questions, found {len(questions)}")
    return questions
def retrieve_vector_contexts(
    question: str,
    user_id: str,
    document_id: Optional[str] = None,
    top_k: Optional[int] = None,
) -> list[str]:
    """Return the chunk texts that vector search yields for ``question``.

    The question is embedded with ``embed_query`` and passed to
    ``query_chunks`` together with the user/document filters. A falsy
    ``top_k`` (``None`` or ``0``) falls back to ``settings.TOP_K_RETRIEVAL``.
    """
    embedding = embed_query(question)
    hits = query_chunks(
        query_embedding=embedding,
        user_id=user_id,
        document_id=document_id,
        top_k=top_k if top_k else settings.TOP_K_RETRIEVAL,
    )
    return _chunk_texts(hits)
def retrieve_graphrag_contexts(
    question: str,
    user_id: str,
    document_id: Optional[str] = None,
    top_k: Optional[int] = None,
) -> list[str]:
    """Return vector-search contexts followed by the GraphRAG entity context.

    The vector contexts are fetched first, then ``get_entity_context`` is
    queried with the same question and filters; its text is appended only
    when it is non-blank.
    """
    vector_contexts = retrieve_vector_contexts(
        question=question, user_id=user_id, document_id=document_id, top_k=top_k
    )
    entity_context = get_entity_context(
        query=question, user_id=user_id, document_id=document_id
    )
    return append_graph_context(vector_contexts, entity_context)
def append_graph_context(contexts: list[str], graph_context: Optional[str]) -> list[str]:
    """Return contexts plus graph context when GraphRAG found relationships.

    Args:
        contexts: Retrieved context strings.
        graph_context: Text produced by the graph retriever. ``None``, an
            empty string, and whitespace-only text all mean "nothing found".

    Returns:
        ``contexts`` itself when there is no graph context; otherwise a new
        list with the stripped graph context appended as the last element.
    """
    # Treat a missing graph context like an empty one instead of crashing.
    clean_graph_context = (graph_context or "").strip()
    if not clean_graph_context:
        return contexts
    return [*contexts, clean_graph_context]
def generate_grounded_answer(question: str, contexts: list[str]) -> str:
    """Ask the configured chat model to answer ``question`` from ``contexts``.

    With no contexts a fixed "not enough context" sentence is returned and
    no request is made. Otherwise the contexts are numbered from 1, joined
    into a single prompt, and sent through ``InferenceClient.chat_completion``
    with temperature 0 and at most 512 new tokens. An empty string is
    returned when the response carries no choices or no content.
    """
    if not contexts:
        return "I do not have enough retrieved context to answer this question."

    numbered_contexts = [
        f"Context {position}:\n{text}" for position, text in enumerate(contexts, start=1)
    ]
    context_block = "\n\n".join(numbered_contexts)
    prompt = (
        "Answer the question using only the provided context. "
        "If the context is insufficient, say that the answer is not available in the context.\n\n"
        f"{context_block}\n\nQuestion: {question}"
    )
    system_message = {
        "role": "system",
        "content": "You are a careful RAG evaluator that only uses supplied evidence.",
    }
    user_message = {"role": "user", "content": prompt}

    client = InferenceClient(token=settings.HF_TOKEN)
    completion = client.chat_completion(
        messages=[system_message, user_message],
        model=settings.LLM_MODEL,
        max_tokens=min(settings.LLM_MAX_NEW_TOKENS, 512),
        temperature=0.0,
    )
    if completion.choices:
        return (completion.choices[0].message.content or "").strip()
    return ""
def collect_records(
    questions: Iterable[EvaluationQuestion],
    user_id: str,
    document_id: Optional[str] = None,
    answer_generator: AnswerGenerator = generate_grounded_answer,
) -> dict[str, list[EvaluationRecord]]:
    """Produce one ``EvaluationRecord`` per question for each retrieval mode.

    For every question both context sets are retrieved first (vector, then
    GraphRAG); afterwards ``answer_generator`` is called once per mode, in
    the same order, to produce the response stored in the record.

    Returns:
        A dict with the keys ``"vector"`` and ``"graphrag"``, each mapping
        to the records of that mode in question order.
    """
    grouped: dict[str, list[EvaluationRecord]] = {"vector": [], "graphrag": []}
    for item in questions:
        # Dict insertion order fixes the processing order: vector, graphrag.
        contexts_by_mode = {
            "vector": retrieve_vector_contexts(
                question=item.question,
                user_id=user_id,
                document_id=document_id,
            ),
            "graphrag": retrieve_graphrag_contexts(
                question=item.question,
                user_id=user_id,
                document_id=document_id,
            ),
        }
        for mode, mode_contexts in contexts_by_mode.items():
            record = EvaluationRecord(
                id=item.id,
                mode=mode,
                question=item.question,
                reference=item.reference,
                response=answer_generator(item.question, mode_contexts),
                contexts=mode_contexts,
            )
            grouped[mode].append(record)
    return grouped
def evaluate_records(records: list[EvaluationRecord]) -> dict[str, float]:
    """Score ``records`` with RAGAS and return the summarized metric means.

    The evaluation libraries are imported inside the function, so they are
    only needed when an evaluation is actually run. Faithfulness, factual
    correctness and LLM context recall are computed with the configured
    Hugging Face model as the judge; the raw result is reduced by
    ``summarize_ragas_result``.
    """
    from langchain_huggingface import HuggingFaceEndpoint
    from ragas import EvaluationDataset, evaluate
    from ragas.llms import LangchainLLMWrapper
    from ragas.metrics import Faithfulness, FactualCorrectness, LLMContextRecall

    samples = []
    for record in records:
        samples.append(
            {
                "user_input": record.question,
                "retrieved_contexts": record.contexts,
                "response": record.response,
                "reference": record.reference,
            }
        )
    dataset = EvaluationDataset.from_list(samples)

    judge_endpoint = HuggingFaceEndpoint(
        repo_id=settings.LLM_MODEL,
        huggingfacehub_api_token=settings.HF_TOKEN,
        max_new_tokens=512,
        temperature=0.0,
        timeout=300,
    )
    evaluator_llm = LangchainLLMWrapper(judge_endpoint)

    selected_metrics = [Faithfulness(), FactualCorrectness(), LLMContextRecall()]
    raw_result = evaluate(dataset=dataset, metrics=selected_metrics, llm=evaluator_llm)
    return summarize_ragas_result(raw_result)
def compare_pipelines(grouped_records: dict[str, list[EvaluationRecord]]) -> dict[str, Any]:
    """Score both retrieval modes and report the GraphRAG-minus-vector deltas.

    The ``"vector"`` records are evaluated first, then the ``"graphrag"``
    ones. A metric missing from one side counts as ``0.0`` in the delta;
    deltas are rounded to four decimals and listed in sorted metric order.
    """
    baseline = evaluate_records(grouped_records["vector"])
    augmented = evaluate_records(grouped_records["graphrag"])

    delta: dict[str, float] = {}
    for metric in sorted(baseline.keys() | augmented.keys()):
        difference = augmented.get(metric, 0.0) - baseline.get(metric, 0.0)
        delta[metric] = round(difference, 4)

    return {"vector": baseline, "graphrag": augmented, "delta": delta}
def summarize_ragas_result(result: Any) -> dict[str, float]:
    """Normalize RAGAS result objects into mean metric scores.

    Three result shapes are accepted, checked in this order:

    1. an object with ``to_pandas()`` — every column that contains numeric
       values is averaged;
    2. a plain ``dict`` of metric name to number;
    3. an object whose ``scores`` attribute is a list of per-sample dicts —
       values are averaged per metric.

    Non-numeric values and NaN are ignored in all three shapes, so a single
    failed sample cannot turn a whole metric into NaN. A metric with no
    usable values is omitted. Means are rounded to four decimals.

    Raises:
        TypeError: If ``result`` matches none of the supported shapes.
    """

    def _is_score(value: Any) -> bool:
        # ``value == value`` is False only for NaN.
        return isinstance(value, (int, float)) and value == value

    if hasattr(result, "to_pandas"):
        dataframe = result.to_pandas()
        scores: dict[str, float] = {}
        for column in dataframe.columns:
            values = [
                float(value) for value in dataframe[column].tolist() if _is_score(value)
            ]
            if values:
                scores[str(column)] = round(mean(values), 4)
        return scores

    if isinstance(result, dict):
        return {
            str(key): round(float(value), 4)
            for key, value in result.items()
            if _is_score(value)
        }

    sample_rows = getattr(result, "scores", None)
    if isinstance(sample_rows, list):
        by_metric: dict[str, list[float]] = {}
        for row in sample_rows:
            if not isinstance(row, dict):
                continue
            for key, value in row.items():
                if _is_score(value):
                    by_metric.setdefault(str(key), []).append(float(value))
        return {key: round(mean(values), 4) for key, values in by_metric.items()}

    raise TypeError(f"Unsupported RAGAS result type: {type(result)!r}")
+def _chunk_texts(chunks: list[dict[str, Any]]) -> list[str]:
+    return [str(chunk["text"]) for chunk in chunks if chunk.get("text")]

backend/app/routes/auth.py CHANGED Viewed

@@ -3,8 +3,11 @@ Auth API routes — register, login, and user profile.
 """
 import re
 import secrets
 from datetime import datetime, timezone
-from fastapi import APIRouter, Body, Depends, HTTPException, status
 from langsmith import expect
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.orm import Session
@@ -479,3 +482,205 @@ def get_auth_config():
     return {
         "google_client_id": settings.GOOGLE_CLIENT_ID
     }

 """
 import re
 import secrets
+from typing import Optional
 from datetime import datetime, timezone
+from fastapi import APIRouter, Depends, HTTPException, status, Cookie, Response, Body
+from fastapi.responses import RedirectResponse
+import httpx
 from langsmith import expect
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.orm import Session
     return {
         "google_client_id": settings.GOOGLE_CLIENT_ID
     }
def _unique_google_username(email: str, db: Session) -> str:
    """
    Generate a unique username based on the email.

    The local part of the email (text before the first "@") is reduced to
    ASCII letters, digits, "_" and "-" and cut to 70 characters. If nothing
    is left, "user" is used so the result is never an empty string. When the
    candidate is already taken, "-2", "-3", ... is appended (trimming the
    base so the result stays within 80 characters) until a free name is found.

    Args:
        email: Email address to derive the username from.
        db: Database session used to look up existing usernames.

    Returns:
        A username that no ``User`` row had at the time of the lookup.
    """
    base = email.split("@")[0]
    base = re.sub(r"[^a-zA-Z0-9_-]", "", base)
    # Fall back to a fixed stem when sanitizing removed every character.
    base = base[:70] or "user"
    candidate = base
    suffix = 1
    while db.query(User).filter(User.username == candidate).first():
        suffix += 1
        suffix_text = f"-{suffix}"
        candidate = f"{base[:80 - len(suffix_text)]}{suffix_text}"
    return candidate
@router.get("/login/huggingface")
def huggingface_login(response: Response):
    """
    Generates a secure state, stores it in an HttpOnly cookie,
    and returns the Hugging Face OAuth authorization URL.

    Returns:
        ``{"url": <authorize URL>}`` with every query parameter
        percent-encoded.

    Raises:
        HTTPException: 503 when ``HF_CLIENT_ID`` or ``HF_REDIRECT_URI`` is
            not configured.
    """
    # Imported locally so the block does not depend on the module's import list.
    from urllib.parse import urlencode

    if not settings.HF_CLIENT_ID or not settings.HF_REDIRECT_URI:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Hugging Face OAuth is not configured",
        )

    # Random CSRF state; the callback compares it with the cookie set below.
    state = secrets.token_urlsafe(32)
    response.set_cookie(
        key="oauth_state",
        value=state,
        httponly=True,
        secure=settings.ENVIRONMENT == "production",
        samesite="lax",
        max_age=600,  # 10 minutes
    )

    # urlencode escapes the spaces in the scope and the reserved characters
    # in the redirect URI, which raw string interpolation left unescaped.
    query = urlencode(
        {
            "client_id": settings.HF_CLIENT_ID,
            "redirect_uri": settings.HF_REDIRECT_URI,
            "scope": "openid profile email",
            "state": state,
            "response_type": "code",
        }
    )
    return {"url": f"https://huggingface.co/oauth/authorize?{query}"}
@router.get("/callback/huggingface")
async def huggingface_callback(
    code: str,
    state: str,
    response: Response,
    oauth_state: Optional[str] = Cookie(None),
    db: Session = Depends(get_db),
):
    """
    Verifies state, exchanges code for access token,
    gets user info, upserts user, sets HttpOnly JWT cookies,
    and redirects to the frontend dashboard.

    Args:
        code: Authorization code from the OAuth redirect.
        state: State value echoed back in the OAuth redirect.
        response: Injected response object; unused, because the cookies are
            set on the redirect response this handler returns.
        oauth_state: Value of the ``oauth_state`` cookie set by the login route.
        db: Database session.

    Raises:
        HTTPException: 400 on state mismatch or missing email; 401 when the
            token endpoint returns an error status or no access token; 500
            on any other failure while calling Hugging Face.
    """
    # 1. Verify CSRF state against the cookie set by the login route.
    if not oauth_state or state != oauth_state:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="State verification failed. Possible CSRF attack.",
        )

    token_request = {
        "grant_type": "authorization_code",
        "code": code,
        "redirect_uri": settings.HF_REDIRECT_URI,
        "client_id": settings.HF_CLIENT_ID,
        "client_secret": settings.HF_CLIENT_SECRET,
    }

    # One client session serves both Hugging Face requests.
    async with httpx.AsyncClient() as client:
        # 2. Exchange the authorization code for an access token.
        try:
            token_response = await client.post(
                "https://huggingface.co/oauth/token",
                headers={"Content-Type": "application/x-www-form-urlencoded"},
                data=token_request,
            )
            token_response.raise_for_status()
            token_data = token_response.json()
        except httpx.HTTPStatusError as exc:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail=f"Failed to exchange code: {exc.response.text}",
            ) from exc
        except Exception as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Token exchange error: {str(exc)}",
            ) from exc

        hf_access_token = token_data.get("access_token")
        if not hf_access_token:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="No access token returned from Hugging Face",
            )

        # 3. Fetch the user profile from the userinfo endpoint.
        try:
            userinfo_response = await client.get(
                "https://huggingface.co/oauth/userinfo",
                headers={"Authorization": f"Bearer {hf_access_token}"},
            )
            userinfo_response.raise_for_status()
            user_data = userinfo_response.json()
        except Exception as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Failed to retrieve Hugging Face user info: {str(exc)}",
            ) from exc

    email = user_data.get("email")
    if not email:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Hugging Face account email is required but not provided",
        )
    email = email.lower()
    # NOTE(review): accounts are matched by email alone; confirm whether the
    # provider guarantees a verified email before relying on this match.

    # 4. Upsert the user. New accounts get a username derived from the email
    #    and a random password, since they sign in through OAuth only.
    # NOTE(review): the profile's username is not used for new accounts;
    # confirm whether it should seed the username instead of the email.
    user = db.query(User).filter(User.email == email).first()
    if not user:
        user = User(
            username=_unique_google_username(email, db),
            email=email,
            hashed_password=hash_password(secrets.token_urlsafe(32)),
        )
        db.add(user)
        db.commit()
        db.refresh(user)
    user.last_login = datetime.now(timezone.utc)
    db.commit()
    db.refresh(user)

    # 5. Issue this application's own session tokens.
    access_token = create_access_token(user.id)
    refresh_token = create_refresh_token(user.id)

    # 6. Redirect to the dashboard: absolute frontend URL in development,
    #    relative path in every other environment.
    if settings.ENVIRONMENT == "development":
        redirect_dest = f"{settings.FRONTEND_URL}/dashboard"
    else:
        redirect_dest = "/dashboard"
    redirect = RedirectResponse(
        url=redirect_dest,
        status_code=status.HTTP_307_TEMPORARY_REDIRECT,
    )
    secure_cookies = settings.ENVIRONMENT == "production"
    redirect.set_cookie(
        key="access_token",
        value=access_token,
        httponly=True,
        secure=secure_cookies,
        samesite="lax",
        max_age=settings.JWT_ACCESS_EXPIRY_MINUTES * 60,
    )
    redirect.set_cookie(
        key="refresh_token",
        value=refresh_token,
        httponly=True,
        secure=secure_cookies,
        samesite="lax",
        max_age=settings.JWT_REFRESH_EXPIRY_DAYS * 24 * 60 * 60,
    )
    # The state cookie is single-use; clear it once the flow has completed.
    redirect.delete_cookie(key="oauth_state")
    return redirect
@router.post("/logout")
def logout(response: Response):
    """
    Clears the session cookies (access token first, then refresh token)
    and returns a confirmation message.
    """
    for cookie_name in ("access_token", "refresh_token"):
        response.delete_cookie(key=cookie_name)
    return {"message": "Successfully logged out"}

backend/app/routes/documents.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
 Document management routes — upload, list, delete, and serve PDF files.
-Background ingestion via FastAPI BackgroundTasks.
 """
 import os
 import sys
@@ -14,7 +14,7 @@ from pathlib import Path
 import shutil
 import tempfile
 from urllib.parse import urlparse
-from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, BackgroundTasks, status, Query
 from fastapi.responses import FileResponse
 from sqlalchemy.orm import Session
@@ -30,8 +30,7 @@ from app.schemas import (
 )
 from app.auth import get_current_user
 from app.config import get_settings
-from app.rag.chunker import chunk_document, get_page_count
-from app.rag.vectorstore import store_chunks
 try:
     from crawl4ai import AsyncWebCrawler
@@ -137,133 +136,6 @@ async def validate_upload(file: UploadFile):
         pass
-def _ingest_document(document_id: str, filepath: str, original_name: str, user_id: str):
-    """
-    Ingest one uploaded document: chunk it, store the chunks in ChromaDB, and summarise it.
-    This function is intended to be run as a background task. It opens its own
-    database session, marks the document 'processing', records the page count,
-    chunks the file (passing the record's chunk_size / chunk_overlap when they
-    are set), builds and saves an entity graph on a best-effort basis, stores
-    the chunks via store_chunks, attempts to generate a summary, and finally
-    marks the document 'ready' with its chunk count.
-    If no chunks are produced the document is marked 'failed' with an
-    explanatory message. Any other exception also marks it 'failed' and stores
-    the first 500 characters of the error text.
-    Args:
-        document_id: Unique identifier of the document in the database.
-        filepath: Absolute or relative path to the uploaded file on disk.
-        original_name: original filename provided by the user (for logging and metadata).
-        user_id: Identifier of the user who owns the document.
-    Returns:
-        None
-    Note:
-        This function does not raise exceptions to the caller;
-        all errors are logged and the database record is updated accordingly.
-    """
-    from app.database import SessionLocal
-    db = SessionLocal()
-    try:
-        # Soft-deleted documents are excluded, so they are reported as not found.
-        doc = (
-            db.query(Document)
-            .filter(Document.id == document_id, Document.is_deleted.is_(False))
-            .first()
-        )
-        if not doc:
-            logger.error(f"Document {document_id} not found for ingestion")
-            return
-        # Update status to processing
-        doc.status = "processing"
-        db.commit()
-        # Get page count
-        page_count = get_page_count(filepath)
-        doc.page_count = page_count
-        # Chunk document with optional chunk size and overlap parameters from the document record, falling back to global defaults if not set
-        chunk_size = doc.chunk_size
-        chunk_overlap = doc.chunk_overlap
-        try:
-            kwargs = {}
-            if chunk_size is not None:
-                kwargs["chunk_size"] = chunk_size
-            if chunk_overlap is not None:
-                kwargs["chunk_overlap"] = chunk_overlap
-            if kwargs:
-                chunks = chunk_document(filepath, **kwargs)
-            else:
-                chunks = chunk_document(filepath)
-        except TypeError:
-            # Backward-compatible fallback for chunk_document implementations/tests
-            # that only accept (filepath)
-            # NOTE(review): a TypeError raised from inside chunk_document itself
-            # also lands here and triggers the same retry without kwargs.
-            chunks = chunk_document(filepath)
-        if not chunks:
-            doc.status = "failed"
-            doc.error_message = "No text could be extracted from the document"
-            db.commit()
-            return
-        # Build and persist a lightweight entity co-occurrence graph for GraphRAG.
-        # Best effort: a failure here is logged and ingestion continues.
-        try:
-            from app.rag.graph_builder import build_graph, save_graph
-            graph = build_graph(chunks)
-            save_graph(graph, user_id=user_id, document_id=document_id)
-        except Exception as e:
-            logger.warning(f"Could not build knowledge graph for document {document_id}: {e}")
-        # Store embeddings in ChromaDB
-        chunk_count = store_chunks(
-            chunks=chunks,
-            document_id=document_id,
-            filename=original_name,
-            user_id=user_id,
-        )
-        # Generate summary and update document record
-        try:
-            from app.rag.summarizer import generate_document_summary
-            summary = generate_document_summary(filepath, max_sentences=2)
-            if summary:
-                doc.summary = summary
-                db.commit() # Update document record with summary
-        except Exception as e:
-            # NOTE: despite the log wording, this handler also catches errors
-            # raised while generating the summary, not only import failures.
-            logger.warning(f"Could not import summarizer for document {document_id}: {e}")
-            doc.summary = None
-        # Update document record
-        doc.chunk_count = chunk_count
-        doc.status = "ready"
-        db.commit()
-        logger.info(f"Document {document_id} ingested: {page_count} pages, {chunk_count} chunks")
-    except Exception as e:
-        logger.error(f"Ingestion error for {document_id}: {e}")
-        try:
-            # Re-query the record before writing the failure status onto it.
-            doc = (
-                db.query(Document)
-                .filter(Document.id == document_id, Document.is_deleted.is_(False))
-                .first()
-            )
-            if doc:
-                doc.status = "failed"
-                # Only the first 500 characters of the error text are stored.
-                doc.error_message = str(e)[:500]
-                db.commit()
-        except Exception:
-            # Recording the failure is itself best effort; errors here are dropped.
-            pass
-    finally:
-        db.close()
 def _crawl_in_new_loop(url: str) -> str:
     """Run the async crawler in a fresh event loop on a worker thread.
     On Windows this must be a ProactorEventLoop to support subprocesses.
@@ -295,7 +167,6 @@ def _crawl_in_new_loop(url: str) -> str:
 @router.post("/upload", response_model=DocumentResponse, status_code=status.HTTP_202_ACCEPTED)
 async def upload_document(
-    background_tasks: BackgroundTasks,
     file: UploadFile = File(...),
     user: User = Depends(get_current_user),
     db: Session = Depends(get_db),
@@ -305,12 +176,11 @@ async def upload_document(
     Validates the uploaded file (extension, size, MIME type, integrity),
     saves it to the user's directory, creates a database record with status
-    'pending', schedules a background task for chunking and embedding, and
-    returns 202 Accepted immediately so large documents do not block the API
-    request while embeddings are generated.
     Args:
-        background_tasks: FastAPI BackgroundTasks instance to run the ingestion process asynchronously.
         file: The uploaded file, provided as a multipart/form-data field in the request.
         user: The currently authenticated user, injected by the `get_current_user` dependency.
         db: Database session, injected by the `get_db` dependency.
@@ -364,21 +234,19 @@ async def upload_document(
     db.commit()
     db.refresh(document)
-    # ── Trigger background ingestion ─────────────────
-    background_tasks.add_task(
-        _ingest_document,
         document_id=document.id,
         filepath=filepath,
         original_name=file.filename,
         user_id=user.id,
     )
-    return DocumentResponse.model_validate(document)
 @router.post("/urlupload", status_code=status.HTTP_202_ACCEPTED)
 async def upload_document_url(
         payload: UploadUrl,
-        background_tasks: BackgroundTasks,
         user: User = Depends(get_current_user),
         db: Session = Depends(get_db),
 ):
@@ -450,16 +318,15 @@ async def upload_document_url(
         db.commit()
         db.refresh(document)
-        # ── Trigger background ingestion ───────────────────────
-        background_tasks.add_task(
-            _ingest_document,
             document_id=document.id,
             filepath=filepath,
             original_name=original_name,
             user_id=user.id,
         )
-        return DocumentResponse.model_validate(document)
     except HTTPException:
         raise
@@ -716,7 +583,6 @@ def delete_document(
 def update_chunk_settings(
     document_id: str,
     settings_update: ChunkSettings,
-    background_tasks: BackgroundTasks,
     user: User = Depends(get_current_user),
     db: Session = Depends(get_db),
 ):
@@ -727,7 +593,6 @@ def update_chunk_settings(
     Args:
         document_id: The unique identifier of the document to update.
         settings_update: A ChunkSettings object containing the chunk_size and chunk_overlap values.
-        background_tasks: FastAPI BackgroundTasks instance to run the ingestion process asynchronously.
         user: The currently authenticated user, injected by the `get_current_user` dependency.
         db: Database session, injected by the `get_db` dependency.
@@ -768,13 +633,13 @@ def update_chunk_settings(
     doc.summary = None
     db.commit()
-    # Trigger background ingestion with updated chunk settings. The _ingest_document function will read the new chunk settings from the document record and re-chunk the document accordingly.
-    background_tasks.add_task(
-        _ingest_document,
         document_id=doc.id,
         filepath=os.path.join(settings.UPLOAD_DIR, user.id, doc.filename),
         original_name=doc.original_name,
         user_id=user.id,
     )
     # Return the updated document record with new chunk settings
-    return DocumentResponse.model_validate(doc)

 """
 Document management routes — upload, list, delete, and serve PDF files.
+Background ingestion via Celery workers.
 """
 import os
 import sys
 import shutil
 import tempfile
 from urllib.parse import urlparse
+from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, status, Query
 from fastapi.responses import FileResponse
 from sqlalchemy.orm import Session
 )
 from app.auth import get_current_user
 from app.config import get_settings
+from app.tasks import process_document
 try:
     from crawl4ai import AsyncWebCrawler
         pass
 def _crawl_in_new_loop(url: str) -> str:
     """Run the async crawler in a fresh event loop on a worker thread.
     On Windows this must be a ProactorEventLoop to support subprocesses.
 @router.post("/upload", response_model=DocumentResponse, status_code=status.HTTP_202_ACCEPTED)
 async def upload_document(
     file: UploadFile = File(...),
     user: User = Depends(get_current_user),
     db: Session = Depends(get_db),
     Validates the uploaded file (extension, size, MIME type, integrity),
     saves it to the user's directory, creates a database record with status
+    'pending', queues a Celery task for chunking and embedding, and returns
+    202 Accepted immediately so large documents do not block the API request
+    while embeddings are generated.
     Args:
         file: The uploaded file, provided as a multipart/form-data field in the request.
         user: The currently authenticated user, injected by the `get_current_user` dependency.
         db: Database session, injected by the `get_db` dependency.
     db.commit()
     db.refresh(document)
+    # ── Queue background ingestion ─────────────────
+    task = process_document.delay(
         document_id=document.id,
         filepath=filepath,
         original_name=file.filename,
         user_id=user.id,
     )
+    return DocumentResponse.model_validate(document).model_copy(update={"task_id": task.id})
 @router.post("/urlupload", status_code=status.HTTP_202_ACCEPTED)
 async def upload_document_url(
         payload: UploadUrl,
         user: User = Depends(get_current_user),
         db: Session = Depends(get_db),
 ):
         db.commit()
         db.refresh(document)
+        # ── Queue background ingestion ───────────────────────
+        task = process_document.delay(
             document_id=document.id,
             filepath=filepath,
             original_name=original_name,
             user_id=user.id,
         )
+        return DocumentResponse.model_validate(document).model_copy(update={"task_id": task.id})
     except HTTPException:
         raise
 def update_chunk_settings(
     document_id: str,
     settings_update: ChunkSettings,
     user: User = Depends(get_current_user),
     db: Session = Depends(get_db),
 ):
     Args:
         document_id: The unique identifier of the document to update.
         settings_update: A ChunkSettings object containing the chunk_size and chunk_overlap values.
         user: The currently authenticated user, injected by the `get_current_user` dependency.
         db: Database session, injected by the `get_db` dependency.
     doc.summary = None
     db.commit()
+    # Queue ingestion with updated chunk settings. The worker reads the new
+    # settings from the document record before re-chunking.
+    task = process_document.delay(
         document_id=doc.id,
         filepath=os.path.join(settings.UPLOAD_DIR, user.id, doc.filename),
         original_name=doc.original_name,
         user_id=user.id,
     )
     # Return the updated document record with new chunk settings
+    return DocumentResponse.model_validate(doc).model_copy(update={"task_id": task.id})

backend/app/schemas.py CHANGED Viewed

@@ -119,6 +119,7 @@ class DocumentResponse(BaseModel):
     error_message: Optional[str] = None
     uploaded_at: datetime
     summary: Optional[str] = None # New field for document summary
     class Config:
         from_attributes = True

     error_message: Optional[str] = None
     uploaded_at: datetime
     summary: Optional[str] = None # New field for document summary
+    task_id: Optional[str] = None
     class Config:
         from_attributes = True

backend/app/services/document_ingestion.py CHANGED Viewed

@@ -17,18 +17,31 @@ def ingest_document(document_id: str, filepath: str, original_name: str, user_id
     db = SessionLocal()
     try:
-        doc = db.query(Document).filter(Document.id == document_id).first()
         if not doc:
             logger.error("Document %s not found for ingestion", document_id)
             return
         doc.status = "processing"
         db.commit()
         page_count = get_page_count(filepath)
         doc.page_count = page_count
-        chunks = chunk_document(filepath)
         if not chunks:
             doc.status = "failed"
@@ -36,6 +49,14 @@ def ingest_document(document_id: str, filepath: str, original_name: str, user_id
             db.commit()
             return
         chunk_count = store_chunks(
             chunks=chunks,
             document_id=document_id,
@@ -69,7 +90,10 @@ def ingest_document(document_id: str, filepath: str, original_name: str, user_id
     except Exception as e:
         logger.error("Ingestion error for %s: %s", document_id, e)
         try:
-            doc = db.query(Document).filter(Document.id == document_id).first()
             if doc:
                 doc.status = "failed"
                 doc.error_message = str(e)[:500]

     db = SessionLocal()
     try:
+        doc = db.query(Document).filter(
+            Document.id == document_id,
+            Document.is_deleted.is_(False),
+        ).first()
         if not doc:
             logger.error("Document %s not found for ingestion", document_id)
             return
         doc.status = "processing"
+        doc.error_message = None
         db.commit()
         page_count = get_page_count(filepath)
         doc.page_count = page_count
+        try:
+            chunk_kwargs = {}
+            if doc.chunk_size is not None:
+                chunk_kwargs["chunk_size"] = doc.chunk_size
+            if doc.chunk_overlap is not None:
+                chunk_kwargs["chunk_overlap"] = doc.chunk_overlap
+            chunks = chunk_document(filepath, **chunk_kwargs)
+        except TypeError:
+            # Preserve compatibility with patched/test implementations.
+            chunks = chunk_document(filepath)
         if not chunks:
             doc.status = "failed"
             db.commit()
             return
+        try:
+            from app.rag.graph_builder import build_graph, save_graph
+            graph = build_graph(chunks)
+            save_graph(graph, user_id=user_id, document_id=document_id)
+        except Exception as e:
+            logger.warning("Could not build knowledge graph for document %s: %s", document_id, e)
         chunk_count = store_chunks(
             chunks=chunks,
             document_id=document_id,
     except Exception as e:
         logger.error("Ingestion error for %s: %s", document_id, e)
         try:
+            doc = db.query(Document).filter(
+                Document.id == document_id,
+                Document.is_deleted.is_(False),
+            ).first()
             if doc:
                 doc.status = "failed"
                 doc.error_message = str(e)[:500]

backend/app/tasks.py ADDED Viewed

	@@ -0,0 +1,22 @@

+"""Celery tasks for document processing."""
+from app.celery_app import celery_app
+from app.services.document_ingestion import ingest_document
+@celery_app.task(bind=True, name="app.tasks.process_document")
+def process_document(
+    self,
+    document_id: str,
+    filepath: str,
+    original_name: str,
+    user_id: str,
+) -> dict[str, str]:
+    """Run the RAG ingestion pipeline for a stored document."""
+    ingest_document(
+        document_id=document_id,
+        filepath=filepath,
+        original_name=original_name,
+        user_id=user_id,
+    )
+    return {"document_id": document_id, "status": "completed"}

backend/evaluation/ragas_sample_questions.jsonl ADDED Viewed

	@@ -0,0 +1,50 @@

+{"id":"q001","question":"What is the main purpose of PDF-Assistant-RAG?","reference":"PDF-Assistant-RAG helps users upload documents, retrieve relevant document context, and ask questions answered through a retrieval-augmented generation workflow."}
+{"id":"q002","question":"Which backend framework serves the API?","reference":"The backend API is served by FastAPI."}
+{"id":"q003","question":"Which frontend framework is used for the application interface?","reference":"The frontend is a Next.js application."}
+{"id":"q004","question":"What does the document upload route do before saving permanent state?","reference":"The upload route validates filename, extension, size, MIME type, and parser readability before moving a file into permanent storage."}
+{"id":"q005","question":"Which vector database stores retrieved document chunks?","reference":"ChromaDB stores document chunks for vector retrieval."}
+{"id":"q006","question":"Which embedding model is configured by default?","reference":"The default embedding model is sentence-transformers/all-MiniLM-L6-v2."}
+{"id":"q007","question":"What is the default embedding dimension?","reference":"The default embedding dimension is 384."}
+{"id":"q008","question":"What is the purpose of TOP_K_RETRIEVAL?","reference":"TOP_K_RETRIEVAL controls how many candidate chunks are retrieved before reranking."}
+{"id":"q009","question":"What is the purpose of TOP_K_RERANK?","reference":"TOP_K_RERANK controls how many reranked chunks are finally passed to answer generation."}
+{"id":"q010","question":"Which model family is used for reranking by default?","reference":"The default reranker is a cross-encoder model, cross-encoder/ms-marco-MiniLM-L-6-v2."}
+{"id":"q011","question":"How does the backend identify authenticated users?","reference":"Authenticated routes use JWT identity through the current-user dependency."}
+{"id":"q012","question":"What data must user-facing routes filter by?","reference":"User-facing routes must filter documents, files, vector chunks, and chat data by the authenticated user's id."}
+{"id":"q013","question":"What does the health endpoint check?","reference":"The health endpoint checks service health such as API, SQL database, and Chroma availability."}
+{"id":"q014","question":"What does the chat route provide besides normal JSON answers?","reference":"The chat route supports server-sent events so answers can stream tokens to the frontend."}
+{"id":"q015","question":"What is GraphRAG used for in this project?","reference":"GraphRAG builds and retrieves lightweight entity co-occurrence relationships to add graph context to document answers."}
+{"id":"q016","question":"Where are GraphRAG graph files persisted by default?","reference":"GraphRAG graph files are persisted under the configured GRAPH_PERSIST_DIR, which defaults to ./data/graphs."}
+{"id":"q017","question":"Which graph library is used to store knowledge graph relationships?","reference":"NetworkX is used to build and store knowledge graph relationships."}
+{"id":"q018","question":"What does the graph retriever return for a relevant query?","reference":"The graph retriever returns compact relationship lines connecting matched entities and nearby entities, including page information and relationship strength."}
+{"id":"q019","question":"What happens when GraphRAG finds no matching relationship context?","reference":"When no graph relationships match, the graph retriever returns an empty string."}
+{"id":"q020","question":"Which uploaded file formats are allowed by default?","reference":"The default allowed upload extensions are pdf, docx, txt, and md."}
+{"id":"q021","question":"What is the default upload directory?","reference":"The default upload directory is ./data/uploads."}
+{"id":"q022","question":"Why does the app store original files after upload?","reference":"Original files are stored so the backend can serve files, reprocess them, and extract text for retrieval."}
+{"id":"q023","question":"What is the role of the chunker?","reference":"The chunker extracts document text and splits it into smaller chunks for embedding and retrieval."}
+{"id":"q024","question":"What does the vectorstore service do?","reference":"The vectorstore stores embedded chunks and queries them by user and optional document metadata."}
+{"id":"q025","question":"What does the retriever combine before reranking?","reference":"The retriever combines vector search and BM25 candidates before reranking them."}
+{"id":"q026","question":"Why does the retriever transform queries?","reference":"The retriever rewrites a user question into retrieval-friendly variants to improve search coverage."}
+{"id":"q027","question":"What does the PDF search tool save after retrieving chunks?","reference":"The PDF search tool saves retrieved chunks as last_sources so the agent response can return citations."}
+{"id":"q028","question":"How does the PDF search tool treat document excerpts?","reference":"The PDF search tool labels document excerpts as untrusted evidence and warns the model not to follow instructions inside them."}
+{"id":"q029","question":"What additional context can the PDF search tool append?","reference":"The PDF search tool can append untrusted graph context containing additional relationships from GraphRAG."}
+{"id":"q030","question":"Which optional tool can handle arithmetic questions?","reference":"The calculator tool handles arithmetic expressions safely."}
+{"id":"q031","question":"Which optional tool can handle live information outside uploaded documents?","reference":"The web search tool can look up live web information when document context is insufficient or outdated."}
+{"id":"q032","question":"What does the agent use LangChain tools for?","reference":"The agent uses LangChain tools to route between PDF search, calculator, and web search capabilities."}
+{"id":"q033","question":"What happens when the agent output parser rejects malformed output?","reference":"The app logs the parser rejection and returns a safe malformed-output message."}
+{"id":"q034","question":"What type of API response is used for uploaded document processing status?","reference":"A document status response includes the document id, status, page count, chunk count, and error message."}
+{"id":"q035","question":"How are deleted documents hidden from normal document APIs?","reference":"Documents are soft-deleted with an is_deleted flag and normal APIs filter them out."}
+{"id":"q036","question":"What does deleting a document preserve for future restore flows?","reference":"Soft deletion preserves underlying files, vectors, graphs, and chat history for possible future restore flows."}
+{"id":"q037","question":"What is the purpose of CHUNK_SIZE?","reference":"CHUNK_SIZE controls the number of characters in each document chunk."}
+{"id":"q038","question":"What is the purpose of CHUNK_OVERLAP?","reference":"CHUNK_OVERLAP controls how much text overlaps between adjacent chunks to preserve boundary context."}
+{"id":"q039","question":"Which HuggingFace setting controls answer length?","reference":"LLM_MAX_NEW_TOKENS controls the maximum number of generated tokens for answers."}
+{"id":"q040","question":"Which HuggingFace setting controls answer randomness?","reference":"LLM_TEMPERATURE controls sampling randomness during answer generation."}
+{"id":"q041","question":"What environment variable stores the HuggingFace token?","reference":"HF_TOKEN stores the HuggingFace API token used for inference."}
+{"id":"q042","question":"Why should DEBUG not be enabled in production?","reference":"DEBUG enables detailed behavior intended for development and should not be enabled in production."}
+{"id":"q043","question":"How are production CORS origins configured?","reference":"Production CORS origins are configured through ALLOWED_ORIGINS."}
+{"id":"q044","question":"What database is used by default for local development?","reference":"The default database URL points to a local SQLite database at ./data/app.db."}
+{"id":"q045","question":"What database does Docker Compose provide for the stack?","reference":"Docker Compose provides a PostgreSQL database service for the stack."}
+{"id":"q046","question":"What is the contributor target branch for pull requests?","reference":"Contributor pull requests should target the dev branch."}
+{"id":"q047","question":"Which branch is production protected for deployment?","reference":"The main branch is treated as the production branch for deployment."}
+{"id":"q048","question":"Where can developers view Swagger locally?","reference":"Developers can view Swagger at /docs when the backend is running locally."}
+{"id":"q049","question":"What does the architecture document focus on?","reference":"The architecture document focuses on how requests move through the system and how major runtime components interact."}
+{"id":"q050","question":"Why is a RAGAS evaluation pipeline useful for this project?","reference":"A RAGAS evaluation pipeline provides quantitative scores to compare standard vector search with GraphRAG and track retrieval and answer quality over time."}

backend/requirements.txt CHANGED Viewed

@@ -38,6 +38,7 @@ langchain-huggingface
 langchain-text-splitters
 langsmith
 rank-bm25
 # Embeddings & ML
 sentence-transformers
@@ -56,6 +57,7 @@ huggingface-hub
 gunicorn
 slowapi
 prometheus-fastapi-instrumentator
 # File Validation
 #sudo apt-get install libmagic1 // for Debian/Ubuntu

 langchain-text-splitters
 langsmith
 rank-bm25
+ragas>=0.3.0
 # Embeddings & ML
 sentence-transformers
 gunicorn
 slowapi
 prometheus-fastapi-instrumentator
+celery[redis]
 # File Validation
 #sudo apt-get install libmagic1 // for Debian/Ubuntu

backend/scripts/run_ragas_eval.py ADDED Viewed

	@@ -0,0 +1,59 @@

+"""Run a 50-question RAGAS comparison for vector search and GraphRAG."""
+from __future__ import annotations
+import argparse
+import json
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+ROOT = Path(__file__).resolve().parents[2]
+BACKEND_DIR = ROOT / "backend"
+if str(BACKEND_DIR) not in sys.path:
+    sys.path.insert(0, str(BACKEND_DIR))
+DEFAULT_DATASET = BACKEND_DIR / "evaluation" / "ragas_sample_questions.jsonl"
+DEFAULT_OUTPUT = BACKEND_DIR / "evaluation" / "ragas_results.json"
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Evaluate vector search versus GraphRAG with RAGAS.",
+    )
+    parser.add_argument("--user-id", required=True, help="Owner user id for indexed documents.")
+    parser.add_argument("--document-id", help="Optional single document id to evaluate.")
+    parser.add_argument("--dataset", type=Path, default=DEFAULT_DATASET)
+    parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
+    parser.add_argument("--limit", type=int, default=50)
+    return parser.parse_args()
+def main() -> None:
+    args = parse_args()
+    from app.evaluation.ragas_pipeline import collect_records, compare_pipelines, load_questions
+    questions = load_questions(args.dataset, limit=args.limit)
+    grouped_records = collect_records(
+        questions=questions,
+        user_id=args.user_id,
+        document_id=args.document_id,
+    )
+    scores = compare_pipelines(grouped_records)
+    payload = {
+        "generated_at": datetime.now(timezone.utc).isoformat(),
+        "dataset": str(args.dataset),
+        "question_count": len(questions),
+        "user_id": args.user_id,
+        "document_id": args.document_id,
+        "scores": scores,
+    }
+    args.output.parent.mkdir(parents=True, exist_ok=True)
+    args.output.write_text(json.dumps(payload, indent=2), encoding="utf-8")
+    print(json.dumps(payload["scores"], indent=2))
+    print(f"Wrote RAGAS evaluation results to {args.output}")
+if __name__ == "__main__":
+    main()

backend/tests/test_auth.py CHANGED Viewed

@@ -122,3 +122,80 @@ def test_hf_token_appears_in_user_response(client, auth_headers, user, db_sessio
     stored_token = row[0]
     assert stored_token is not None
     assert stored_token != "hf_persist_token"

     stored_token = row[0]
     assert stored_token is not None
     assert stored_token != "hf_persist_token"
+from unittest.mock import patch, AsyncMock, MagicMock
+import urllib.parse
+def test_huggingface_login(client):
+    from app.config import get_settings
+    settings = get_settings()
+    settings.HF_CLIENT_ID = "test-client-id"
+    settings.HF_REDIRECT_URI = "http://localhost:8000/api/v1/auth/callback/huggingface"
+    response = client.get("/api/v1/auth/login/huggingface")
+    assert response.status_code == 200
+    data = response.json()
+    assert "url" in data
+    assert "test-client-id" in data["url"]
+    assert "oauth_state" in response.cookies
+# Decorators apply bottom-up, so the ``get`` patch is the first mock argument
+# and the ``post`` patch is the second.
+@patch("httpx.AsyncClient.post")
+@patch("httpx.AsyncClient.get")
+def test_huggingface_callback_success(mock_get, mock_post, client):
+    """A callback with a matching state redirects to the dashboard and sets both token cookies."""
+    from app.config import get_settings
+    settings = get_settings()
+    settings.HF_CLIENT_ID = "test-client-id"
+    settings.HF_CLIENT_SECRET = "test-client-secret"
+    settings.HF_REDIRECT_URI = "http://localhost:8000/api/v1/auth/callback/huggingface"
+    # Stubbed response for the patched AsyncClient.post
+    # (presumably the token exchange; confirm against the route).
+    mock_post_resp = MagicMock()
+    mock_post_resp.status_code = 200
+    mock_post_resp.json.return_value = {"access_token": "hf-access-token"}
+    mock_post.return_value = mock_post_resp
+    # Stubbed response for the patched AsyncClient.get
+    # (presumably the user-info lookup; confirm against the route).
+    mock_get_resp = MagicMock()
+    mock_get_resp.status_code = 200
+    mock_get_resp.json.return_value = {
+        "email": "hfuser@example.com",
+        "preferred_username": "hfuser"
+    }
+    mock_get.return_value = mock_get_resp
+    # Start the flow to obtain a state cookie and the state embedded in the URL.
+    login_response = client.get("/api/v1/auth/login/huggingface")
+    state_cookie = login_response.cookies["oauth_state"]
+    url = login_response.json()["url"]
+    parsed = urllib.parse.urlparse(url)
+    queries = urllib.parse.parse_qs(parsed.query)
+    state_param = queries["state"][0]
+    client.cookies.set("oauth_state", state_cookie)
+    # follow_redirects=False so the 307 response itself can be inspected.
+    callback_response = client.get(
+        f"/api/v1/auth/callback/huggingface?code=hf-code&state={state_param}",
+        follow_redirects=False
+    )
+    assert callback_response.status_code == 307
+    assert "/dashboard" in callback_response.headers["location"]
+    assert "access_token" in callback_response.cookies
+    assert "refresh_token" in callback_response.cookies
+def test_huggingface_callback_invalid_state(client):
+    response = client.get(
+        "/api/v1/auth/callback/huggingface?code=hf-code&state=invalid-state",
+        cookies={"oauth_state": "actual-state"}
+    )
+    assert response.status_code == 400
+    assert "State verification failed" in response.json()["detail"]
+def test_huggingface_logout(client):
+    response = client.post(
+        "/api/v1/auth/logout",
+        cookies={"access_token": "token-value", "refresh_token": "refresh-value"}
+    )
+    assert response.status_code == 200
+    assert response.cookies.get("access_token") in (None, "")
+    assert response.cookies.get("refresh_token") in (None, "")

backend/tests/test_document_upload_validation.py CHANGED Viewed

@@ -6,7 +6,7 @@ import uuid
 from pathlib import Path
 import pytest
-from fastapi import BackgroundTasks, HTTPException, UploadFile
 from pypdf import PdfWriter
 from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker
@@ -141,10 +141,14 @@ def test_upload_document_handles_duplicate_original_names(
     monkeypatch.setattr(documents, "validate_upload", fake_validate_upload)
     monkeypatch.setattr(documents.settings, "UPLOAD_DIR", str(tmp_path / "uploads"))
     monkeypatch.setattr(documents.uuid, "uuid4", lambda: next(uuid_values))
     first = _run(
         documents.upload_document(
-            BackgroundTasks(),
             file=_upload_file("same-name.pdf", b"first"),
             user=user,
             db=session,
@@ -152,7 +156,6 @@ def test_upload_document_handles_duplicate_original_names(
     )
     second = _run(
         documents.upload_document(
-            BackgroundTasks(),
             file=_upload_file("same-name.pdf", b"second"),
             user=user,
             db=session,
@@ -164,6 +167,7 @@ def test_upload_document_handles_duplicate_original_names(
     assert [doc.original_name for doc in stored_docs] == ["same-name.pdf", "same-name.pdf"]
     assert len({doc.filename for doc in stored_docs}) == 2
     assert first.original_name == second.original_name == "same-name.pdf"
     assert (tmp_path / "uploads" / user.id / f"{first_hex}.pdf").exists()
     assert (tmp_path / "uploads" / user.id / f"{second_hex}.pdf").exists()
     assert all(not path.exists() for path in temp_files)

 from pathlib import Path
 import pytest
+from fastapi import HTTPException, UploadFile
 from pypdf import PdfWriter
 from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker
     monkeypatch.setattr(documents, "validate_upload", fake_validate_upload)
     monkeypatch.setattr(documents.settings, "UPLOAD_DIR", str(tmp_path / "uploads"))
     monkeypatch.setattr(documents.uuid, "uuid4", lambda: next(uuid_values))
+    monkeypatch.setattr(
+        documents.process_document,
+        "delay",
+        lambda **_kwargs: types.SimpleNamespace(id="queued-task"),
+    )
     first = _run(
         documents.upload_document(
             file=_upload_file("same-name.pdf", b"first"),
             user=user,
             db=session,
     )
     second = _run(
         documents.upload_document(
             file=_upload_file("same-name.pdf", b"second"),
             user=user,
             db=session,
     assert [doc.original_name for doc in stored_docs] == ["same-name.pdf", "same-name.pdf"]
     assert len({doc.filename for doc in stored_docs}) == 2
     assert first.original_name == second.original_name == "same-name.pdf"
+    assert first.task_id == second.task_id == "queued-task"
     assert (tmp_path / "uploads" / user.id / f"{first_hex}.pdf").exists()
     assert (tmp_path / "uploads" / user.id / f"{second_hex}.pdf").exists()
     assert all(not path.exists() for path in temp_files)

backend/tests/test_documents.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import types
 from app.models import Document
-from app.routes.documents import _ingest_document
 def test_api_health(client):
@@ -116,9 +116,9 @@ def test_ingest_document_builds_and_saves_graph(db_session, monkeypatch, tmp_pat
     chunks = [{"text": "OpenAI works with Microsoft.", "page": 1, "chunk_index": 0}]
     saved = {}
-    monkeypatch.setattr("app.routes.documents.get_page_count", lambda filepath: 1)
-    monkeypatch.setattr("app.routes.documents.chunk_document", lambda filepath: chunks)
-    monkeypatch.setattr("app.routes.documents.store_chunks", lambda **kwargs: len(chunks))
     monkeypatch.setattr("app.database.SessionLocal", lambda: db_session)
     fake_summary = types.ModuleType("app.rag.summarizer")
@@ -136,7 +136,7 @@ def test_ingest_document_builds_and_saves_graph(db_session, monkeypatch, tmp_pat
         ),
     )
-    _ingest_document(
         document_id=document_id,
         filepath=str(tmp_path / "graph.txt"),
         original_name=document.original_name,

 import types
 from app.models import Document
+from app.services.document_ingestion import ingest_document
 def test_api_health(client):
     chunks = [{"text": "OpenAI works with Microsoft.", "page": 1, "chunk_index": 0}]
     saved = {}
+    monkeypatch.setattr("app.services.document_ingestion.get_page_count", lambda filepath: 1)
+    monkeypatch.setattr("app.services.document_ingestion.chunk_document", lambda filepath: chunks)
+    monkeypatch.setattr("app.services.document_ingestion.store_chunks", lambda **kwargs: len(chunks))
     monkeypatch.setattr("app.database.SessionLocal", lambda: db_session)
     fake_summary = types.ModuleType("app.rag.summarizer")
         ),
     )
+    ingest_document(
         document_id=document_id,
         filepath=str(tmp_path / "graph.txt"),
         original_name=document.original_name,

backend/tests/test_ragas_pipeline.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import json
+from types import SimpleNamespace
+from app.evaluation import ragas_pipeline
+from app.evaluation.ragas_pipeline import (
+    EvaluationQuestion,
+    append_graph_context,
+    collect_records,
+    load_questions,
+    summarize_ragas_result,
+)
+def test_load_questions_requires_exact_limit(tmp_path):
+    dataset = tmp_path / "questions.jsonl"
+    rows = [
+        {"id": "q1", "question": "Question 1?", "reference": "Reference 1."},
+        {"id": "q2", "question": "Question 2?", "reference": "Reference 2."},
+    ]
+    dataset.write_text("\n".join(json.dumps(row) for row in rows), encoding="utf-8")
+    questions = load_questions(dataset, limit=2)
+    assert [question.id for question in questions] == ["q1", "q2"]
+    assert questions[0].question == "Question 1?"
+def test_append_graph_context_skips_empty_context():
+    assert append_graph_context(["vector context"], "  ") == ["vector context"]
+    assert append_graph_context(["vector context"], "graph context") == [
+        "vector context",
+        "graph context",
+    ]
+def test_collect_records_builds_vector_and_graphrag_samples(monkeypatch):
+    questions = [
+        EvaluationQuestion(id="q1", question="What is Alpha?", reference="Alpha is a product."),
+    ]
+    monkeypatch.setattr(
+        ragas_pipeline,
+        "retrieve_vector_contexts",
+        lambda **_kwargs: ["Alpha vector context."],
+    )
+    monkeypatch.setattr(
+        ragas_pipeline,
+        "retrieve_graphrag_contexts",
+        lambda **_kwargs: ["Alpha vector context.", "Alpha is related to Beta."],
+    )
+    records = collect_records(
+        questions=questions,
+        user_id="user-1",
+        answer_generator=lambda question, contexts: f"{question} -> {len(contexts)} contexts",
+    )
+    assert records["vector"][0].mode == "vector"
+    assert records["vector"][0].response.endswith("1 contexts")
+    assert records["graphrag"][0].mode == "graphrag"
+    assert records["graphrag"][0].response.endswith("2 contexts")
+def test_summarize_ragas_result_averages_score_rows():
+    result = SimpleNamespace(
+        scores=[
+            {"faithfulness": 1.0, "context_recall": 0.5},
+            {"faithfulness": 0.5, "context_recall": 1.0},
+        ]
+    )
+    assert summarize_ragas_result(result) == {
+        "faithfulness": 0.75,
+        "context_recall": 0.75,
+    }

docker-compose.yml CHANGED Viewed

@@ -1,6 +1,20 @@
 version: '3.8'
 services:
   # ── PostgreSQL Database ──────────────────────────────────
   postgres:
     image: postgres:16-alpine
@@ -34,11 +48,16 @@ services:
       - SECRET_KEY=${SECRET_KEY:-dev-secret-key-change-me}
       - HF_TOKEN=${HF_TOKEN}
       - DATABASE_URL=postgresql://${POSTGRES_USER:-pdf_rag_user}:${POSTGRES_PASSWORD:-pdf_rag_pass}@postgres:5432/${POSTGRES_DB:-pdf_rag}
-      - UPLOAD_DIR=./data/uploads
-      - CHROMA_PERSIST_DIR=./data/chroma_db
     depends_on:
       postgres:
         condition: service_healthy
     restart: unless-stopped
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:7860/api/health"]
@@ -47,6 +66,31 @@ services:
       retries: 3
       start_period: 60s
   # ── pgAdmin (optional — for local DB inspection) ─────────
   pgadmin:
     image: dpage/pgadmin4:latest

 version: '3.8'
 services:
+  # Redis broker/result backend for Celery document processing.
+  # The port is published on the host loopback interface only: this Redis runs
+  # without a password, and the other services reach it as redis:6379 over the
+  # Compose network, so it does not need to be reachable from other machines.
+  redis:
+    image: redis:7-alpine
+    container_name: pdf_rag_redis
+    restart: unless-stopped
+    ports:
+      - "127.0.0.1:6379:6379"
+    healthcheck:
+      # Dependent services wait on this check via `condition: service_healthy`.
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 5s
   # ── PostgreSQL Database ──────────────────────────────────
   postgres:
     image: postgres:16-alpine
       - SECRET_KEY=${SECRET_KEY:-dev-secret-key-change-me}
       - HF_TOKEN=${HF_TOKEN}
       - DATABASE_URL=postgresql://${POSTGRES_USER:-pdf_rag_user}:${POSTGRES_PASSWORD:-pdf_rag_pass}@postgres:5432/${POSTGRES_DB:-pdf_rag}
+      - UPLOAD_DIR=/app/data/uploads
+      - CHROMA_PERSIST_DIR=/app/data/chroma_db
+      - GRAPH_PERSIST_DIR=/app/data/graphs
+      - CELERY_BROKER_URL=redis://redis:6379/0
+      - CELERY_RESULT_BACKEND=redis://redis:6379/1
     depends_on:
       postgres:
         condition: service_healthy
+      redis:
+        condition: service_healthy
     restart: unless-stopped
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:7860/api/health"]
       retries: 3
       start_period: 60s
+  # Celery worker for document extraction, chunking, embeddings, and vector storage; it consumes tasks from the Redis broker configured below
+  worker:
+    build: .
+    container_name: pdf_rag_worker
+    command: >  # folded scalar: the two lines below are joined into one `sh -c` string
+      sh -c "cd /app/backend &&
+      celery -A app.celery_app.celery_app worker --loglevel=info"
+    volumes:
+      - app_data:/app/data  # UPLOAD_DIR, CHROMA_PERSIST_DIR and GRAPH_PERSIST_DIR below all live under this mount
+    environment:  # NOTE(review): presumably must mirror the API service's values so both see the same database, files and broker; confirm
+      - SECRET_KEY=${SECRET_KEY:-dev-secret-key-change-me}
+      - HF_TOKEN=${HF_TOKEN}
+      - DATABASE_URL=postgresql://${POSTGRES_USER:-pdf_rag_user}:${POSTGRES_PASSWORD:-pdf_rag_pass}@postgres:5432/${POSTGRES_DB:-pdf_rag}
+      - UPLOAD_DIR=/app/data/uploads
+      - CHROMA_PERSIST_DIR=/app/data/chroma_db
+      - GRAPH_PERSIST_DIR=/app/data/graphs
+      - CELERY_BROKER_URL=redis://redis:6379/0  # broker uses Redis database 0
+      - CELERY_RESULT_BACKEND=redis://redis:6379/1  # task results use Redis database 1
+    depends_on:  # start only after both dependencies report healthy
+      postgres:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+    restart: unless-stopped
   # ── pgAdmin (optional — for local DB inspection) ─────────
   pgadmin:
     image: dpage/pgadmin4:latest

docs/ARCHITECTURE.md CHANGED Viewed

@@ -52,7 +52,8 @@ sequenceDiagram
     participant UI as Frontend
     participant API as FastAPI documents route
     participant DB as SQL metadata
-    participant Worker as Background task
     participant Files as Upload storage
     participant Vector as ChromaDB
@@ -60,8 +61,9 @@ sequenceDiagram
     API->>API: Validate filename, extension, size, MIME, and parser readability
     API->>Files: Persist original file under the user's upload directory
     API->>DB: Create document row with processing status
-    API-->>UI: 202 Accepted with document metadata
-    API->>Worker: Queue ingestion task
     Worker->>Files: Read saved document
     Worker->>Worker: Extract pages, chunk text, build graph summary data
     Worker->>Vector: Store chunks with document and user metadata
@@ -70,9 +72,9 @@ sequenceDiagram
 The upload route is intentionally strict before it writes long-lived state:
 extension checks, size checks, MIME checks, and parser checks happen before the
-file is moved into permanent storage. The background task owns expensive work
-such as text extraction, chunking, embedding, graph building, and summary
-generation.
 ## Chat And Retrieval Flow

     participant UI as Frontend
     participant API as FastAPI documents route
     participant DB as SQL metadata
+    participant Redis as Redis broker
+    participant Worker as Celery worker
     participant Files as Upload storage
     participant Vector as ChromaDB
     API->>API: Validate filename, extension, size, MIME, and parser readability
     API->>Files: Persist original file under the user's upload directory
     API->>DB: Create document row with processing status
+    API->>Redis: Queue Celery ingestion task
+    API-->>UI: 202 Accepted with document metadata and task_id
+    Redis->>Worker: Deliver ingestion task
     Worker->>Files: Read saved document
     Worker->>Worker: Extract pages, chunk text, build graph summary data
     Worker->>Vector: Store chunks with document and user metadata
 The upload route is intentionally strict before it writes long-lived state:
 extension checks, size checks, MIME checks, and parser checks happen before the
+file is moved into permanent storage. Celery uses Redis as the broker/result
+backend, and the worker owns expensive work such as text extraction, chunking,
+embedding, graph building, and summary generation.
 ## Chat And Retrieval Flow

frontend/e2e/auth-and-chat.spec.ts CHANGED Viewed

@@ -28,7 +28,13 @@ const uploadedDocument = {
 async function mockDashboardApis(page: Page, documents: typeof uploadedDocument[] = []) {
   await page.route("**/api/v1/auth/me", async (route) => {
-    await route.fulfill({ json: user });
   });
   await page.route("**/api/v1/documents/", async (route) => {
@@ -54,7 +60,7 @@ test("logs in with email and password", async ({ page }) => {
   await page.goto("/login");
   await page.locator("#login-email").fill(user.email);
   await page.locator("#login-password").fill("password123");
-  await page.getByRole("button", { name: "Sign In" }).click();
   await expect(page).toHaveURL(/\/dashboard$/);
   await expect(page.getByText("No documents yet")).toBeVisible();

 async function mockDashboardApis(page: Page, documents: typeof uploadedDocument[] = []) {
   await page.route("**/api/v1/auth/me", async (route) => {
+    const headers = route.request().headers();
+    const hasAuth = headers["authorization"] || headers["cookie"];
+    if (hasAuth) {
+      await route.fulfill({ json: user });
+    } else {
+      await route.fulfill({ status: 401, json: { detail: "Not authenticated" } });
+    }
   });
   await page.route("**/api/v1/documents/", async (route) => {
   await page.goto("/login");
   await page.locator("#login-email").fill(user.email);
   await page.locator("#login-password").fill("password123");
+  await page.locator("#sign-in-btn").click();
   await expect(page).toHaveURL(/\/dashboard$/);
   await expect(page.getByText("No documents yet")).toBeVisible();

frontend/package-lock.json CHANGED Viewed

@@ -5699,6 +5699,7 @@
       "version": "2.3.2",
       "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
       "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
       "hasInstallScript": true,
       "license": "MIT",
       "optional": true,

       "version": "2.3.2",
       "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
       "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
+      "dev": true,
       "hasInstallScript": true,
       "license": "MIT",
       "optional": true,

frontend/src/app/dashboard/page.tsx CHANGED Viewed

@@ -56,7 +56,7 @@ export interface DocInfo {
 }
 export default function DashboardPage() {
-  const { user, loading } = useAuth();
   const router = useRouter();
   const [documents, setDocuments] = useState<DocInfo[]>([]);
@@ -85,18 +85,21 @@ export default function DashboardPage() {
     setActiveDoc((current) => (current?.id === renamedDocument.id ? renamedDocument : current));
   }, []);
-    // Auth guard
   useEffect(() => {
-    if (!loading && !user) router.replace("/login");
-  }, [user, loading, router]);
-  // Intercept dashboard if Hugging Face token configuration is missing
   useEffect(() => {
     if (user) {
-      const existingHfToken = localStorage.getItem("hf_token");
-      if (!existingHfToken) {
-        console.warn("Hugging Face API configuration key missing.");
       }
     }
   }, [user]);
@@ -160,7 +163,7 @@ export default function DashboardPage() {
     return () => clearInterval(interval);
   }, [documents, loadDocuments]);
-  if (loading || !user) {
     return (
       <div className="min-h-screen flex items-center justify-center">
         <div className="animate-pulse-glow w-12 h-12 rounded-full bg-primary/20" />

 }
 export default function DashboardPage() {
+  const { user, loading, initialized } = useAuth();
   const router = useRouter();
   const [documents, setDocuments] = useState<DocInfo[]>([]);
     setActiveDoc((current) => (current?.id === renamedDocument.id ? renamedDocument : current));
   }, []);
+  // Auth guard
   useEffect(() => {
+    if (initialized && !user) router.replace("/login");
+  }, [user, initialized, router]);
+  // Check if Hugging Face token configuration is present
   useEffect(() => {
     if (user) {
+      const hasHfToken = !!(user.hf_token || localStorage.getItem("hf_token"));
+      if (!hasHfToken) {
+        console.info(
+          "Hugging Face API token is not configured. Personal model access will fall back to the system default unless set in the user profile menu."
+        );
       }
     }
   }, [user]);
     return () => clearInterval(interval);
   }, [documents, loadDocuments]);
+  if (!initialized || !user) {
     return (
       <div className="min-h-screen flex items-center justify-center">
         <div className="animate-pulse-glow w-12 h-12 rounded-full bg-primary/20" />

frontend/src/app/login/page.tsx CHANGED Viewed

@@ -1,6 +1,6 @@
 "use client";
-import { useCallback, useState } from "react";
 import { useRouter } from "next/navigation";
 import { useAuth } from "@/lib/auth";
 import { useTranslation } from "react-i18next";
@@ -10,9 +10,10 @@ import { Card, CardContent, CardHeader, CardTitle, CardDescription } from "@/com
 import { Brain, Eye, EyeOff } from "lucide-react";
 import Link from "next/link";
 import GoogleSignInButton from "@/components/auth/GoogleSignInButton";
 export default function LoginPage() {
-  const { login } = useAuth();
   const { t } = useTranslation();
   const router = useRouter();
   const [email, setEmail] = useState("");
@@ -21,6 +22,13 @@ export default function LoginPage() {
   const [error, setError] = useState("");
   const [loading, setLoading] = useState(false);
   const handleGoogleSuccess = useCallback(() => {
     router.replace("/dashboard");
   }, [router]);
@@ -58,13 +66,25 @@ export default function LoginPage() {
         </CardHeader>
         <CardContent>
-          <div className="mb-4">
             <GoogleSignInButton
               onError={setError}
               onSuccess={handleGoogleSuccess}
             />
           </div>
           <form onSubmit={handleSubmit} className="space-y-4">
             {error && (
               <div className="p-3 rounded-lg bg-destructive/10 border border-destructive/30 text-sm text-destructive">
@@ -107,7 +127,7 @@ export default function LoginPage() {
               </div>
             </div>
-            <Button type="submit" className="w-full h-11 text-base" disabled={loading}>
               {loading ? (
                 <span className="flex items-center gap-2">
                   <span className="w-4 h-4 border-2 border-primary-foreground/30 border-t-primary-foreground rounded-full animate-spin" />

 "use client";
+import { useCallback, useState, useEffect } from "react";
 import { useRouter } from "next/navigation";
 import { useAuth } from "@/lib/auth";
 import { useTranslation } from "react-i18next";
 import { Brain, Eye, EyeOff } from "lucide-react";
 import Link from "next/link";
 import GoogleSignInButton from "@/components/auth/GoogleSignInButton";
+import HuggingFaceSignInButton from "@/components/auth/HuggingFaceSignInButton";
 export default function LoginPage() {
+  const { login, user, initialized } = useAuth();
   const { t } = useTranslation();
   const router = useRouter();
   const [email, setEmail] = useState("");
   const [error, setError] = useState("");
   const [loading, setLoading] = useState(false);
+  // Redirect if already logged in
+  useEffect(() => {
+    if (initialized && user) {
+      router.replace("/dashboard");
+    }
+  }, [user, initialized, router]);
   const handleGoogleSuccess = useCallback(() => {
     router.replace("/dashboard");
   }, [router]);
         </CardHeader>
         <CardContent>
+          <div className="flex flex-col gap-2.5 mb-4">
+            <HuggingFaceSignInButton onError={setError} />
             <GoogleSignInButton
               onError={setError}
               onSuccess={handleGoogleSuccess}
             />
           </div>
+          <div className="relative my-5">
+            <div className="absolute inset-0 flex items-center">
+              <span className="w-full border-t border-border/40" />
+            </div>
+            <div className="relative flex justify-center text-xs uppercase">
+              <span className="bg-card px-2.5 text-muted-foreground text-[10px] tracking-wider font-semibold">
+                Or continue with
+              </span>
+            </div>
+          </div>
           <form onSubmit={handleSubmit} className="space-y-4">
             {error && (
               <div className="p-3 rounded-lg bg-destructive/10 border border-destructive/30 text-sm text-destructive">
               </div>
             </div>
+            <Button id="sign-in-btn" type="submit" className="w-full h-11 text-base" disabled={loading}>
               {loading ? (
                 <span className="flex items-center gap-2">
                   <span className="w-4 h-4 border-2 border-primary-foreground/30 border-t-primary-foreground rounded-full animate-spin" />

frontend/src/app/register/page.tsx CHANGED Viewed

@@ -1,6 +1,6 @@
 "use client";
-import { useCallback, useState } from "react";
 import { useRouter } from "next/navigation";
 import { useAuth } from "@/lib/auth";
 import { useTranslation } from "react-i18next";
@@ -10,9 +10,10 @@ import { Card, CardContent, CardHeader, CardTitle, CardDescription } from "@/com
 import { Brain, Eye, EyeOff } from "lucide-react";
 import Link from "next/link";
 import GoogleSignInButton from "@/components/auth/GoogleSignInButton";
 export default function RegisterPage() {
-  const { register } = useAuth();
   const { t } = useTranslation();
   const router = useRouter();
   const [username, setUsername] = useState("");
@@ -22,6 +23,13 @@ export default function RegisterPage() {
   const [error, setError] = useState("");
   const [loading, setLoading] = useState(false);
   const handleGoogleSuccess = useCallback(() => {
     router.replace("/dashboard");
   }, [router]);
@@ -58,7 +66,8 @@ export default function RegisterPage() {
         </CardHeader>
         <CardContent>
-          <div className="mb-4">
             <GoogleSignInButton
               onError={setError}
               onSuccess={handleGoogleSuccess}

 "use client";
+import { useCallback, useState, useEffect } from "react";
 import { useRouter } from "next/navigation";
 import { useAuth } from "@/lib/auth";
 import { useTranslation } from "react-i18next";
 import { Brain, Eye, EyeOff } from "lucide-react";
 import Link from "next/link";
 import GoogleSignInButton from "@/components/auth/GoogleSignInButton";
+import HuggingFaceSignInButton from "@/components/auth/HuggingFaceSignInButton";
 export default function RegisterPage() {
+  const { register, user, initialized } = useAuth();
   const { t } = useTranslation();
   const router = useRouter();
   const [username, setUsername] = useState("");
   const [error, setError] = useState("");
   const [loading, setLoading] = useState(false);
+  // Redirect if already logged in
+  useEffect(() => {
+    if (initialized && user) {
+      router.replace("/dashboard");
+    }
+  }, [user, initialized, router]);
   const handleGoogleSuccess = useCallback(() => {
     router.replace("/dashboard");
   }, [router]);
         </CardHeader>
         <CardContent>
+          <div className="flex flex-col gap-2.5 mb-4">
+            <HuggingFaceSignInButton onError={setError} />
             <GoogleSignInButton
               onError={setError}
               onSuccess={handleGoogleSuccess}

frontend/src/components/auth/HuggingFaceSignInButton.tsx ADDED Viewed

	@@ -0,0 +1,58 @@

+"use client";
+import { useState } from "react";
+import { Button } from "@/components/ui/button";
+import { api } from "@/lib/api";
+type HuggingFaceSignInButtonProps = {
+  onError: (message: string) => void;
+};
+export default function HuggingFaceSignInButton({ onError }: HuggingFaceSignInButtonProps) { // Outline button that starts the Hugging Face OAuth flow; failures are reported through onError
+  const [loading, setLoading] = useState(false); // true while the authorization URL request is in flight; disables the button and swaps the logo for a spinner
+  const handleLogin = async () => {
+    setLoading(true);
+    try {
+      // 1. Ask the backend for the Hugging Face OAuth authorization URL
+      const data = await api.get<{ url: string }>("/api/v1/auth/login/huggingface");
+      if (data.url) {
+        // 2. Send the browser to Hugging Face; loading is left true here (presumably because the page is navigating away)
+        window.location.href = data.url;
+      } else {
+        onError("Could not retrieve authorization URL from backend.");
+        setLoading(false);
+      }
+    } catch (error) {
+      onError( // surface the thrown message when available, otherwise a generic fallback
+        error instanceof Error
+          ? error.message
+          : "An error occurred while connecting to Hugging Face OAuth."
+      );
+      setLoading(false);
+    }
+  };
+  return (
+    <Button
+      onClick={handleLogin}
+      disabled={loading}
+      variant="outline"
+      className="w-full h-11 bg-card/45 backdrop-blur-md border border-border/60 hover:border-[#FFD21E]/60 hover:bg-[#FFD21E]/5 hover:shadow-[0_0_15px_-3px_rgba(255,210,30,0.18)] text-foreground hover:text-[#FFD21E] transition-all duration-300 shadow-sm relative group flex items-center justify-center gap-2.5 font-semibold rounded-xl overflow-hidden active:scale-[0.98] cursor-pointer"
+    >
+      {loading ? (
+        <span className="w-5 h-5 border-2 border-[#FFD21E]/30 border-t-[#FFD21E] rounded-full animate-spin mr-1" />
+      ) : (
+        <svg
+          className="w-5 h-5 transition-transform duration-300 group-hover:scale-110 fill-current text-[#FFD21E]"
+          viewBox="0 0 24 24"
+          xmlns="http://www.w3.org/2000/svg"
+        >
+          <title>Hugging Face</title>
+          <path d="M12.025 1.13c-5.77 0-10.449 4.647-10.449 10.378 0 1.112.178 2.181.503 3.185.064-.222.203-.444.416-.577a.96.96 0 0 1 .524-.15c.293 0 .584.124.84.284.278.173.48.408.71.694.226.282.458.611.684.951v-.014c.017-.324.106-.622.264-.874s.403-.487.762-.543c.3-.047.596.06.787.203s.31.313.4.467c.15.257.212.468.233.542.01.026.653 1.552 1.657 2.54.616.605 1.01 1.223 1.082 1.912.055.537-.096 1.059-.38 1.572.637.121 1.294.187 1.967.187.657 0 1.298-.063 1.921-.178-.287-.517-.44-1.041-.384-1.581.07-.69.465-1.307 1.081-1.913 1.004-.987 1.647-2.513 1.657-2.539.021-.074.083-.285.233-.542.09-.154.208-.323.4-.467a1.08 1.08 0 0 1 .787-.203c.359.056.604.29.762.543s.247.55.265.874v.015c.225-.34.457-.67.683-.952.23-.286.432-.52.71-.694.257-.16.547-.284.84-.285a.97.97 0 0 1 .524.151c.228.143.373.388.43.625l.006.04a10.3 10.3 0 0 0 .534-3.273c0-5.731-4.678-10.378-10.449-10.378M8.327 6.583a1.5 1.5 0 0 1 .713.174 1.487 1.487 0 0 1 .617 2.013c-.183.343-.762-.214-1.102-.094-.38.134-.532.914-.917.71a1.487 1.487 0 0 1 .69-2.803m7.486 0a1.487 1.487 0 0 1 .689 2.803c-.385.204-.536-.576-.916-.71-.34-.12-.92.437-1.103.094a1.487 1.487 0 0 1 .617-2.013 1.5 1.5 0 0 1 .713-.174m-10.68 1.55a.96.96 0 1 1 0 1.921.96.96 0 0 1 0-1.92m13.838 0a.96.96 0 1 1 0 1.92.96.96 0 0 1 0-1.92M8.489 11.458c.588.01 1.965 1.157 3.572 1.164 1.607-.007 2.984-1.155 3.572-1.164.196-.003.305.12.305.454 0 .886-.424 2.328-1.563 3.202-.22-.756-1.396-1.366-1.63-1.32q-.011.001-.02.006l-.044.026-.01.008-.03.024q-.018.017-.035.036l-.032.04a1 1 0 0 0-.058.09l-.014.025q-.049.088-.11.19a1 1 0 0 1-.083.116 1.2 1.2 0 0 1-.173.18q-.035.029-.075.058a1.3 1.3 0 0 1-.251-.243 1 1 0 0 1-.076-.107c-.124-.193-.177-.363-.337-.444-.034-.016-.104-.008-.2.022q-.094.03-.216.087-.06.028-.125.063l-.13.074q-.067.04-.136.086a3 3 0 0 0-.135.096 3 3 0 0 0-.26.219 2 2 0 0 0-.12.121 2 2 0 0 0-.106.128l-.002.002a2 2 0 0 0-.09.132l-.001.001a1.2 1.2 0 0 0-.105.212q-.013.036-.024.073c-1.139-.875-1.563-2.317-1.563-3.203 0-.334.109-.457.305-.454m.836 
10.354c.824-1.19.766-2.082-.365-3.194-1.13-1.112-1.789-2.738-1.789-2.738s-.246-.945-.806-.858-.97 1.499.202 2.362c1.173.864-.233 1.45-.685.64-.45-.812-1.683-2.896-2.322-3.295s-1.089-.175-.938.647 2.822 2.813 2.562 3.244-1.176-.506-1.176-.506-2.866-2.567-3.49-1.898.473 1.23 2.037 2.16c1.564.932 1.686 1.178 1.464 1.53s-3.675-2.511-4-1.297c-.323 1.214 3.524 1.567 3.287 2.405-.238.839-2.71-1.587-3.216-.642-.506.946 3.49 2.056 3.522 2.064 1.29.33 4.568 1.028 5.713-.624m5.349 0c-.824-1.19-.766-2.082.365-3.194 1.13-1.112 1.789-2.738 1.789-2.738s.246-.945.806-.858.97 1.499-.202 2.362c-1.173.864.233 1.45.685.64.451-.812 1.683-2.896 2.322-3.295s1.089-.175.938.647-2.822 2.813-2.562 3.244 1.176-.506 1.176-.506 2.866-2.567 3.49-1.898-.473 1.23-2.037 2.16c-1.564.932-1.686 1.178-1.464 1.53s3.675-2.511 4-1.297c.323 1.214-3.524 1.567-3.287 2.405.238.839 2.71-1.587 3.216-.642.506.946-3.49 2.056-3.522 2.064-1.29.33-4.568 1.028-5.713-.624" />
+        </svg>
+      )}
+      <span className="truncate">Sign in with Hugging Face</span>
+    </Button>
+  );
+}

frontend/src/components/layout/Header.tsx CHANGED Viewed

@@ -37,6 +37,7 @@ import {
 import { useWorkspaceStore, WORKSPACES, type WorkspaceId } from "@/store/workspace-store";
 import { api } from "@/lib/api";
 import { useTheme } from "next-themes";
 import { useSyncExternalStore } from "react";
@@ -223,7 +224,14 @@ export default function Header({
                 <p className="text-xs text-muted-foreground truncate">{user?.email}</p>
               </div>
               <DropdownMenuSeparator />
-              <DropdownMenuItem className="text-destructive cursor-pointer" onClick={handleLogout}>
                 <LogOut className="w-4 h-4 mr-2" />
                 Sign out
               </DropdownMenuItem>

 import { useWorkspaceStore, WORKSPACES, type WorkspaceId } from "@/store/workspace-store";
 import { api } from "@/lib/api";
 import { useTheme } from "next-themes";
+import HuggingFaceTokenModal from "@/components/auth/HuggingFaceTokenModal";
 import { useSyncExternalStore } from "react";
                 <p className="text-xs text-muted-foreground truncate">{user?.email}</p>
               </div>
               <DropdownMenuSeparator />
+              <div className="px-1 py-0.5">
+                <HuggingFaceTokenModal />
+              </div>
+              <DropdownMenuSeparator />
+              <DropdownMenuItem
+                className="text-destructive cursor-pointer"
+                onClick={handleLogout}
+              >
                 <LogOut className="w-4 h-4 mr-2" />
                 Sign out
               </DropdownMenuItem>

frontend/src/lib/api.ts CHANGED Viewed

@@ -39,7 +39,7 @@ class ApiClient {
     };
     const authToken = token || this.getToken();
-    if (authToken) {
       headers["Authorization"] = `Bearer ${authToken}`;
     }
@@ -48,7 +48,11 @@ class ApiClient {
   private async fetchWithConnectionError(input: RequestInfo | URL, init?: RequestInit): Promise<Response> {
     try {
-      return await fetch(input, init);
     } catch (error) {
       if (error instanceof TypeError) {
         throw new Error(CONNECTION_ERROR_MESSAGE);

     };
     const authToken = token || this.getToken();
+    if (authToken && authToken !== "cookie") {
       headers["Authorization"] = `Bearer ${authToken}`;
     }
   private async fetchWithConnectionError(input: RequestInfo | URL, init?: RequestInit): Promise<Response> {
     try {
+      const mergedInit = {
+        credentials: "include" as const,
+        ...init,
+      };
+      return await fetch(input, mergedInit);
     } catch (error) {
       if (error instanceof TypeError) {
         throw new Error(CONNECTION_ERROR_MESSAGE);

frontend/src/store/auth-store.ts CHANGED Viewed

@@ -90,7 +90,12 @@ export const useAuthStore = create<AuthStore>((set, get) => ({
     });
   },
-  logout() {
     clearStoredTokens();
     set({
       token: null,
@@ -105,16 +110,19 @@ export const useAuthStore = create<AuthStore>((set, get) => ({
     if (initialized) return;
     const storedToken = token ?? getStoredToken();
-    if (!storedToken) {
-      set({ token: null, user: null, loading: false, initialized: true });
-      return;
-    }
-    set({ token: storedToken, loading: true });
     try {
-      const user = await api.get<AuthUser>("/api/v1/auth/me", { token: storedToken });
-      set({ user, token: storedToken, loading: false, initialized: true });
     } catch {
       clearStoredTokens();
       set({ user: null, token: null, loading: false, initialized: true });

     });
   },
+  async logout() {
+    try {
+      await api.post("/api/v1/auth/logout");
+    } catch {
+      // Ignore network errors on logout
+    }
     clearStoredTokens();
     set({
       token: null,
     if (initialized) return;
     const storedToken = token ?? getStoredToken();
+    set({ loading: true });
     try {
+      const user = await api.get<AuthUser>(
+        "/api/v1/auth/me",
+        storedToken ? { token: storedToken } : undefined
+      );
+      set({
+        user,
+        token: storedToken || "cookie",
+        loading: false,
+        initialized: true,
+      });
     } catch {
       clearStoredTokens();
       set({ user: null, token: null, loading: false, initialized: true });

grafana_dashboard.json ADDED Viewed

	@@ -0,0 +1,1025 @@

+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "description": "System monitoring dashboard for PDF-Assistant-RAG covering API latency, LLM token usage, active users, request volume, error rate, and backend memory.",
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "id": null,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "panels": [],
+      "title": "Service Overview",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "decimals": 0,
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 500
+              },
+              {
+                "color": "red",
+                "value": 1500
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 6,
+        "x": 0,
+        "y": 1
+      },
+      "id": 2,
+      "options": {
+        "colorMode": "background",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{handler=~\"$handler\"}[$__rate_interval])) by (le)) * 1000",
+          "legendFormat": "p95 latency",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "API p95 Latency",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "decimals": 2,
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "blue",
+                "value": null
+              }
+            ]
+          },
+          "unit": "reqps"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 6,
+        "x": 6,
+        "y": 1
+      },
+      "id": 3,
+      "options": {
+        "colorMode": "background",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(http_requests_total{handler=~\"$handler\"}[$__rate_interval]))",
+          "legendFormat": "requests/sec",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "API Throughput",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Requires an application counter named llm_tokens_total with labels such as direction=\"input|output\".",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "continuous-BlPu"
+          },
+          "decimals": 0,
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "purple",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 6,
+        "x": 12,
+        "y": 1
+      },
+      "id": 4,
+      "options": {
+        "colorMode": "background",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(llm_tokens_total[$__rate_interval])) * 60",
+          "legendFormat": "tokens/min",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "LLM Tokens / min",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Requires an application gauge named active_users.",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "continuous-GrYlRd"
+          },
+          "decimals": 0,
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 100
+              },
+              {
+                "color": "red",
+                "value": 500
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 6,
+        "x": 18,
+        "y": 1
+      },
+      "id": 5,
+      "options": {
+        "colorMode": "background",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "sum(active_users)",
+          "legendFormat": "active users",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Active Users",
+      "type": "stat"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 5
+      },
+      "id": 6,
+      "panels": [],
+      "title": "API Health",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "custom": {
+            "drawStyle": "line",
+            "fillOpacity": 18,
+            "gradientMode": "opacity",
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "showPoints": "never",
+            "spanNulls": false
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 6
+      },
+      "id": 7,
+      "options": {
+        "legend": {
+          "calcs": [
+            "lastNotNull",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "histogram_quantile(0.50, sum(rate(http_request_duration_seconds_bucket{handler=~\"$handler\"}[$__rate_interval])) by (le)) * 1000",
+          "legendFormat": "p50",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{handler=~\"$handler\"}[$__rate_interval])) by (le)) * 1000",
+          "legendFormat": "p95",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket{handler=~\"$handler\"}[$__rate_interval])) by (le)) * 1000",
+          "legendFormat": "p99",
+          "range": true,
+          "refId": "C"
+        }
+      ],
+      "title": "API Latency Percentiles",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "drawStyle": "line",
+            "fillOpacity": 20,
+            "gradientMode": "opacity",
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 4,
+            "showPoints": "never",
+            "spanNulls": false
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "reqps"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 6
+      },
+      "id": 8,
+      "options": {
+        "legend": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(http_requests_total{handler=~\"$handler\"}[$__rate_interval])) by (handler)",
+          "legendFormat": "{{handler}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Request Rate by Route",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "custom": {
+            "drawStyle": "line",
+            "fillOpacity": 25,
+            "gradientMode": "opacity",
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 4,
+            "showPoints": "never",
+            "spanNulls": false
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 1
+              },
+              {
+                "color": "red",
+                "value": 5
+              }
+            ]
+          },
+          "unit": "percent"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 0,
+        "y": 14
+      },
+      "id": 9,
+      "options": {
+        "legend": {
+          "calcs": [
+            "lastNotNull",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "100 * sum(rate(http_requests_total{handler=~\"$handler\", status=~\"5..\"}[$__rate_interval])) / clamp_min(sum(rate(http_requests_total{handler=~\"$handler\"}[$__rate_interval])), 0.001)",
+          "legendFormat": "5xx error rate",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "API Error Rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "custom": {
+            "align": "auto",
+            "cellOptions": {
+              "type": "color-background"
+            },
+            "inspect": false
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 500
+              },
+              {
+                "color": "red",
+                "value": 1500
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 12,
+        "y": 14
+      },
+      "id": 10,
+      "options": {
+        "cellHeight": "sm",
+        "footer": {
+          "countRows": false,
+          "fields": "",
+          "reducer": [
+            "sum"
+          ],
+          "show": false
+        },
+        "showHeader": true
+      },
+      "pluginVersion": "10.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "topk(10, sum(rate(http_request_duration_seconds_sum{handler=~\"$handler\"}[$__rate_interval])) by (handler) / clamp_min(sum(rate(http_request_duration_seconds_count{handler=~\"$handler\"}[$__rate_interval])) by (handler), 0.001) * 1000)",
+          "format": "table",
+          "instant": true,
+          "legendFormat": "{{handler}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Slowest Routes - Average Latency",
+      "type": "table"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 21
+      },
+      "id": 11,
+      "panels": [],
+      "title": "LLM and User Activity",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Requires llm_tokens_total. Suggested labels: direction, model, route.",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "drawStyle": "line",
+            "fillOpacity": 25,
+            "gradientMode": "opacity",
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 4,
+            "showPoints": "never",
+            "spanNulls": false
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 22
+      },
+      "id": 12,
+      "options": {
+        "legend": {
+          "calcs": [
+            "lastNotNull",
+            "sum"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(llm_tokens_total[$__rate_interval])) by (direction) * 60",
+          "legendFormat": "{{direction}} tokens/min",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "LLM Token Usage by Direction",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Requires active_users gauge. Optional labels such as auth_provider or plan are supported.",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "continuous-GrYlRd"
+          },
+          "custom": {
+            "drawStyle": "line",
+            "fillOpacity": 30,
+            "gradientMode": "opacity",
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 4,
+            "showPoints": "never",
+            "spanNulls": false
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 22
+      },
+      "id": 13,
+      "options": {
+        "legend": {
+          "calcs": [
+            "lastNotNull",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "sum(active_users)",
+          "legendFormat": "active users",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Active Users Over Time",
+      "type": "timeseries"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 30
+      },
+      "id": 14,
+      "panels": [],
+      "title": "Runtime",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "drawStyle": "line",
+            "fillOpacity": 20,
+            "gradientMode": "opacity",
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 4,
+            "showPoints": "never",
+            "spanNulls": false
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "decbytes"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 0,
+        "y": 31
+      },
+      "id": 15,
+      "options": {
+        "legend": {
+          "calcs": [
+            "lastNotNull",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "app_process_resident_memory_bytes",
+          "legendFormat": "backend RSS",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Backend Memory",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "red",
+                "value": null
+              },
+              {
+                "color": "green",
+                "value": 1
+              }
+            ]
+          },
+          "unit": "none"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 12,
+        "y": 31
+      },
+      "id": 16,
+      "options": {
+        "displayMode": "lcd",
+        "maxVizHeight": 300,
+        "minVizHeight": 16,
+        "minVizWidth": 8,
+        "namePlacement": "auto",
+        "orientation": "horizontal",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "showUnfilled": true,
+        "sizing": "auto",
+        "valueMode": "color"
+      },
+      "pluginVersion": "10.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "up",
+          "legendFormat": "{{job}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Prometheus Target Health",
+      "type": "bargauge"
+    }
+  ],
+  "refresh": "30s",
+  "schemaVersion": 39,
+  "style": "dark",
+  "tags": [
+    "pdf-assistant-rag",
+    "fastapi",
+    "prometheus",
+    "llm",
+    "rag"
+  ],
+  "templating": {
+    "list": [
+      {
+        "current": {
+          "selected": false,
+          "text": "Prometheus",
+          "value": "prometheus"
+        },
+        "hide": 0,
+        "includeAll": false,
+        "label": "Prometheus",
+        "multi": false,
+        "name": "DS_PROMETHEUS",
+        "options": [],
+        "query": "prometheus",
+        "refresh": 1,
+        "regex": "",
+        "type": "datasource"
+      },
+      {
+        "allValue": ".*",
+        "current": {
+          "selected": true,
+          "text": "All",
+          "value": "$__all"
+        },
+        "datasource": {
+          "type": "prometheus",
+          "uid": "${DS_PROMETHEUS}"
+        },
+        "definition": "label_values(http_requests_total, handler)",
+        "hide": 0,
+        "includeAll": true,
+        "label": "Route",
+        "multi": true,
+        "name": "handler",
+        "options": [],
+        "query": {
+          "query": "label_values(http_requests_total, handler)",
+          "refId": "PrometheusVariableQueryEditor-VariableQuery"
+        },
+        "refresh": 2,
+        "regex": "",
+        "sort": 1,
+        "type": "query"
+      }
+    ]
+  },
+  "time": {
+    "from": "now-6h",
+    "to": "now"
+  },
+  "timepicker": {
+    "refresh_intervals": [
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m"
+    ]
+  },
+  "timezone": "browser",
+  "title": "PDF-Assistant-RAG System Monitoring",
+  "uid": "pdf-assistant-rag-system-monitoring",
+  "version": 1,
+  "weekStart": ""
+}

package-lock.json DELETED Viewed

@@ -1,6 +0,0 @@
-{
-  "name": "PDF-Assistant-RAG",
-  "lockfileVersion": 3,
-  "requires": true,
-  "packages": {}
-}