Spaces:

Param20h
/

PDF-Assit_RAG

Running

App Files Files Community

Yuvraj Sarathe committed 5 days ago

Commit

c1d1230

unverified ·

2 Parent(s): 465cfce ef02d76

Merge branch 'dev' into Hugging-Face-Token-Input

Browse files

Files changed (30) hide show

.github/ISSUE_TEMPLATE/bug_report.yml +1 -1
.github/ISSUE_TEMPLATE/feature_request.yml +1 -1
.github/workflows/sync-issue-labels.yml +164 -0
README.md +20 -2
backend/app/auth.py +35 -7
backend/app/config.py +4 -1
backend/app/database.py +1 -0
backend/app/models.py +132 -14
backend/app/rag/agent.py +13 -21
backend/app/rag/retriever.py +149 -8
backend/app/routes/admin.py +17 -4
backend/app/routes/auth.py +2 -1
backend/app/routes/chat.py +6 -4
backend/app/routes/documents.py +135 -3
backend/app/schemas.py +7 -0
backend/migrate_add_role.py +39 -0
backend/requirements.txt +3 -1
backend/scripts/migrate_sqlite_to_postgres.py +524 -0
backend/tests/test_auth.py +7 -0
backend/tests/test_chat.py +32 -1
backend/tests/test_retriever.py +77 -0
frontend/package-lock.json +26 -1
frontend/package.json +1 -0
frontend/src/app/dashboard/page.tsx +66 -70
frontend/src/app/globals.css +1 -0
frontend/src/app/page.tsx +12 -2
frontend/src/app/privacy/page.tsx +450 -0
frontend/src/app/terms/page.tsx +435 -0
frontend/src/components/document/PDFViewer.tsx +78 -25
frontend/src/components/layout/Header.tsx +150 -70

.github/ISSUE_TEMPLATE/bug_report.yml CHANGED Viewed

@@ -59,7 +59,7 @@ body:
   - type: checkboxes
     id: gssoc
     attributes:
-      label: "GSSoC '24"
       description: "Are you a GSSoC contributor?"
       options:
         - label: "Yes, I am participating in GirlScript Summer of Code and would like to fix this."

   - type: checkboxes
     id: gssoc
     attributes:
+      label: "GSSoC '26"
       description: "Are you a GSSoC contributor?"
       options:
         - label: "Yes, I am participating in GirlScript Summer of Code and would like to fix this."

.github/ISSUE_TEMPLATE/feature_request.yml CHANGED Viewed

@@ -42,7 +42,7 @@ body:
   - type: checkboxes
     id: gssoc
     attributes:
-      label: "GSSoC '24"
       description: "Are you a GSSoC contributor?"
       options:
         - label: "Yes, I am participating in GirlScript Summer of Code and would like to build this."

   - type: checkboxes
     id: gssoc
     attributes:
+      label: "GSSoC '26"
       description: "Are you a GSSoC contributor?"
       options:
         - label: "Yes, I am participating in GirlScript Summer of Code and would like to build this."

.github/workflows/sync-issue-labels.yml ADDED Viewed

	@@ -0,0 +1,164 @@

+name: Sync Labels — Issue to PR
+# ──────────────────────────────────────────────────────────
+# Auto-syncs labels from referenced issue(s) to the PR once
+# a PR targeting `dev` has been closed as merged.
+#
+# Why pull_request_target:
+#   Label operations need write permissions on the target
+#   repo. pull_request_target runs in the context of the
+#   base repo with access to secrets and write token.
+#   Since we only read issue data and apply labels, there
+#   is no security concern.
+# ──────────────────────────────────────────────────────────
+on:
+  pull_request_target:
+    types: [closed]
+    branches: ["dev"]
+permissions:
+  contents: read
+  issues: read
+  pull-requests: write
+jobs:
+  sync-labels:
+    name: Sync labels from referenced issue
+    runs-on: ubuntu-latest
+    if: github.event.pull_request.merged == true
+    steps:
+      - name: Extract issue numbers from PR body
+        id: extract
+        env:
+          PR_BODY: ${{ github.event.pull_request.body }}
+        run: |
+          # Match patterns:
+          #   "Closes #123"
+          #   "Fixes #456, #789"  (comma-separated)
+          #   "Resolves #111, #222, #333"
+          #
+          # Approach: grab lines containing a keyword, then
+          # extract every NNN from those lines.
+          # We place '|| true' at the very end of the pipeline so it doesn't short-circuit.
+          ISSUES=$(
+            echo "${PR_BODY:-}" \
+              | grep -ioE '.*(closes|fixes|resolves).*' \
+              | grep -oE '#[0-9]+' \
+              | grep -oE '[0-9]+' \
+              | sort -un \
+              | xargs \
+            || true
+          )
+          echo "Found issues: [$ISSUES]"
+          echo "issues=$ISSUES" >> "$GITHUB_OUTPUT"
+      - name: Fetch and apply labels
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          ISSUES: ${{ steps.extract.outputs.issues }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          REPO: ${{ github.repository }}
+        run: |
+          set -euo pipefail
+          # Baseline labels applied to every merged PR. Labels found on the
+          # referenced issues are appended below, so this list is never empty.
+          ALL_LABELS="gssoc"$'\n'"gssoc:approved"$'\n'"mentor:param20h"$'\n'
+          # $ISSUES is a space-separated list of issue numbers; word splitting
+          # is intentional here.
+          for ISSUE in $ISSUES; do
+            echo "--- Fetching labels for #$ISSUE ---"
+            # Report a failed lookup separately from an issue that simply has
+            # no labels; either way the issue is skipped (best effort).
+            if ! LABELS=$(gh issue view "$ISSUE" --repo "$REPO" --json labels --jq '.labels[].name' 2>/dev/null); then
+              echo "  → Could not fetch #$ISSUE, skipping"
+              continue
+            fi
+            if [ -z "$LABELS" ]; then
+              echo "  → No labels on #$ISSUE, skipping"
+              continue
+            fi
+            echo "  → Labels: $(echo "$LABELS" | tr '\n' ' ')"
+            # Accumulate labels (newline-separated, deduplicated later)
+            ALL_LABELS="${ALL_LABELS}${LABELS}"$'\n'
+          done
+          # Deduplicate and remove empty lines
+          UNIQUE_LABELS=$(printf '%s\n' "$ALL_LABELS" | sort -u | grep -v '^$')
+          echo ""
+          echo "=== Applying labels to PR #$PR_NUMBER ==="
+          echo "$UNIQUE_LABELS"
+          # Get labels already on the PR
+          EXISTING=$(gh pr view "$PR_NUMBER" --repo "$REPO" --json labels --jq '.labels[].name' 2>/dev/null || true)
+          MISSING=0
+          while IFS= read -r LABEL; do
+            [ -z "$LABEL" ] && continue
+            # Here-string instead of `echo | grep -q`: no pipeline, so pipefail
+            # cannot turn a successful match into a failure. `--` keeps a label
+            # that starts with '-' from being parsed as a grep option.
+            if grep -qxF -- "$LABEL" <<< "$EXISTING"; then
+              echo "  ✓ Already present: $LABEL"
+            else
+              echo "  + Adding: $LABEL"
+              gh label create "$LABEL" --repo "$REPO" 2>/dev/null || true  # create if not exists
+              gh pr edit "$PR_NUMBER" --repo "$REPO" --add-label "$LABEL"
+              MISSING=$((MISSING + 1))
+            fi
+          done <<< "$UNIQUE_LABELS"
+          if [ "$MISSING" -eq 0 ]; then
+            echo "All labels already synced — nothing to add."
+          else
+            echo "Done. Added $MISSING label(s) to PR #$PR_NUMBER."
+          fi
+      - name: Calculate GSSoC Points and Comment
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          REPO: ${{ github.repository }}
+        run: |
+          set -euo pipefail
+          echo "Calculating GSSoC points for PR #$PR_NUMBER..."
+          # Fetch all labels currently on the PR (including the ones we just synced)
+          PR_LABELS=$(gh pr view "$PR_NUMBER" --repo "$REPO" --json labels --jq '.labels[].name' 2>/dev/null || true)
+          POINTS=0
+          while IFS= read -r LABEL; do
+            [ -z "$LABEL" ] && continue
+            case "$LABEL" in
+              "level:beginner") POINTS=$((POINTS + 20)) ;;
+              "level:intermediate") POINTS=$((POINTS + 35)) ;;
+              "level:advanced") POINTS=$((POINTS + 55)) ;;
+              "level:critical") POINTS=$((POINTS + 80)) ;;
+              "type:accessibility") POINTS=$((POINTS + 15)) ;;
+              "type:bug") POINTS=$((POINTS + 10)) ;;
+              "type:design") POINTS=$((POINTS + 10)) ;;
+              "type:devops") POINTS=$((POINTS + 15)) ;;
+              "type:docs") POINTS=$((POINTS + 5)) ;;
+              "type:feature") POINTS=$((POINTS + 10)) ;;
+              "type:performance") POINTS=$((POINTS + 15)) ;;
+              "type:refactor") POINTS=$((POINTS + 10)) ;;
+              "type:security") POINTS=$((POINTS + 20)) ;;
+              "type:testing") POINTS=$((POINTS + 10)) ;;
+            esac
+          done <<< "$PR_LABELS"
+          echo "Total Points Calculated: $POINTS"
+          if [ "$POINTS" -gt 0 ]; then
+            printf -v COMMENT "🎉 **Congratulations on getting your Pull Request merged!** 🎉\n\nThank you for contributing to PDF-Assistant-RAG as part of GSSoC '26! 🚀\n\nKeep up the great work! ✨"
+            # Post the comment to the PR
+            echo "Posting comment..."
+            gh pr comment "$PR_NUMBER" --repo "$REPO" --body "$COMMENT"
+          else
+            echo "No scorable labels found. No comment posted."
+          fi

README.md CHANGED Viewed

@@ -362,7 +362,25 @@ CHROMA_PERSIST_DIR=./data/chroma_db
 > Get your free HuggingFace token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
-### 3. Run Locally
 Open **two terminals**:
@@ -384,7 +402,7 @@ npm run dev
 # → App running at http://localhost:3000
 ```
-### 4. Run with Docker
 ```bash
 docker compose up --build

 > Get your free HuggingFace token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
+### 3. Set up crawl4ai (URL Upload Feature)
+The URL upload feature (`POST /api/v1/documents/urlupload`) uses **crawl4ai** with a Playwright browser to crawl web pages. `crawl4ai-setup` handles the Playwright browser installation automatically — run it once after `pip install`:
+```bash
+crawl4ai-setup
+```
+> **Linux / Docker users:** If Chromium fails to launch due to missing system libraries, also run:
+> ```bash
+> playwright install-deps chromium
+> ```
+> This installs OS-level dependencies (libnss, libatk, etc.) on fresh Ubuntu/Debian servers.
+> **Windows users:** No extra steps — the `NotImplementedError` (SelectorEventLoop + subprocess) is already handled in the backend automatically.
+---
+### 4. Run Locally
 Open **two terminals**:
 # → App running at http://localhost:3000
 ```
+### 5. Run with Docker
 ```bash
 docker compose up --build

backend/app/auth.py CHANGED Viewed

@@ -12,7 +12,7 @@ from sqlalchemy.orm import Session
 from app.config import get_settings
 from app.database import get_db
-from app.models import User
 settings = get_settings()
 security = HTTPBearer()
@@ -30,10 +30,10 @@ def verify_password(plain: str, hashed: str) -> bool:
 # ── JWT Token ────────────────────────────────────────
-def create_access_token(user_id: str) -> str:
     """Create a JWT access token with user_id as the subject."""
     payload = {
-        "sub": user_id,
         "type": "access",
         "exp": datetime.now(timezone.utc) + timedelta(minutes=settings.JWT_ACCESS_EXPIRY_MINUTES),
         "iat": datetime.now(timezone.utc),
@@ -41,10 +41,10 @@ def create_access_token(user_id: str) -> str:
     return jwt.encode(payload, settings.SECRET_KEY, algorithm=settings.JWT_ALGORITHM)
-def create_refresh_token(user_id: str) -> str:
     """Create a JWT refresh token with user_id as the subject."""
     payload = {
-        "sub": user_id,
         "type": "refresh",
         "exp": datetime.now(timezone.utc) + timedelta(days=settings.JWT_REFRESH_EXPIRY_DAYS),
         "iat": datetime.now(timezone.utc),
@@ -120,11 +120,39 @@ def get_current_user(
     return user
 def get_admin_user(user: User = Depends(get_current_user)) -> User:
-    """Dependency: require admin privileges."""
-    if not user.is_admin:
         raise HTTPException(
             status_code=status.HTTP_403_FORBIDDEN,
             detail="Admin access required",
         )
     return user

 from app.config import get_settings
 from app.database import get_db
+from app.models import User, UserRole
 settings = get_settings()
 security = HTTPBearer()
 # ── JWT Token ────────────────────────────────────────
+def create_access_token(user_id) -> str:
     """Create a JWT access token with user_id as the subject."""
     payload = {
+        "sub": str(user_id),
         "type": "access",
         "exp": datetime.now(timezone.utc) + timedelta(minutes=settings.JWT_ACCESS_EXPIRY_MINUTES),
         "iat": datetime.now(timezone.utc),
     return jwt.encode(payload, settings.SECRET_KEY, algorithm=settings.JWT_ALGORITHM)
+def create_refresh_token(user_id) -> str:
     """Create a JWT refresh token with user_id as the subject."""
     payload = {
+        "sub": str(user_id),
         "type": "refresh",
         "exp": datetime.now(timezone.utc) + timedelta(days=settings.JWT_REFRESH_EXPIRY_DAYS),
         "iat": datetime.now(timezone.utc),
     return user
+def _is_admin_user(user: User) -> bool:
+    """
+    Check if a user has administrative privileges.
+    Supports both the modern 'role' field and the legacy 'is_admin' boolean.
+    """
+    if not user:
+        return False
+    # We check the role first (it can be an Enum or a plain string depending on the environment)
+    role_check = (user.role == UserRole.admin) or (str(user.role) == "admin")
+    # Fallback to the legacy is_admin flag
+    return role_check or bool(user.is_admin)
 def get_admin_user(user: User = Depends(get_current_user)) -> User:
+    """
+    FastAPI dependency that restricts access to administrators only.
+    Raises 403 Forbidden if the user lacks sufficient permissions.
+    """
+    if not _is_admin_user(user):
         raise HTTPException(
             status_code=status.HTTP_403_FORBIDDEN,
             detail="Admin access required",
         )
     return user
+async def get_current_admin(
+    current_user: User = Depends(get_current_user)
+) -> User:
+    """
+    Alias for get_admin_user to maintain compatibility with existing routes.
+    Ensures the requesting user has administrative rights.
+    """
+    return get_admin_user(current_user)

backend/app/config.py CHANGED Viewed

@@ -33,7 +33,10 @@ class Settings(BaseSettings):
         ".docx": [
             "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
             "application/zip",
-        ]
     }
     # ── RAG Pipeline ─────────────────────────────────────

         ".docx": [
             "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
             "application/zip",
+        ],
+        ".txt": ["text/plain"],
+        ".md": ["text/markdown"],
     }
     # ── RAG Pipeline ─────────────────────────────────────

backend/app/database.py CHANGED Viewed

@@ -48,6 +48,7 @@ def _migrate_schema():
     migrations = [
         ("users", "hf_token", "ALTER TABLE users ADD COLUMN hf_token VARCHAR(255)"),
     ]
     for table, column, ddl in migrations:

     migrations = [
         ("users", "hf_token", "ALTER TABLE users ADD COLUMN hf_token VARCHAR(255)"),
+        ("users", "role", "ALTER TABLE users ADD COLUMN role VARCHAR(20) DEFAULT 'user'"),
     ]
     for table, column, ddl in migrations:

backend/app/models.py CHANGED Viewed

@@ -2,27 +2,128 @@
 SQLAlchemy ORM models for users, documents, and chat messages.
 """
 import uuid
 from datetime import datetime, timezone
 from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, Text, Boolean
 from sqlalchemy.orm import relationship
 from app.database import Base
 def generate_uuid():
     return str(uuid.uuid4())
 class User(Base):
     __tablename__ = "users"
-    id = Column(String, primary_key=True, default=generate_uuid)
     username = Column(String(80), unique=True, nullable=False, index=True)
     email = Column(String(120), unique=True, nullable=False, index=True)
     hashed_password = Column(String(255), nullable=False)
     is_admin = Column(Boolean, default=False)
     created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
     last_login = Column(DateTime, nullable=True, index=True)
-    hf_token = Column(String(255), nullable=True)
     # Relationships
     documents = relationship("Document", back_populates="owner", cascade="all, delete-orphan")
@@ -31,10 +132,13 @@ class User(Base):
 class ApiKey(Base):
     __tablename__ = "api_keys"
-    id = Column(String, primary_key=True, default=generate_uuid)
-    user_id = Column(String, ForeignKey("users.id"), nullable=False, index=True)
     key_prefix = Column(String(10), nullable=False)
     hashed_key = Column(String(255), nullable=False, unique=True, index=True)
     created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
@@ -45,19 +149,27 @@ class ApiKey(Base):
 class Document(Base):
     __tablename__ = "documents"
-    id = Column(String, primary_key=True, default=generate_uuid)
-    user_id = Column(String, ForeignKey("users.id"), nullable=False, index=True)
     filename = Column(String(255), nullable=False)        # Stored filename (UUID-based)
     original_name = Column(String(255), nullable=False)    # User's original filename
     file_size = Column(Integer, default=0)                 # Size in bytes
     page_count = Column(Integer, default=0)
     chunk_count = Column(Integer, default=0)
-    status = Column(String(20), default="pending")         # pending | processing | ready | failed
     error_message = Column(Text, nullable=True)
     uploaded_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
-    summary = Column(Text, nullable=True)  # Optional summary of the document's content
     # Relationships
     owner = relationship("User", back_populates="documents")
@@ -65,14 +177,17 @@ class Document(Base):
 class ChatMessage(Base):
     __tablename__ = "chat_messages"
-    id = Column(String, primary_key=True, default=generate_uuid)
-    user_id = Column(String, ForeignKey("users.id"), nullable=False, index=True)
-    document_id = Column(String, ForeignKey("documents.id"), nullable=True, index=True)
     role = Column(String(20), nullable=False)  # "user" | "assistant"
     content = Column(Text, nullable=False)
-    sources_json = Column(Text, nullable=True)  # JSON string of source citations
     created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
     # Relationships
@@ -82,10 +197,13 @@ class ChatMessage(Base):
 class SharedMessage(Base):
     __tablename__ = "shared_messages"
-    id = Column(String, primary_key=True, default=generate_uuid)
-    message_id = Column(String, ForeignKey("chat_messages.id"), nullable=False, unique=True, index=True)
     created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
     # Relationships

 SQLAlchemy ORM models for users, documents, and chat messages.
 """
 import uuid
+import enum
+import base64
+import hashlib
 from datetime import datetime, timezone
+from cryptography.fernet import Fernet
 from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, Text, Boolean
+from sqlalchemy.types import TypeDecorator, CHAR
+from sqlalchemy.dialects.postgresql import UUID as PG_UUID
+from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, Text, Boolean, Enum as SQLAlchemyEnum
+from sqlalchemy.types import TypeDecorator
 from sqlalchemy.orm import relationship
 from app.database import Base
+class GUID(TypeDecorator):
+    """Platform-independent GUID type.
+    Uses PostgreSQL's UUID type, otherwise uses CHAR(36).
+    """
+    impl = CHAR
+    cache_ok = True
+    def load_dialect_impl(self, dialect):
+        if dialect.name == 'postgresql':
+            return dialect.type_descriptor(PG_UUID(as_uuid=True))
+        else:
+            return dialect.type_descriptor(CHAR(36))
+    def process_bind_param(self, value, dialect):
+        if value is None:
+            return value
+        if isinstance(value, uuid.UUID):
+            return value if dialect.name == 'postgresql' else str(value)
+        try:
+            val_uuid = uuid.UUID(value)
+            return val_uuid if dialect.name == 'postgresql' else str(val_uuid)
+        except ValueError:
+            if dialect.name == 'postgresql':
+                return uuid.UUID(int=0)
+            return value
+    def process_result_value(self, value, dialect):
+        if value is None:
+            return value
+        return str(value)
+class EncryptedString(TypeDecorator):
+    """
+    A custom SQLAlchemy type that transparently encrypts strings in the database
+    using Fernet (AES). This ensures sensitive tokens aren't stored in plain text
+    while remaining easily accessible in code.
+    """
+    impl = Text
+    cache_ok = False
+    def _get_cipher(self):
+        from app.config import get_settings
+        settings = get_settings()
+        # Derive a 32-byte key from the SECRET_KEY for Fernet encryption
+        key = base64.urlsafe_b64encode(hashlib.sha256(settings.SECRET_KEY.encode()).digest())
+        return Fernet(key)
+    def process_bind_param(self, value, dialect):
+        """Encrypt the value before saving to the database."""
+        if value is None:
+            return value
+        cipher = self._get_cipher()
+        return cipher.encrypt(value.encode()).decode()
+    def process_result_value(self, value, dialect):
+        """Decrypt the value after reading from the database."""
+        if value is None:
+            return value
+        cipher = self._get_cipher()
+        try:
+            return cipher.decrypt(value.encode()).decode()
+        except Exception:
+            # Fallback for unencrypted data or if decryption fails
+            return value
 def generate_uuid():
+    """Generates a standard unique string identifier for database records."""
     return str(uuid.uuid4())
+class UserRole(str, enum.Enum):
+    """
+    Defines the available user roles for Role-Based Access Control (RBAC).
+    - 'admin': Full access to system statistics and user management.
+    - 'user': Standard access for uploading and chatting with documents.
+    """
+    user = "user"
+    admin = "admin"
 class User(Base):
+    """
+    Represents a registered user within the system.
+    Supports both legacy 'is_admin' flags and the modern 'role' enum for permissions.
+    """
     __tablename__ = "users"
+    id = Column(GUID, primary_key=True, default=uuid.uuid4)
     username = Column(String(80), unique=True, nullable=False, index=True)
     email = Column(String(120), unique=True, nullable=False, index=True)
     hashed_password = Column(String(255), nullable=False)
+    # Permission fields: transitioning towards 'role', keeping 'is_admin' for compatibility
+    role = Column(
+        SQLAlchemyEnum(UserRole),
+        default=UserRole.user,
+        nullable=False,
+        server_default="user"
+    )
     is_admin = Column(Boolean, default=False)
     created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
     last_login = Column(DateTime, nullable=True, index=True)
+    hf_token = Column(EncryptedString, nullable=True)
     # Relationships
     documents = relationship("Document", back_populates="owner", cascade="all, delete-orphan")
 class ApiKey(Base):
+    """
+    Stores secure hashes of API keys used for programmatic interaction with the system.
+    """
     __tablename__ = "api_keys"
+    id = Column(GUID, primary_key=True, default=uuid.uuid4)
+    user_id = Column(GUID, ForeignKey("users.id"), nullable=False, index=True)
     key_prefix = Column(String(10), nullable=False)
     hashed_key = Column(String(255), nullable=False, unique=True, index=True)
     created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
 class Document(Base):
+    """
+    Metadata and processing status for files uploaded by users.
+    """
     __tablename__ = "documents"
+    id = Column(GUID, primary_key=True, default=uuid.uuid4)
+    user_id = Column(GUID, ForeignKey("users.id"), nullable=False, index=True)
     filename = Column(String(255), nullable=False)        # Stored filename (UUID-based)
     original_name = Column(String(255), nullable=False)    # User's original filename
     file_size = Column(Integer, default=0)                 # Size in bytes
+    id = Column(String, primary_key=True, default=generate_uuid)
+    user_id = Column(String, ForeignKey("users.id"), nullable=False, index=True)
+    filename = Column(String(255), nullable=False)         # Internal UUID-based filename
+    original_name = Column(String(255), nullable=False)     # Original name for user display
+    file_size = Column(Integer, default=0)                  # Size in bytes
     page_count = Column(Integer, default=0)
     chunk_count = Column(Integer, default=0)
+    status = Column(String(20), default="pending")          # pending | processing | ready | failed
     error_message = Column(Text, nullable=True)
     uploaded_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
+    summary = Column(Text, nullable=True)
     # Relationships
     owner = relationship("User", back_populates="documents")
 class ChatMessage(Base):
+    """
+    Persistent log of conversations between users and the AI analyst.
+    """
     __tablename__ = "chat_messages"
+    id = Column(GUID, primary_key=True, default=uuid.uuid4)
+    user_id = Column(GUID, ForeignKey("users.id"), nullable=False, index=True)
+    document_id = Column(GUID, ForeignKey("documents.id"), nullable=True, index=True)
     role = Column(String(20), nullable=False)  # "user" | "assistant"
     content = Column(Text, nullable=False)
+    sources_json = Column(Text, nullable=True)  # JSON representation of retrieved sources
     created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
     # Relationships
 class SharedMessage(Base):
+    """
+    Links specific chat messages to public sharing URLs.
+    """
     __tablename__ = "shared_messages"
+    id = Column(GUID, primary_key=True, default=uuid.uuid4)
+    message_id = Column(GUID, ForeignKey("chat_messages.id"), nullable=False, unique=True, index=True)
     created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
     # Relationships

backend/app/rag/agent.py CHANGED Viewed

@@ -15,21 +15,11 @@ from app.rag.tracing import trace_function
 logger = logging.getLogger(__name__)
 settings = get_settings()
-# ── Singleton LLM client ─────────────────────────────
-_llm_client = None
-def get_llm_client() -> InferenceClient:
-    """Get or create HuggingFace InferenceClient (singleton)."""
-    global _llm_client
-    if _llm_client is None:
-        _llm_client = InferenceClient(
-            token=settings.HF_TOKEN,
-        )
-        logger.info(f"LLM client initialized for model: {settings.LLM_MODEL}")
-    return _llm_client
 def is_greeting(question: str) -> bool:
@@ -68,7 +58,7 @@ def _chat_messages(system: str, user_content: str) -> list:
 @trace_function(
     "generate_answer",
-    metadata_factory=lambda question, user_id, document_id=None: {
         "user_id": user_id,
         "document_id": document_id,
         "llm_model": settings.LLM_MODEL,
@@ -78,13 +68,14 @@ def generate_answer(
     question: str,
     user_id: str,
     document_id: Optional[str] = None,
 ) -> Dict[str, Any]:
     """
     Full RAG pipeline: retrieve → build context → generate answer.
     Returns dict with 'answer' and 'sources'.
     """
-    # Get HuggingFace InferenceClient singleton (created once, reused)
-    client = get_llm_client()
     # ── Handle greetings ─────────────────────────────
     # Short-circuit: if user just says "hello", skip RAG entirely
@@ -156,7 +147,7 @@ def generate_answer(
 @trace_function(
     "generate_answer_stream",
-    metadata_factory=lambda question, user_id, document_id=None: {
         "user_id": user_id,
         "document_id": document_id,
         "llm_model": settings.LLM_MODEL,
@@ -166,13 +157,14 @@ def generate_answer_stream(
     question: str,
     user_id: str,
     document_id: Optional[str] = None,
 ) -> Generator[str, None, None]:
     """
     Streaming RAG pipeline — yields SSE-formatted chunks.
     First yields sources, then streams answer tokens.
     """
-    # Get HuggingFace InferenceClient singleton (created once, reused)
-    client = get_llm_client()
     # ── Handle greetings ─────────────────────────────
     # Short-circuit: if user just says "hello", skip RAG entirely

 logger = logging.getLogger(__name__)
 settings = get_settings()
+def get_llm_client(hf_token: Optional[str] = None) -> InferenceClient:
+    """Create a HuggingFace InferenceClient per-request."""
+    return InferenceClient(
+        token=hf_token or settings.HF_TOKEN,
+    )
 def is_greeting(question: str) -> bool:
 @trace_function(
     "generate_answer",
+    metadata_factory=lambda question, user_id, document_id=None, **kwargs: {
         "user_id": user_id,
         "document_id": document_id,
         "llm_model": settings.LLM_MODEL,
     question: str,
     user_id: str,
     document_id: Optional[str] = None,
+    hf_token: Optional[str] = None,
 ) -> Dict[str, Any]:
     """
     Full RAG pipeline: retrieve → build context → generate answer.
     Returns dict with 'answer' and 'sources'.
     """
+    # Get HuggingFace InferenceClient per-request
+    client = get_llm_client(hf_token)
     # ── Handle greetings ─────────────────────────────
     # Short-circuit: if user just says "hello", skip RAG entirely
 @trace_function(
     "generate_answer_stream",
+    metadata_factory=lambda question, user_id, document_id=None, **kwargs: {
         "user_id": user_id,
         "document_id": document_id,
         "llm_model": settings.LLM_MODEL,
     question: str,
     user_id: str,
     document_id: Optional[str] = None,
+    hf_token: Optional[str] = None,
 ) -> Generator[str, None, None]:
     """
     Streaming RAG pipeline — yields SSE-formatted chunks.
     First yields sources, then streams answer tokens.
     """
+    # Get HuggingFace InferenceClient per-request
+    client = get_llm_client(hf_token)
     # ── Handle greetings ─────────────────────────────
     # Short-circuit: if user just says "hello", skip RAG entirely

backend/app/rag/retriever.py CHANGED Viewed

@@ -1,7 +1,9 @@
 """
 Two-stage retrieval: ChromaDB similarity search + cross-encoder reranking.
 """
 import logging
 from typing import List, Dict, Any, Optional
 from app.config import get_settings
 from app.rag.embeddings import embed_query
@@ -10,6 +12,7 @@ from app.rag.vectorstore import query_chunks
 logger = logging.getLogger(__name__)
 settings = get_settings()
 # ── Singleton reranker ───────────────────────────────
 _reranker = None
@@ -32,6 +35,136 @@ def get_reranker():
     return _reranker if _reranker != "disabled" else None
 @trace_function(
     "retrieve",
     metadata_factory=lambda query, user_id, document_id=None: {
@@ -55,18 +188,24 @@ def retrieve(
     Returns chunks with confidence scores.
     """
-    # ── Stage 1: Embedding search ────────────────────
-    query_vector = embed_query(query)
-    candidates = query_chunks(
-        query_embedding=query_vector,
-        user_id=user_id,
-        document_id=document_id,
-        top_k=settings.TOP_K_RETRIEVAL,
-    )
     if not candidates:
         return []
     # ── Stage 2: Cross-encoder reranking ─────────────
     reranker = get_reranker()
@@ -86,6 +225,8 @@ def retrieve(
         except Exception as e:
             logger.warning(f"Reranking failed, using embedding scores: {e}")
     # ── Take top-K after reranking ───────────────────
     top_chunks = candidates[:settings.TOP_K_RERANK]

 """
 Two-stage retrieval: ChromaDB similarity search + cross-encoder reranking.
 """
+import json
 import logging
+import re
 from typing import List, Dict, Any, Optional
 from app.config import get_settings
 from app.rag.embeddings import embed_query
 logger = logging.getLogger(__name__)
 settings = get_settings()
+MAX_QUERY_VARIANTS = 4
 # ── Singleton reranker ───────────────────────────────
 _reranker = None
     return _reranker if _reranker != "disabled" else None
+def transform_query(query: str) -> List[str]:
+    """Rewrite a user question into multiple retrieval-friendly search queries."""
+    original_query = query.strip()
+    if not original_query:
+        return []
+    try:
+        generated_queries = _generate_query_variants(original_query)
+    except Exception as e:
+        logger.warning(f"Query transformation failed, using original query only: {e}")
+        generated_queries = []
+    return _dedupe_queries([original_query, *generated_queries])[:MAX_QUERY_VARIANTS]
+def _generate_query_variants(query: str) -> List[str]:
+    """Use the configured LLM to split/rewrite a user query for semantic search."""
+    if not settings.HF_TOKEN:
+        return []
+    from huggingface_hub import InferenceClient
+    client = InferenceClient(token=settings.HF_TOKEN)
+    prompt = (
+        "Rewrite the user question into concise semantic search queries for document retrieval. "
+        "Split independent topics into separate queries. Return a JSON array of strings only. "
+        f"User question: {query}"
+    )
+    response = client.chat_completion(
+        messages=[
+            {
+                "role": "system",
+                "content": "You create optimized search queries for a RAG retriever.",
+            },
+            {"role": "user", "content": prompt},
+        ],
+        model=settings.LLM_MODEL,
+        max_tokens=256,
+        temperature=0.2,
+    )
+    if not response.choices:
+        return []
+    content = response.choices[0].message.content or ""
+    return _parse_query_variants(content)
+def _parse_query_variants(content: str) -> List[str]:
+    """Parse LLM output into a list even when it adds light prose around JSON."""
+    content = content.strip()
+    if not content:
+        return []
+    parsed = _try_parse_query_json(content)
+    if parsed is not None:
+        return parsed
+    match = re.search(r"\[[\s\S]*\]", content)
+    if match:
+        parsed = _try_parse_query_json(match.group(0))
+        if parsed is not None:
+            return parsed
+    queries = []
+    for line in content.splitlines():
+        cleaned = re.sub(r"^\s*[-*\d.)]+\s*", "", line).strip().strip('"')
+        if cleaned:
+            queries.append(cleaned)
+    return queries
+def _try_parse_query_json(content: str) -> Optional[List[str]]:
+    try:
+        parsed = json.loads(content)
+    except json.JSONDecodeError:
+        return None
+    if isinstance(parsed, dict):
+        parsed = parsed.get("queries", [])
+    if not isinstance(parsed, list):
+        return []
+    return [item.strip() for item in parsed if isinstance(item, str) and item.strip()]
+def _dedupe_queries(queries: List[str]) -> List[str]:
+    deduped = []
+    seen = set()
+    for query in queries:
+        normalized = " ".join(query.split())
+        key = normalized.lower()
+        if normalized and key not in seen:
+            seen.add(key)
+            deduped.append(normalized)
+    return deduped
+def _candidate_key(chunk: Dict[str, Any]) -> str:
+    for key in ("id", "chunk_id"):
+        if chunk.get(key):
+            return str(chunk[key])
+    text = str(chunk.get("text", ""))
+    return "|".join(
+        str(part)
+        for part in (
+            chunk.get("document_id", ""),
+            chunk.get("filename", ""),
+            chunk.get("page", ""),
+            text[:200],
+        )
+    )
+def _merge_candidates(candidates: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    merged: Dict[str, Dict[str, Any]] = {}
+    for candidate in candidates:
+        candidate_copy = dict(candidate)
+        key = _candidate_key(candidate_copy)
+        existing = merged.get(key)
+        if existing is None or candidate_copy.get("score", 0) > existing.get("score", 0):
+            merged[key] = candidate_copy
+    return list(merged.values())
 @trace_function(
     "retrieve",
     metadata_factory=lambda query, user_id, document_id=None: {
     Returns chunks with confidence scores.
     """
+    # ── Stage 1: Query transformation + embedding search ─────────────
+    candidates = []
+    for search_query in transform_query(query):
+        query_vector = embed_query(search_query)
+        candidates.extend(
+            query_chunks(
+                query_embedding=query_vector,
+                user_id=user_id,
+                document_id=document_id,
+                top_k=settings.TOP_K_RETRIEVAL,
+            )
+        )
     if not candidates:
         return []
+    candidates = _merge_candidates(candidates)
     # ── Stage 2: Cross-encoder reranking ─────────────
     reranker = get_reranker()
         except Exception as e:
             logger.warning(f"Reranking failed, using embedding scores: {e}")
+    candidates.sort(key=lambda x: x.get("rerank_score", x.get("score", 0)), reverse=True)
     # ── Take top-K after reranking ───────────────────
     top_chunks = candidates[:settings.TOP_K_RERANK]

backend/app/routes/admin.py CHANGED Viewed

@@ -3,17 +3,18 @@ Admin-only operational statistics routes.
 """
 import shutil
 from pathlib import Path
 from fastapi import APIRouter, Depends
 from sqlalchemy import func
 from sqlalchemy.orm import Session
-from app.auth import get_admin_user
 from app.config import get_settings
 from app.database import get_db
 from app.metrics import get_query_metrics
-from app.models import Document, User
-from app.schemas import AdminStatsResponse, DiskUsageResponse
 router = APIRouter(prefix="/admin", tags=["Admin"])
 settings = get_settings()
@@ -35,8 +36,8 @@ def _directory_size(path: Path) -> int:
 @router.get("/stats", response_model=AdminStatsResponse)
 def get_admin_stats(
-    _admin: User = Depends(get_admin_user),
     db: Session = Depends(get_db),
 ):
     """Return aggregate system statistics for administrators."""
     upload_dir = Path(settings.UPLOAD_DIR).resolve()
@@ -59,6 +60,8 @@ def get_admin_stats(
     return AdminStatsResponse(
         total_users=db.query(User).count(),
         total_pdfs_uploaded=total_pdfs_uploaded,
         average_query_response_time_ms=float(
             query_metrics["average_query_response_time_ms"]
         ),
@@ -70,4 +73,14 @@ def get_admin_stats(
             usage_percent=used_percent,
             upload_dir_bytes=_directory_size(upload_dir),
         ),
     )

 """
 import shutil
 from pathlib import Path
+from typing import List
 from fastapi import APIRouter, Depends
 from sqlalchemy import func
 from sqlalchemy.orm import Session
+from app.auth import get_current_admin
 from app.config import get_settings
 from app.database import get_db
 from app.metrics import get_query_metrics
+from app.models import Document, User, ChatMessage
+from app.schemas import AdminStatsResponse, DiskUsageResponse, UserResponse
 router = APIRouter(prefix="/admin", tags=["Admin"])
 settings = get_settings()
 @router.get("/stats", response_model=AdminStatsResponse)
 def get_admin_stats(
     db: Session = Depends(get_db),
+    _admin: User = Depends(get_current_admin),
 ):
     """Return aggregate system statistics for administrators."""
     upload_dir = Path(settings.UPLOAD_DIR).resolve()
     return AdminStatsResponse(
         total_users=db.query(User).count(),
         total_pdfs_uploaded=total_pdfs_uploaded,
+        total_documents=db.query(Document).count(),
+        total_messages=db.query(ChatMessage).count(),
         average_query_response_time_ms=float(
             query_metrics["average_query_response_time_ms"]
         ),
             usage_percent=used_percent,
             upload_dir_bytes=_directory_size(upload_dir),
         ),
+        users=db.query(User).all()
     )
+@router.get("/users", response_model=List[UserResponse])
+def list_all_users(
+    db: Session = Depends(get_db),
+    _admin: User = Depends(get_current_admin),
+):
+    """List all registered users (admin-only).
+    Returns every row of the User table from a single, unpaginated query.
+    ``_admin`` is not used in the body; it is declared so that the
+    ``get_current_admin`` dependency runs for this route.
+    NOTE(review): the UserResponse fields visible in this diff include
+    ``hf_token``; confirm that returning every user's token here is intended.
+    """
+    return db.query(User).all()

backend/app/routes/auth.py CHANGED Viewed

@@ -11,7 +11,7 @@ from sqlalchemy.orm import Session
 from sqlalchemy import select
 from app.config import get_settings
 from app.database import get_db
-from app.models import User, ApiKey
 from app.schemas import (
     GoogleLoginRequest,
     HFTokenUpdate,
@@ -140,6 +140,7 @@ def register(payload: UserRegister, db: Session = Depends(get_db)):
         username=payload.username,
         email=payload.email,
         hashed_password=hash_password(payload.password),
     )
     db.add(user)
     db.commit()

 from sqlalchemy import select
 from app.config import get_settings
 from app.database import get_db
+from app.models import User, ApiKey, UserRole
 from app.schemas import (
     GoogleLoginRequest,
     HFTokenUpdate,
         username=payload.username,
         email=payload.email,
         hashed_password=hash_password(payload.password),
+        role=UserRole.user,
     )
     db.add(user)
     db.commit()

backend/app/routes/chat.py CHANGED Viewed

@@ -82,16 +82,16 @@ def create_share_link(
     )
-def generate_answer(question: str, user_id: str, document_id: Optional[str] = None):
     from app.rag.agent import generate_answer as _generate_answer
-    return _generate_answer(question=question, user_id=user_id, document_id=document_id)
-def generate_answer_stream(question: str, user_id: str, document_id: Optional[str] = None):
     from app.rag.agent import generate_answer_stream as _generate_answer_stream
-    return _generate_answer_stream(question=question, user_id=user_id, document_id=document_id)
 @router.post("/ask", response_model=ChatResponse)
@@ -151,6 +151,7 @@ def ask_question(
             question=payload.question,
             user_id=user.id,
             document_id=payload.document_id,
         )
         # Save to chat history
@@ -240,6 +241,7 @@ def ask_question_stream(
                 question=payload.question,
                 user_id=user.id,
                 document_id=payload.document_id,
             ):
                 yield chunk

     )
+def generate_answer(question: str, user_id: str, document_id: Optional[str] = None, hf_token: Optional[str] = None):
+    """Forward the call to ``app.rag.agent.generate_answer`` and return its result.
+    The agent module is imported at call time rather than at module import time;
+    all four arguments are passed through by keyword unchanged.
+    """
     from app.rag.agent import generate_answer as _generate_answer
+    return _generate_answer(question=question, user_id=user_id, document_id=document_id, hf_token=hf_token)
+def generate_answer_stream(question: str, user_id: str, document_id: Optional[str] = None, hf_token: Optional[str] = None):
+    """Forward the call to ``app.rag.agent.generate_answer_stream`` and return its result.
+    The agent module is imported at call time rather than at module import time;
+    all four arguments are passed through by keyword unchanged.
+    """
     from app.rag.agent import generate_answer_stream as _generate_answer_stream
+    return _generate_answer_stream(question=question, user_id=user_id, document_id=document_id, hf_token=hf_token)
 @router.post("/ask", response_model=ChatResponse)
             question=payload.question,
             user_id=user.id,
             document_id=payload.document_id,
+            hf_token=user.hf_token,
         )
         # Save to chat history
                 question=payload.question,
                 user_id=user.id,
                 document_id=payload.document_id,
+                hf_token=user.hf_token,
             ):
                 yield chunk

backend/app/routes/documents.py CHANGED Viewed

@@ -3,24 +3,32 @@ Document management routes — upload, list, delete, and serve PDF files.
 Background ingestion via FastAPI BackgroundTasks.
 """
 import os
 import uuid
 import logging
 from typing import Optional
 from pathlib import Path
 import shutil
 import tempfile
 from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, BackgroundTasks, status, Query
 from fastapi.responses import FileResponse
 from sqlalchemy.orm import Session
 from app.database import get_db
 from app.models import User, Document
-from app.schemas import DocumentResponse, DocumentListResponse, DocumentStatusResponse
 from app.auth import get_current_user
 from app.config import get_settings
 from app.rag.chunker import chunk_document, get_page_count
 from app.rag.vectorstore import store_chunks, delete_document_chunks
 from sqlalchemy import select
 logger = logging.getLogger(__name__)
 settings = get_settings()
@@ -62,7 +70,7 @@ async def validate_upload(file: UploadFile):
     # extension without leading dot in settings
     if ext.lstrip(".") not in settings.ALLOWED_EXTENSIONS:
-        raise HTTPException(status_code=400, detail="Only PDF and DOCX files are allowed")
     # save to a temporary file
     with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
@@ -205,6 +213,36 @@ def _ingest_document(document_id: str, filepath: str, original_name: str, user_i
         db.close()
 @router.post("/upload", response_model=DocumentResponse, status_code=status.HTTP_202_ACCEPTED)
 async def upload_document(
     background_tasks: BackgroundTasks,
@@ -287,6 +325,100 @@ async def upload_document(
     return DocumentResponse.model_validate(document)
 @router.get("/{document_id}/status", response_model=DocumentStatusResponse)
 def get_document_status(

 Background ingestion via FastAPI BackgroundTasks.
 """
 import os
+import sys
 import uuid
 import logging
+import asyncio
+import concurrent.futures
 from typing import Optional
 from pathlib import Path
 import shutil
 import tempfile
+from urllib.parse import urlparse
 from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, BackgroundTasks, status, Query
 from fastapi.responses import FileResponse
 from sqlalchemy.orm import Session
 from app.database import get_db
 from app.models import User, Document
+from app.schemas import DocumentResponse, DocumentListResponse, DocumentStatusResponse, UploadUrl
 from app.auth import get_current_user
 from app.config import get_settings
 from app.rag.chunker import chunk_document, get_page_count
 from app.rag.vectorstore import store_chunks, delete_document_chunks
+import crawl4ai
+from crawl4ai import AsyncWebCrawler
+from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig, CacheMode
 from sqlalchemy import select
 logger = logging.getLogger(__name__)
 settings = get_settings()
     # extension without leading dot in settings
     if ext.lstrip(".") not in settings.ALLOWED_EXTENSIONS:
+        raise HTTPException(status_code=400, detail="Only PDF, DOCX, TEXT, AND MARKDOWN files are allowed")
     # save to a temporary file
     with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
         db.close()
+def _crawl_in_new_loop(url: str) -> str:
+    """Run the async crawler in a fresh event loop on a worker thread.
+    On Windows this must be a ProactorEventLoop to support subprocesses.
+    """
+    if sys.platform == "win32":
+        loop = asyncio.ProactorEventLoop()
+    else:
+        loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    try:
+        async def _crawl():
+            browser_config = BrowserConfig()
+            run_config = CrawlerRunConfig(
+                excluded_tags=['form', 'header'],
+                # Content processing
+                process_iframes=True,
+                # remove_overlay_elements=True,
+                # Cache control
+                # cache_mode=CacheMode.ENABLED
+            )
+            async with AsyncWebCrawler(config=browser_config) as crawler:
+                result = await crawler.arun(url=url, config=run_config)
+                return result.markdown or ""
+        return loop.run_until_complete(_crawl())
+    finally:
+        loop.close()
 @router.post("/upload", response_model=DocumentResponse, status_code=status.HTTP_202_ACCEPTED)
 async def upload_document(
     background_tasks: BackgroundTasks,
     return DocumentResponse.model_validate(document)
+@router.post("/urlupload", status_code=status.HTTP_202_ACCEPTED)
+async def upload_document_url(
+        payload: UploadUrl,
+        background_tasks: BackgroundTasks,
+        user: User = Depends(get_current_user),
+        db: Session = Depends(get_db),
+):
+    """
+    Uses crawl4ai's AsyncWebCrawler in a dedicated thread with its own
+    event loop. This is required on Windows because uvicorn's default
+    SelectorEventLoop does not support subprocess creation (used by
+    Playwright/crawl4ai), which causes a NotImplementedError.
+    On Linux (production) a plain new_event_loop() is used instead.
+    """
+    temp_path: Optional[str] = None
+    try:
+        parsed = urlparse(payload.url)
+        if not all([parsed.scheme, parsed.netloc]):
+            raise HTTPException(status_code=400, detail="Invalid URL")
+        # Run in a worker thread with its own event loop to avoid
+        # NotImplementedError on Windows (SelectorEventLoop can't spawn subprocesses)
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+            markdown = await asyncio.get_event_loop().run_in_executor(
+                pool, _crawl_in_new_loop, payload.url
+            )
+        if not markdown:
+            raise HTTPException(status_code=422, detail="No content could be extracted from the URL")
+        with tempfile.NamedTemporaryFile(
+            mode="w",
+            suffix=".txt",
+            delete=False,
+            encoding="utf-8",
+        ) as tmp:
+            tmp.write(markdown)
+            temp_path = tmp.name
+        # ── Move temp file to permanent user upload directory ──
+        ext = "txt"
+        user_dir = os.path.join(settings.UPLOAD_DIR, user.id)
+        os.makedirs(user_dir, exist_ok=True)
+        stored_filename = f"{uuid.uuid4().hex}.{ext}"
+        filepath = os.path.join(user_dir, stored_filename)
+        shutil.move(temp_path, filepath)
+        temp_path = None  # file is now at filepath; no longer a temp to clean up
+        file_size = Path(filepath).stat().st_size
+        # ── Derive a human-readable name from the URL ─────────
+        url_path = parsed.path.rstrip("/")
+        original_name = f"{parsed.netloc}{url_path or ''}.txt"
+        # ── Create database record ─────────────────────────────
+        document = Document(
+            user_id=user.id,
+            filename=stored_filename,
+            original_name=original_name,
+            file_size=file_size,
+            status="pending",
+        )
+        db.add(document)
+        db.commit()
+        db.refresh(document)
+        # ── Trigger background ingestion ───────────────────────
+        background_tasks.add_task(
+            _ingest_document,
+            document_id=document.id,
+            filepath=filepath,
+            original_name=original_name,
+            user_id=user.id,
+        )
+        return DocumentResponse.model_validate(document)
+    except HTTPException:
+        raise
+    except ValueError:
+        raise HTTPException(status_code=400, detail="Invalid URL")
+    except Exception as e:
+        logger.error(f"URL upload error: {e}")
+        raise HTTPException(status_code=400, detail=f"Something went wrong with URL processing: {str(e)}")
+    finally:
+        '''Runs whether the request succeeded, raised an HTTPException,
+        or hit an unexpected error — no temp files are ever left behind.'''
+        if temp_path is not None:
+            Path(temp_path).unlink(missing_ok=True)
 @router.get("/{document_id}/status", response_model=DocumentStatusResponse)
 def get_document_status(

backend/app/schemas.py CHANGED Viewed

@@ -4,6 +4,7 @@ Pydantic schemas for API request/response validation.
 from pydantic import BaseModel, EmailStr, Field
 from typing import Optional, List
 from datetime import datetime
 # ── Auth ─────────────────────────────────────────────
@@ -75,6 +76,7 @@ class UserResponse(BaseModel):
     id: str
     username: str
     email: str
     is_admin: bool
     hf_token: Optional[str] = None
     created_at: datetime
@@ -131,9 +133,12 @@ class DiskUsageResponse(BaseModel):
 class AdminStatsResponse(BaseModel):
     total_users: int
     total_pdfs_uploaded: int
     average_query_response_time_ms: float
     query_count: int
     disk_space_usage: DiskUsageResponse
 # ── Chat ─────────────────────────────────────────────
@@ -172,6 +177,8 @@ class ChatHistoryResponse(BaseModel):
     messages: List[ChatMessageResponse]
     document_id: Optional[str] = None
 class ShareAnswerResponse(BaseModel):
     id: str

 from pydantic import BaseModel, EmailStr, Field
 from typing import Optional, List
 from datetime import datetime
+from app.models import UserRole
 # ── Auth ─────────────────────────────────────────────
     id: str
     username: str
     email: str
+    role: UserRole
     is_admin: bool
     hf_token: Optional[str] = None
     created_at: datetime
 class AdminStatsResponse(BaseModel):
     total_users: int
     total_pdfs_uploaded: int
+    total_documents: int
+    total_messages: int
     average_query_response_time_ms: float
     query_count: int
     disk_space_usage: DiskUsageResponse
+    users: List[UserResponse]
 # ── Chat ─────────────────────────────────────────────
     messages: List[ChatMessageResponse]
     document_id: Optional[str] = None
+class UploadUrl(BaseModel):
+    """Request body carrying the address of a web page to ingest as a document."""
+    # Declared as a plain string; this schema does no URL-format validation itself.
+    url: str
 class ShareAnswerResponse(BaseModel):
     id: str

backend/migrate_add_role.py ADDED Viewed

	@@ -0,0 +1,39 @@

+"""
+One-time migration script to add the 'role' column to the 'users' table.
+Run this from the 'backend' directory.
+"""
+import sys
+import os
+# Add the current directory to sys.path to allow importing 'app'
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+from app.database import engine
+from sqlalchemy import text
+def migrate():
+    print("🚀 Starting migration: adding 'role' column to 'users' table...")
+    try:
+        with engine.connect() as conn:
+            # SQLite doesn't support adding a column with NOT NULL without a default value
+            # if there are already rows, but we provide a default 'user'.
+            conn.execute(text(
+                "ALTER TABLE users ADD COLUMN role VARCHAR DEFAULT 'user'"
+            ))
+            # Update existing rows to have the 'user' role
+            conn.execute(text(
+                "UPDATE users SET role = 'user' WHERE role IS NULL"
+            ))
+            conn.execute(text(
+                "UPDATE users SET role = 'admin' WHERE is_admin = 1"
+            ))
+            conn.commit()
+        print("✅ Migration successful!")
+    except Exception as e:
+        if "duplicate column name" in str(e).lower():
+            print("ℹ️ Column 'role' already exists. Skipping migration.")
+        else:
+            print(f"❌ Migration failed: {e}")
+if __name__ == "__main__":
+    migrate()

backend/requirements.txt CHANGED Viewed

@@ -8,6 +8,7 @@ python-multipart
 # Database
 sqlalchemy
 aiosqlite
 # Auth
 pyjwt
@@ -50,8 +51,9 @@ slowapi
 # File Validation
 #sudo apt-get install libmagic1 // for Debian/Ubuntu
 #brew install libmagic // for OSX
-python-magic-bin==0.4.27; sys_platform == "win32" # for windows
 python-magic; sys_platform != "win32"
 python-docx
 pypdf
 reportlab

 # Database
 sqlalchemy
 aiosqlite
+psycopg[binary]
 # Auth
 pyjwt
 # File Validation
 #sudo apt-get install libmagic1 // for Debian/Ubuntu
 #brew install libmagic // for OSX
+python-magic-bin; sys_platform == "win32" # for windows
 python-magic; sys_platform != "win32"
 python-docx
 pypdf
 reportlab
+crawl4ai

backend/scripts/migrate_sqlite_to_postgres.py ADDED Viewed

	@@ -0,0 +1,524 @@

+"""Migrate SQLite app data into a Supabase/Postgres database.
+The script supports both the current FastAPI SQLite schema
+(`users`, `documents`, `chat_messages`) and the older legacy
+`instance/users.db` schema (`user` only).
+"""
+from __future__ import annotations
+import argparse
+import logging
+import os
+import sys
+import uuid
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+from sqlalchemy import (
+    Boolean,
+    Column,
+    DateTime,
+    ForeignKey,
+    Integer,
+    MetaData,
+    String,
+    Table,
+    Text,
+    create_engine,
+    inspect,
+    select,
+)
+from sqlalchemy.engine import Engine
+from sqlalchemy.exc import IntegrityError
+from sqlalchemy.orm import Session, sessionmaker
+LOGGER = logging.getLogger("sqlite_to_postgres")
+def generate_uuid() -> str:
+    return str(uuid.uuid4())
+# Target-side schema, declared with SQLAlchemy Core tables on one shared
+# MetaData object. All primary keys are strings defaulting to a fresh UUID.
+metadata = MetaData()
+# Application accounts; username and email are both unique.
+# NOTE(review): no 'role' column is declared here — confirm whether the role
+# added elsewhere in this change set should also be migrated.
+users = Table(
+    "users",
+    metadata,
+    Column("id", String, primary_key=True, default=generate_uuid),
+    Column("username", String(80), unique=True, nullable=False, index=True),
+    Column("email", String(120), unique=True, nullable=False, index=True),
+    Column("hashed_password", String(255), nullable=False),
+    Column("is_admin", Boolean, default=False),
+    Column("created_at", DateTime, default=lambda: datetime.now(timezone.utc)),
+    Column("last_login", DateTime, nullable=True, index=True),
+    Column("hf_token", String(255), nullable=True),
+)
+# API keys owned by a user; hashed_key is unique.
+api_keys = Table(
+    "api_keys",
+    metadata,
+    Column("id", String, primary_key=True, default=generate_uuid),
+    Column("user_id", String, ForeignKey("users.id"), nullable=False, index=True),
+    Column("key_prefix", String(10), nullable=False),
+    Column("hashed_key", String(255), nullable=False, unique=True, index=True),
+    Column("created_at", DateTime, default=lambda: datetime.now(timezone.utc)),
+    Column("last_used", DateTime, nullable=True),
+)
+# Uploaded documents owned by a user; status defaults to "pending".
+documents = Table(
+    "documents",
+    metadata,
+    Column("id", String, primary_key=True, default=generate_uuid),
+    Column("user_id", String, ForeignKey("users.id"), nullable=False, index=True),
+    Column("filename", String(255), nullable=False),
+    Column("original_name", String(255), nullable=False),
+    Column("file_size", Integer, default=0),
+    Column("page_count", Integer, default=0),
+    Column("chunk_count", Integer, default=0),
+    Column("status", String(20), default="pending"),
+    Column("error_message", Text, nullable=True),
+    Column("uploaded_at", DateTime, default=lambda: datetime.now(timezone.utc)),
+    Column("summary", Text, nullable=True),
+)
+# Chat messages; document_id is optional, user_id is required.
+chat_messages = Table(
+    "chat_messages",
+    metadata,
+    Column("id", String, primary_key=True, default=generate_uuid),
+    Column("user_id", String, ForeignKey("users.id"), nullable=False, index=True),
+    Column("document_id", String, ForeignKey("documents.id"), nullable=True, index=True),
+    Column("role", String(20), nullable=False),
+    Column("content", Text, nullable=False),
+    Column("sources_json", Text, nullable=True),
+    Column("created_at", DateTime, default=lambda: datetime.now(timezone.utc)),
+)
+# Share records; at most one per chat message (message_id is unique).
+shared_messages = Table(
+    "shared_messages",
+    metadata,
+    Column("id", String, primary_key=True, default=generate_uuid),
+    Column("message_id", String, ForeignKey("chat_messages.id"), nullable=False, unique=True, index=True),
+    Column("created_at", DateTime, default=lambda: datetime.now(timezone.utc)),
+)
+@dataclass
+class MigrationStats:
+    inserted: dict[str, int] = field(default_factory=dict)
+    reused: dict[str, int] = field(default_factory=dict)
+    skipped: dict[str, int] = field(default_factory=dict)
+    def add(self, table_name: str, action: str) -> None:
+        getattr(self, action)[table_name] = getattr(self, action).get(table_name, 0) + 1
+def normalize_postgres_url(url: str) -> str:
+    """Prefer psycopg v3 when callers pass Supabase's common URL forms."""
+    if url.startswith("postgres://"):
+        return "postgresql+psycopg://" + url.removeprefix("postgres://")
+    if url.startswith("postgresql://"):
+        return "postgresql+psycopg://" + url.removeprefix("postgresql://")
+    return url
+def sqlite_url_from_path(path: str) -> str:
+    return f"sqlite:///{Path(path).resolve().as_posix()}"
+def make_engine(url: str) -> Engine:
+    return create_engine(url, future=True)
+def make_session(engine: Engine) -> Session:
+    return sessionmaker(bind=engine, autocommit=False, autoflush=False, future=True)()
+def reflected_table(engine: Engine, table_name: str) -> Table | None:
+    if not inspect(engine).has_table(table_name):
+        return None
+    reflected = MetaData()
+    return Table(table_name, reflected, autoload_with=engine)
+def fetch_rows(session: Session, table: Table) -> list[dict[str, Any]]:
+    stmt = select(table)
+    if "id" in table.c:
+        stmt = stmt.order_by(table.c.id)
+    return [dict(row) for row in session.execute(stmt).mappings().all()]
+def existing_id(session: Session, table: Table, source_id: str | None) -> str | None:
+    if not source_id:
+        return None
+    return session.execute(select(table.c.id).where(table.c.id == source_id)).scalar_one_or_none()
+def available_id(session: Session, table: Table, source_id: Any) -> str:
+    candidate = str(source_id) if source_id is not None else generate_uuid()
+    if existing_id(session, table, candidate) is None:
+        return candidate
+    while True:
+        candidate = generate_uuid()
+        if existing_id(session, table, candidate) is None:
+            return candidate
+def first_existing_user(session: Session, row: dict[str, Any]) -> str | None:
+    email = row.get("email")
+    username = row.get("username")
+    if email:
+        match = session.execute(select(users.c.id).where(users.c.email == email)).scalar_one_or_none()
+        if match:
+            return match
+    if username:
+        return session.execute(select(users.c.id).where(users.c.username == username)).scalar_one_or_none()
+    return None
+def copy_users(
+    source_session: Session,
+    target_session: Session,
+    source_table: Table,
+    stats: MigrationStats,
+) -> dict[str, str]:
+    id_map: dict[str, str] = {}
+    now = datetime.now(timezone.utc)
+    for row in fetch_rows(source_session, source_table):
+        old_id = str(row.get("id"))
+        existing = existing_id(target_session, users, old_id) or first_existing_user(target_session, row)
+        if existing:
+            id_map[old_id] = existing
+            stats.add("users", "reused")
+            continue
+        is_legacy = source_table.name == "user"
+        new_id = available_id(target_session, users, None if is_legacy else old_id)
+        user_values = {
+            "id": new_id,
+            "username": row["username"],
+            "email": row["email"],
+            "hashed_password": row.get("hashed_password") or row.get("password") or "",
+            "is_admin": bool(row.get("is_admin") or False),
+            "created_at": row.get("created_at") or now,
+            "last_login": row.get("last_login"),
+            "hf_token": row.get("hf_token"),
+        }
+        target_session.execute(users.insert().values(**user_values))
+        id_map[old_id] = new_id
+        stats.add("users", "inserted")
+    return id_map
+def copy_api_keys(
+    source_session: Session,
+    target_session: Session,
+    source_table: Table | None,
+    user_id_map: dict[str, str],
+    stats: MigrationStats,
+) -> dict[str, str]:
+    """Copy API key rows, re-pointing each one at its migrated owner.
+    A row is skipped when its ``user_id`` has no entry in ``user_id_map`` and
+    reused when the target already holds the same id or the same
+    ``hashed_key``. Outcomes are recorded on ``stats`` under ``"api_keys"``.
+    Returns a dict of stringified source id -> target id; the dict is empty
+    when ``source_table`` is ``None``.
+    """
+    mapping: dict[str, str] = {}
+    if source_table is None:
+        return mapping
+    for record in fetch_rows(source_session, source_table):
+        source_id = str(record.get("id"))
+        owner_id = user_id_map.get(str(record.get("user_id")))
+        if not owner_id:
+            stats.add("api_keys", "skipped")
+            continue
+        match = existing_id(target_session, api_keys, source_id)
+        if not match:
+            # Second chance: an identical key hash already stored in the target.
+            same_hash = select(api_keys.c.id).where(api_keys.c.hashed_key == record.get("hashed_key"))
+            match = target_session.execute(same_hash).scalar_one_or_none()
+        if match:
+            mapping[source_id] = match
+            stats.add("api_keys", "reused")
+            continue
+        target_id = available_id(target_session, api_keys, source_id)
+        payload = {
+            "id": target_id,
+            "user_id": owner_id,
+            "key_prefix": record["key_prefix"],
+            "hashed_key": record["hashed_key"],
+            "created_at": record.get("created_at") or datetime.now(timezone.utc),
+            "last_used": record.get("last_used"),
+        }
+        target_session.execute(api_keys.insert().values(**payload))
+        mapping[source_id] = target_id
+        stats.add("api_keys", "inserted")
+    return mapping
+def copy_documents(
+    source_session: Session,
+    target_session: Session,
+    source_table: Table | None,
+    user_id_map: dict[str, str],
+    stats: MigrationStats,
+) -> dict[str, str]:
+    """Copy document rows, re-pointing each one at its migrated owner.
+    A row is skipped when its ``user_id`` has no entry in ``user_id_map`` and
+    reused when ``existing_id`` finds its id in the target. Outcomes are
+    recorded on ``stats`` under ``"documents"``.
+    Returns a dict of stringified source id -> target id; the dict is empty
+    when ``source_table`` is ``None``.
+    """
+    mapping: dict[str, str] = {}
+    if source_table is None:
+        return mapping
+    for record in fetch_rows(source_session, source_table):
+        source_id = str(record.get("id"))
+        owner_id = user_id_map.get(str(record.get("user_id")))
+        if not owner_id:
+            stats.add("documents", "skipped")
+            continue
+        match = existing_id(target_session, documents, source_id)
+        if match:
+            mapping[source_id] = match
+            stats.add("documents", "reused")
+            continue
+        target_id = available_id(target_session, documents, source_id)
+        payload = {
+            "id": target_id,
+            "user_id": owner_id,
+            "filename": record["filename"],
+            "original_name": record["original_name"],
+            # Missing or falsy counters fall back to 0, a missing status to "pending".
+            "file_size": record.get("file_size") or 0,
+            "page_count": record.get("page_count") or 0,
+            "chunk_count": record.get("chunk_count") or 0,
+            "status": record.get("status") or "pending",
+            "error_message": record.get("error_message"),
+            "uploaded_at": record.get("uploaded_at") or datetime.now(timezone.utc),
+            "summary": record.get("summary"),
+        }
+        target_session.execute(documents.insert().values(**payload))
+        mapping[source_id] = target_id
+        stats.add("documents", "inserted")
+    return mapping
+def copy_chat_messages(
+    source_session: Session,
+    target_session: Session,
+    source_table: Table | None,
+    user_id_map: dict[str, str],
+    document_id_map: dict[str, str],
+    stats: MigrationStats,
+) -> dict[str, str]:
+    """Copy chat messages, re-pointing their user and document references.
+    A row is skipped when its user has no entry in ``user_id_map``, or when it
+    names a document that has no entry in ``document_id_map``. Rows without a
+    document are copied with ``document_id`` set to ``None``. Outcomes are
+    recorded on ``stats`` under ``"chat_messages"``.
+    Returns a dict of stringified source id -> target id; the dict is empty
+    when ``source_table`` is ``None``.
+    """
+    mapping: dict[str, str] = {}
+    if source_table is None:
+        return mapping
+    for record in fetch_rows(source_session, source_table):
+        source_id = str(record.get("id"))
+        owner_id = user_id_map.get(str(record.get("user_id")))
+        source_document_id = record.get("document_id")
+        target_document_id = None
+        if source_document_id:
+            target_document_id = document_id_map.get(str(source_document_id))
+        # A message that references a document which was not migrated is dropped.
+        document_unresolved = bool(source_document_id) and not target_document_id
+        if not owner_id or document_unresolved:
+            stats.add("chat_messages", "skipped")
+            continue
+        match = existing_id(target_session, chat_messages, source_id)
+        if match:
+            mapping[source_id] = match
+            stats.add("chat_messages", "reused")
+            continue
+        target_id = available_id(target_session, chat_messages, source_id)
+        payload = {
+            "id": target_id,
+            "user_id": owner_id,
+            "document_id": target_document_id,
+            "role": record["role"],
+            "content": record["content"],
+            "sources_json": record.get("sources_json"),
+            "created_at": record.get("created_at") or datetime.now(timezone.utc),
+        }
+        target_session.execute(chat_messages.insert().values(**payload))
+        mapping[source_id] = target_id
+        stats.add("chat_messages", "inserted")
+    return mapping
+def copy_shared_messages(
+    source_session: Session,
+    target_session: Session,
+    source_table: Table | None,
+    message_id_map: dict[str, str],
+    stats: MigrationStats,
+) -> None:
+    """Copy shared-message rows, re-pointing each one at its migrated message.
+    A row is skipped when its ``message_id`` has no entry in
+    ``message_id_map`` and reused when the target already holds the same id or
+    a share for the same migrated message. Outcomes are recorded on ``stats``
+    under ``"shared_messages"``. Does nothing when ``source_table`` is ``None``.
+    """
+    if source_table is None:
+        return
+    for record in fetch_rows(source_session, source_table):
+        source_id = str(record.get("id"))
+        target_message_id = message_id_map.get(str(record.get("message_id")))
+        if not target_message_id:
+            stats.add("shared_messages", "skipped")
+            continue
+        match = existing_id(target_session, shared_messages, source_id)
+        if not match:
+            # Second chance: a share that already points at the migrated message.
+            same_message = select(shared_messages.c.id).where(
+                shared_messages.c.message_id == target_message_id
+            )
+            match = target_session.execute(same_message).scalar_one_or_none()
+        if match:
+            stats.add("shared_messages", "reused")
+            continue
+        payload = {
+            "id": available_id(target_session, shared_messages, source_id),
+            "message_id": target_message_id,
+            "created_at": record.get("created_at") or datetime.now(timezone.utc),
+        }
+        target_session.execute(shared_messages.insert().values(**payload))
+        stats.add("shared_messages", "inserted")
+def migrate(
+    sqlite_url: str,
+    postgres_url: str,
+    create_tables: bool,
+    dry_run: bool,
+) -> MigrationStats:
+    """Copy users, API keys, documents, chat messages and shares to Postgres.
+    Tables are copied in dependency order so each step can re-point foreign
+    keys using the id maps returned by the previous steps. All row writes go
+    through one target session, which is committed at the end or rolled back
+    when ``dry_run`` is set or any step raises.
+    Args:
+        sqlite_url: SQLAlchemy URL of the source SQLite database.
+        postgres_url: SQLAlchemy URL of the target database; passed through
+            ``normalize_postgres_url`` before the engine is created.
+        create_tables: When true, run ``metadata.create_all`` on the target
+            before copying. This runs on the engine, outside the session
+            transaction, so it is NOT undone by a dry run.
+        dry_run: When true, roll the target session back instead of committing.
+    Returns:
+        The ``MigrationStats`` collected while copying.
+    Raises:
+        RuntimeError: If the source has neither a ``users`` nor a ``user`` table.
+        IntegrityError: Re-raised after rollback when the target rejects a row.
+        Exception: Any other failure is re-raised after rollback.
+    """
+    from contextlib import ExitStack
+    # Register each resource for cleanup as soon as it exists, so a failure
+    # while opening a later one (or while creating tables) cannot leak the
+    # earlier ones. Callbacks run in reverse order: sessions close before
+    # their engines are disposed.
+    with ExitStack() as cleanup:
+        source_engine = make_engine(sqlite_url)
+        cleanup.callback(source_engine.dispose)
+        target_engine = make_engine(normalize_postgres_url(postgres_url))
+        cleanup.callback(target_engine.dispose)
+        if create_tables:
+            if dry_run:
+                LOGGER.warning(
+                    "Dry run still creates missing target tables; "
+                    "pass --no-create-tables to leave the target schema untouched."
+                )
+            metadata.create_all(target_engine)
+        source_session = make_session(source_engine)
+        cleanup.callback(source_session.close)
+        target_session = make_session(target_engine)
+        cleanup.callback(target_session.close)
+        stats = MigrationStats()
+        try:
+            # Prefer the current "users" table; fall back to the legacy "user" table.
+            current_users = reflected_table(source_engine, "users")
+            legacy_users = reflected_table(source_engine, "user")
+            source_users = current_users if current_users is not None else legacy_users
+            if source_users is None:
+                raise RuntimeError("No users table found. Expected 'users' or legacy 'user'.")
+            user_id_map = copy_users(source_session, target_session, source_users, stats)
+            copy_api_keys(source_session, target_session, reflected_table(source_engine, "api_keys"), user_id_map, stats)
+            document_id_map = copy_documents(
+                source_session,
+                target_session,
+                reflected_table(source_engine, "documents"),
+                user_id_map,
+                stats,
+            )
+            message_id_map = copy_chat_messages(
+                source_session,
+                target_session,
+                reflected_table(source_engine, "chat_messages"),
+                user_id_map,
+                document_id_map,
+                stats,
+            )
+            copy_shared_messages(
+                source_session,
+                target_session,
+                reflected_table(source_engine, "shared_messages"),
+                message_id_map,
+                stats,
+            )
+            if dry_run:
+                target_session.rollback()
+                LOGGER.info("Dry run complete; rolled back target transaction.")
+            else:
+                target_session.commit()
+                LOGGER.info("Migration committed.")
+            return stats
+        except IntegrityError:
+            target_session.rollback()
+            LOGGER.exception("Migration failed because the target database rejected a row.")
+            raise
+        except Exception:
+            target_session.rollback()
+            LOGGER.exception("Migration failed; rolled back target transaction.")
+            raise
+def parse_args() -> argparse.Namespace:
+    """Define the migration script's command-line options and parse them.
+    The ``--postgres-url`` default is taken from the first set variable among
+    ``SUPABASE_DB_URL``, ``POSTGRES_DATABASE_URL`` and ``DATABASE_URL``.
+    """
+    env_postgres_url = (
+        os.getenv("SUPABASE_DB_URL")
+        or os.getenv("POSTGRES_DATABASE_URL")
+        or os.getenv("DATABASE_URL")
+    )
+    # Options that take a value, in the order they appear in --help.
+    value_options = (
+        (
+            "--sqlite-path",
+            {
+                "default": "instance/users.db",
+                "help": "Path to the SQLite database file. Defaults to instance/users.db.",
+            },
+        ),
+        (
+            "--sqlite-url",
+            {"help": "Full SQLite SQLAlchemy URL. Overrides --sqlite-path."},
+        ),
+        (
+            "--postgres-url",
+            {
+                "default": env_postgres_url,
+                "help": "Supabase/Postgres SQLAlchemy URL. Also read from SUPABASE_DB_URL, POSTGRES_DATABASE_URL, or DATABASE_URL.",
+            },
+        ),
+    )
+    # Boolean switches, registered after the value options.
+    switches = (
+        ("--no-create-tables", "Do not create missing target tables before migrating."),
+        ("--dry-run", "Run the migration and roll back the target transaction."),
+        ("--verbose", "Enable debug logging."),
+    )
+    cli = argparse.ArgumentParser(
+        description="Migrate SQLite users/documents/chat history to Supabase Postgres.",
+    )
+    for flag, options in value_options:
+        cli.add_argument(flag, **options)
+    for flag, help_text in switches:
+        cli.add_argument(flag, action="store_true", help=help_text)
+    return cli.parse_args()
+def main() -> int:
+    """Run the migration from the command line and log per-table counts.
+    Returns 2 when no usable Postgres URL was supplied (missing, or a URL that
+    starts with "sqlite"), otherwise 0 once the migration has finished.
+    Exceptions raised by ``migrate`` are not caught here.
+    """
+    args = parse_args()
+    log_level = logging.DEBUG if args.verbose else logging.INFO
+    logging.basicConfig(level=log_level, format="%(levelname)s %(message)s")
+    target_url = args.postgres_url
+    if not target_url or target_url.startswith("sqlite"):
+        LOGGER.error("Provide a Supabase/Postgres URL with --postgres-url or SUPABASE_DB_URL.")
+        return 2
+    # An explicit --sqlite-url wins over the file path.
+    source_url = args.sqlite_url or sqlite_url_from_path(args.sqlite_path)
+    stats = migrate(
+        sqlite_url=source_url,
+        postgres_url=target_url,
+        create_tables=not args.no_create_tables,
+        dry_run=args.dry_run,
+    )
+    # Report every table that appears in at least one of the three counters.
+    touched_tables = {*stats.inserted, *stats.reused, *stats.skipped}
+    for name in sorted(touched_tables):
+        counts = (
+            stats.inserted.get(name, 0),
+            stats.reused.get(name, 0),
+            stats.skipped.get(name, 0),
+        )
+        LOGGER.info("%s: inserted=%s reused=%s skipped=%s", name, *counts)
+    return 0
+if __name__ == "__main__":
+    # Use main()'s return value as the process exit status.
+    raise SystemExit(main())

backend/tests/test_auth.py CHANGED Viewed

@@ -115,3 +115,10 @@ def test_hf_token_appears_in_user_response(client, auth_headers, user, db_sessio
     me_resp = client.get("/api/v1/auth/me", headers=auth_headers)
     assert me_resp.status_code == 200
     assert me_resp.json()["hf_token"] == "hf_persist_token"

     me_resp = client.get("/api/v1/auth/me", headers=auth_headers)
     assert me_resp.status_code == 200
     assert me_resp.json()["hf_token"] == "hf_persist_token"
+    # Verify encryption at rest in the database directly
+    from sqlalchemy import text
+    row = db_session.execute(text("SELECT hf_token FROM users WHERE id = :id"), {"id": user.id}).fetchone()
+    stored_token = row[0]
+    assert stored_token is not None
+    assert stored_token != "hf_persist_token"

backend/tests/test_chat.py CHANGED Viewed

@@ -1,7 +1,7 @@
 def test_chat_ask_success(client, auth_headers, ready_document, monkeypatch):
     monkeypatch.setattr(
         "app.routes.chat.generate_answer",
-        lambda question, user_id, document_id=None: {
             "answer": "Mocked answer",
             "sources": [
                 {
@@ -48,3 +48,34 @@ def test_chat_ask_document_not_ready(client, auth_headers, pending_document):
     assert response.status_code == 400
     assert "Document is still pending" in response.json()["detail"]

 def test_chat_ask_success(client, auth_headers, ready_document, monkeypatch):
     monkeypatch.setattr(
         "app.routes.chat.generate_answer",
+        lambda question, user_id, document_id=None, **kwargs: {
             "answer": "Mocked answer",
             "sources": [
                 {
     assert response.status_code == 400
     assert "Document is still pending" in response.json()["detail"]
+def test_agent_dynamic_token(monkeypatch):
+    """generate_answer passes the per-call HF token to InferenceClient, else the configured one."""
+    import app.rag.agent as agent_module
+    # Holds the token given to the most recently constructed fake client.
+    captured = {"token": None}
+    class RecordingInferenceClient:
+        def __init__(self, token=None, **kwargs):
+            captured["token"] = token
+        def chat_completion(self, *args, **kwargs):
+            class EmptyCompletion:
+                choices = []
+            return EmptyCompletion()
+    # Swap in the fake client and make retrieval return no chunks.
+    monkeypatch.setattr(agent_module, "InferenceClient", RecordingInferenceClient)
+    monkeypatch.setattr(agent_module, "retrieve", lambda **kwargs: [])
+    # An explicit token must reach the client unchanged.
+    agent_module.generate_answer(question="hello?", user_id="some-user", hf_token="my-custom-hf-token")
+    assert captured["token"] == "my-custom-hf-token"
+    # With no token, the globally configured HF_TOKEN is expected instead.
+    agent_module.generate_answer(question="hello?", user_id="some-user", hf_token=None)
+    from app.config import get_settings
+    assert captured["token"] == get_settings().HF_TOKEN

backend/tests/test_retriever.py ADDED Viewed

	@@ -0,0 +1,77 @@

+from app.rag import retriever
+def test_transform_query_includes_original_and_dedupes(monkeypatch):
+    """transform_query puts the original query first and drops case-insensitive duplicates."""
+    original_query = "How do taxes and healthcare work?"
+    def fake_variants(_query):
+        # The second entry differs from the first only by letter case.
+        return [
+            "How do taxes work?",
+            "how do taxes work?",
+            "How does healthcare work?",
+            "healthcare overview",
+        ]
+    monkeypatch.setattr(retriever, "_generate_query_variants", fake_variants)
+    expected = [
+        original_query,
+        "How do taxes work?",
+        "How does healthcare work?",
+        "healthcare overview",
+    ]
+    assert retriever.transform_query(original_query) == expected
+def test_retrieve_fans_out_transformed_queries_and_merges_duplicates(monkeypatch):
+    """retrieve searches once per transformed query and merges chunks that share an id."""
+    searched_queries = []
+    def make_chunk(chunk_id, text, page, score):
+        # Build a fresh dict per call so no chunk object is shared between results.
+        return {
+            "id": chunk_id,
+            "text": text,
+            "filename": "policy.pdf",
+            "page": page,
+            "score": score,
+        }
+    def fake_query_chunks(query_embedding, user_id, document_id=None, top_k=10):
+        searched_queries.append(query_embedding)
+        if query_embedding == "embedding:taxes":
+            return [
+                make_chunk("shared", "Shared chunk", 1, 0.2),
+                make_chunk("taxes", "Tax chunk", 2, 0.7),
+            ]
+        # Any other embedding gets the healthcare results; "shared" scores higher here.
+        return [
+            make_chunk("shared", "Shared chunk", 1, 0.9),
+            make_chunk("healthcare", "Healthcare chunk", 3, 0.8),
+        ]
+    monkeypatch.setattr(retriever, "transform_query", lambda _query: ["taxes", "healthcare"])
+    monkeypatch.setattr(retriever, "embed_query", lambda query: f"embedding:{query}")
+    monkeypatch.setattr(retriever, "get_reranker", lambda: None)
+    monkeypatch.setattr(retriever, "query_chunks", fake_query_chunks)
+    chunks = retriever.retrieve("How do taxes and healthcare work?", user_id="user-1")
+    assert searched_queries == ["embedding:taxes", "embedding:healthcare"]
+    returned_ids = [chunk["id"] for chunk in chunks]
+    assert returned_ids == ["shared", "healthcare", "taxes"]
+    top_chunk = chunks[0]
+    assert top_chunk["score"] == 0.9
+    assert top_chunk["confidence"] == 100.0

frontend/package-lock.json CHANGED Viewed

@@ -9,6 +9,7 @@
       "version": "0.1.0",
       "dependencies": {
         "@base-ui/react": "^1.4.1",
         "class-variance-authority": "^0.7.1",
         "clsx": "^2.1.1",
         "i18next": "^26.3.0",
@@ -2532,6 +2533,31 @@
         "tailwindcss": "4.2.2"
       }
     },
     "node_modules/@ts-morph/common": {
       "version": "0.27.0",
       "resolved": "https://registry.npmjs.org/@ts-morph/common/-/common-0.27.0.tgz",
@@ -10845,7 +10871,6 @@
       "version": "4.2.2",
       "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.2.2.tgz",
       "integrity": "sha512-KWBIxs1Xb6NoLdMVqhbhgwZf2PGBpPEiwOqgI4pFIYbNTfBXiKYyWoTsXgBQ9WFg/OlhnvHaY+AEpW7wSmFo2Q==",
-      "dev": true,
       "license": "MIT"
     },
     "node_modules/tapable": {

       "version": "0.1.0",
       "dependencies": {
         "@base-ui/react": "^1.4.1",
+        "@tailwindcss/typography": "^0.5.19",
         "class-variance-authority": "^0.7.1",
         "clsx": "^2.1.1",
         "i18next": "^26.3.0",
         "tailwindcss": "4.2.2"
       }
     },
+    "node_modules/@tailwindcss/typography": {
+      "version": "0.5.19",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/typography/-/typography-0.5.19.tgz",
+      "integrity": "sha512-w31dd8HOx3k9vPtcQh5QHP9GwKcgbMp87j58qi6xgiBnFFtKEAgCWnDw4qUT8aHwkCp8bKvb/KGKWWHedP0AAg==",
+      "license": "MIT",
+      "dependencies": {
+        "postcss-selector-parser": "6.0.10"
+      },
+      "peerDependencies": {
+        "tailwindcss": ">=3.0.0 || insiders || >=4.0.0-alpha.20 || >=4.0.0-beta.1"
+      }
+    },
+    "node_modules/@tailwindcss/typography/node_modules/postcss-selector-parser": {
+      "version": "6.0.10",
+      "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.0.10.tgz",
+      "integrity": "sha512-IQ7TZdoaqbT+LCpShg46jnZVlhWD2w6iQYAcYXfHARZ7X1t/UGhhceQDs5X0cGqKvYlHNOuv7Oa1xmb0oQuA3w==",
+      "license": "MIT",
+      "dependencies": {
+        "cssesc": "^3.0.0",
+        "util-deprecate": "^1.0.2"
+      },
+      "engines": {
+        "node": ">=4"
+      }
+    },
     "node_modules/@ts-morph/common": {
       "version": "0.27.0",
       "resolved": "https://registry.npmjs.org/@ts-morph/common/-/common-0.27.0.tgz",
       "version": "4.2.2",
       "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.2.2.tgz",
       "integrity": "sha512-KWBIxs1Xb6NoLdMVqhbhgwZf2PGBpPEiwOqgI4pFIYbNTfBXiKYyWoTsXgBQ9WFg/OlhnvHaY+AEpW7wSmFo2Q==",
       "license": "MIT"
     },
     "node_modules/tapable": {

frontend/package.json CHANGED Viewed

@@ -12,6 +12,7 @@
   },
   "dependencies": {
     "@base-ui/react": "^1.4.1",
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",
     "i18next": "^26.3.0",

   },
   "dependencies": {
     "@base-ui/react": "^1.4.1",
+    "@tailwindcss/typography": "^0.5.19",
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",
     "i18next": "^26.3.0",

frontend/src/app/dashboard/page.tsx CHANGED Viewed

@@ -3,17 +3,41 @@
 import { useEffect, useState, useCallback } from "react";
 import { useRouter } from "next/navigation";
 import { useAuth } from "@/lib/auth";
-import {
-  api,
-  CONNECTION_ERROR_BANNER_MESSAGE,
-  CONNECTION_ERROR_MESSAGE,
-} from "@/lib/api";
 import Header from "@/components/layout/Header";
 import DocumentSidebar from "@/components/document/DocumentSidebar";
 import ChatPanel from "@/components/chat/ChatPanel";
-import PDFViewer from "@/components/document/PDFViewer";
-import { Skeleton } from "@/components/ui/skeleton";
 export interface DocInfo {
   summary: string;
@@ -27,23 +51,6 @@ export interface DocInfo {
   uploaded_at: string;
 }
-function DocumentSkeleton() {
-  return (
-    <div className="w-72 flex-shrink-0 border-r border-border/50 p-4 space-y-4">
-      {[1, 2, 3, 4].map((item) => (
-        <div
-          key={item}
-          className="rounded-lg border border-border/50 p-4 space-y-3"
-        >
-          <Skeleton className="h-4 w-[180px]" />
-          <Skeleton className="h-3 w-[120px]" />
-          <Skeleton className="h-3 w-[90px]" />
-        </div>
-      ))}
-    </div>
-  );
-}
 export default function DashboardPage() {
   const { user, loading } = useAuth();
   const router = useRouter();
@@ -54,7 +61,6 @@ export default function DashboardPage() {
   const [sidebarOpen, setSidebarOpen] = useState(true);
   const [viewerOpen, setViewerOpen] = useState(true);
   const [connectionError, setConnectionError] = useState("");
-  const [documentsLoading, setDocumentsLoading] = useState(true);
     // Auth guard
   useEffect(() => {
@@ -76,31 +82,23 @@ export default function DashboardPage() {
   // Load documents
   const loadDocuments = useCallback(async () => {
     try {
-      setDocumentsLoading(true);
       const data = await api.get<{ documents?: DocInfo[]; items?: DocInfo[] }>(
         "/api/v1/documents/"
       );
       setDocuments(data?.documents ?? data?.items ?? []);
       setConnectionError("");
     } catch (err) {
-      const message =
-        err instanceof Error ? err.message : CONNECTION_ERROR_MESSAGE;
       setConnectionError(
         message === CONNECTION_ERROR_MESSAGE
           ? CONNECTION_ERROR_BANNER_MESSAGE
           : `⚠️ ${message}`
       );
-    } finally {
-      setDocumentsLoading(false);
     }
   }, []);
   useEffect(() => {
     if (!user) return;
     void (async () => {
       await loadDocuments();
     })();
@@ -111,11 +109,9 @@ export default function DashboardPage() {
     const hasPending = (documents || []).some(
       (d) => d.status === "pending" || d.status === "processing"
     );
     if (!hasPending) return;
     const interval = setInterval(loadDocuments, 3000);
     return () => clearInterval(interval);
   }, [documents, loadDocuments]);
@@ -127,6 +123,19 @@ export default function DashboardPage() {
     );
   }
   return (
     <div className="h-screen flex flex-col overflow-hidden">
       <Header
@@ -134,6 +143,7 @@ export default function DashboardPage() {
         onToggleSidebar={() => setSidebarOpen(!sidebarOpen)}
         viewerOpen={viewerOpen}
         onToggleViewer={() => setViewerOpen(!viewerOpen)}
       />
       {connectionError && (
@@ -146,49 +156,35 @@ export default function DashboardPage() {
       )}
       <div className="flex-1 flex overflow-hidden">
-        {/* ── Left: Document Sidebar / Skeleton ──────────────── */}
-        {sidebarOpen &&
-          (documentsLoading ? (
-            <DocumentSkeleton />
-          ) : (
-            <div className="w-72 flex-shrink-0 border-r border-border/50 overflow-hidden animate-fade-in-up">
-              <DocumentSidebar
-                documents={documents}
-                activeDoc={activeDoc}
-                onSelectDoc={(doc) => {
-                  setActiveDoc(doc);
-                  setPdfPage(1);
-                }}
-                onDocumentsChange={loadDocuments}
-              />
-            </div>
-          ))}
-        {/* ── Center: Chat Panel ─────────────────── */}
         <div className="flex-1 min-w-0 flex flex-col">
           <ChatPanel
             activeDoc={activeDoc}
             onCitationClick={(page) => {
               setPdfPage(page);
               if (!viewerOpen) setViewerOpen(true);
             }}
           />
         </div>
-        {/* ── Right: PDF Viewer ──────────────────── */}
-        {viewerOpen &&
-          activeDoc &&
-          activeDoc.original_name.endsWith(".pdf") && (
-            <div className="w-[480px] flex-shrink-0 border-l border-border/50 overflow-hidden animate-fade-in-up">
-              <PDFViewer
-                documentId={activeDoc.id}
-                currentPage={pdfPage}
-                onPageChange={setPdfPage}
-                totalPages={activeDoc.page_count}
-              />
-            </div>
-          )}
       </div>
     </div>
   );

 import { useEffect, useState, useCallback } from "react";
 import { useRouter } from "next/navigation";
 import { useAuth } from "@/lib/auth";
+import { api, CONNECTION_ERROR_BANNER_MESSAGE, CONNECTION_ERROR_MESSAGE } from "@/lib/api";
 import Header from "@/components/layout/Header";
 import DocumentSidebar from "@/components/document/DocumentSidebar";
 import ChatPanel from "@/components/chat/ChatPanel";
+/**
+ * Static placeholder rendered while the PDF viewer is loading.
+ *
+ * Draws a toolbar row with two groups of pulsing blocks above one large
+ * pulsing panel. The root element carries aria-busy="true" and an
+ * aria-label of "Loading PDF viewer".
+ */
+function PDFViewerSkeleton() {
+  return (
+    <div
+      className="h-full flex flex-col bg-background"
+      aria-busy="true"
+      aria-label="Loading PDF viewer"
+    >
+      <div className="flex items-center justify-between px-3 py-2 border-b border-border/50 bg-card/50 shrink-0">
+        <div className="flex items-center gap-2">
+          <div className="h-7 w-7 rounded-md bg-muted/70 animate-pulse" />
+          <div className="h-7 w-20 rounded-md bg-muted/70 animate-pulse" />
+          <div className="h-7 w-7 rounded-md bg-muted/70 animate-pulse" />
+        </div>
+        <div className="flex items-center gap-2">
+          <div className="h-7 w-7 rounded-md bg-muted/70 animate-pulse" />
+          <div className="h-4 w-10 rounded bg-muted/70 animate-pulse" />
+          <div className="h-7 w-7 rounded-md bg-muted/70 animate-pulse" />
+        </div>
+      </div>
+      <div className="flex-1 p-4">
+        <div className="h-full rounded-lg border border-border/50 bg-muted/40 animate-pulse" />
+      </div>
+    </div>
+  );
+}
+// Load the PDF viewer component lazily, showing PDFViewerSkeleton until the
+// import resolves.
+// NOTE(review): `dynamic` is presumably next/dynamic, in which case
+// `ssr: false` keeps the viewer out of server rendering — its import is not
+// visible in this hunk, so confirm it is present in the file.
+const PDFViewer = dynamic(() => import("@/components/document/PDFViewer"), {
+  ssr: false,
+  loading: () => <PDFViewerSkeleton />,
+});
 export interface DocInfo {
   summary: string;
   uploaded_at: string;
 }
 export default function DashboardPage() {
   const { user, loading } = useAuth();
   const router = useRouter();
   const [sidebarOpen, setSidebarOpen] = useState(true);
   const [viewerOpen, setViewerOpen] = useState(true);
   const [connectionError, setConnectionError] = useState("");
     // Auth guard
   useEffect(() => {
   // Load documents
   const loadDocuments = useCallback(async () => {
     try {
       const data = await api.get<{ documents?: DocInfo[]; items?: DocInfo[] }>(
         "/api/v1/documents/"
       );
       setDocuments(data?.documents ?? data?.items ?? []);
       setConnectionError("");
     } catch (err) {
+      const message = err instanceof Error ? err.message : CONNECTION_ERROR_MESSAGE;
       setConnectionError(
         message === CONNECTION_ERROR_MESSAGE
           ? CONNECTION_ERROR_BANNER_MESSAGE
           : `⚠️ ${message}`
       );
     }
   }, []);
   useEffect(() => {
     if (!user) return;
     void (async () => {
       await loadDocuments();
     })();
     const hasPending = (documents || []).some(
       (d) => d.status === "pending" || d.status === "processing"
     );
     if (!hasPending) return;
     const interval = setInterval(loadDocuments, 3000);
     return () => clearInterval(interval);
   }, [documents, loadDocuments]);
     );
   }
+  // Shared sidebar content — used by both desktop panel and mobile sheet
+  const sidebarContent = (
+    <DocumentSidebar
+      documents={documents}
+      activeDoc={activeDoc}
+      onSelectDoc={(doc) => {
+        setActiveDoc(doc);
+        setPdfPage(1);
+      }}
+      onDocumentsChange={loadDocuments}
+    />
+  );
   return (
     <div className="h-screen flex flex-col overflow-hidden">
       <Header
         onToggleSidebar={() => setSidebarOpen(!sidebarOpen)}
         viewerOpen={viewerOpen}
         onToggleViewer={() => setViewerOpen(!viewerOpen)}
+        mobileSheetContent={sidebarContent}
       />
       {connectionError && (
       )}
       <div className="flex-1 flex overflow-hidden">
+        {/* ── Left: Document Sidebar — desktop only (md+) ─────────── */}
+        {sidebarOpen && (
+          <div className="hidden md:block w-72 flex-shrink-0 border-r border-border/50 overflow-hidden animate-fade-in-up">
+            {sidebarContent}
+          </div>
+        )}
+        {/* ── Center: Chat Panel ──────────────────────────────────── */}
         <div className="flex-1 min-w-0 flex flex-col">
           <ChatPanel
             activeDoc={activeDoc}
             onCitationClick={(page) => {
               setPdfPage(page);
               if (!viewerOpen) setViewerOpen(true);
             }}
           />
         </div>
+        {/* ── Right: PDF Viewer — hidden on mobile ────────────────── */}
+        {viewerOpen && activeDoc && activeDoc.original_name.endsWith(".pdf") && (
+          <div className="hidden md:block w-[480px] flex-shrink-0 border-l border-border/50 overflow-hidden animate-fade-in-up">
+            <PDFViewer
+              documentId={activeDoc.id}
+              currentPage={pdfPage}
+              onPageChange={setPdfPage}
+              totalPages={activeDoc.page_count}
+            />
+          </div>
+        )}
       </div>
     </div>
   );

frontend/src/app/globals.css CHANGED Viewed

@@ -1,6 +1,7 @@
 @import "tailwindcss";
 @import "tw-animate-css";
 @import "shadcn/tailwind.css";
 @custom-variant dark (&:is(.dark *));

 @import "tailwindcss";
 @import "tw-animate-css";
 @import "shadcn/tailwind.css";
+@plugin "@tailwindcss/typography";
 @custom-variant dark (&:is(.dark *));

frontend/src/app/page.tsx CHANGED Viewed

@@ -128,8 +128,18 @@ export default function HomePage() {
       </div>
       {/* ── Footer ──────────────────────────────────── */}
-      <footer className="text-center py-6 text-xs text-muted-foreground border-t border-border/50">
-        Built with FastAPI • LangChain • ChromaDB • HuggingFace • Next.js
       </footer>
       {/* Hall of Fame Modal */}

       </div>
       {/* ── Footer ──────────────────────────────────── */}
+      <footer className="py-8 text-xs text-muted-foreground border-t border-border/50">
+        <div className="max-w-4xl mx-auto px-6 flex flex-col sm:flex-row items-center justify-between gap-4">
+          <span>Built with FastAPI • LangChain • ChromaDB • HuggingFace • Next.js</span>
+          <div className="flex items-center gap-4">
+            <Link href="/privacy" className="hover:text-foreground transition-colors">
+              Privacy Policy
+            </Link>
+            <Link href="/terms" className="hover:text-foreground transition-colors">
+              Terms of Service
+            </Link>
+          </div>
+        </div>
       </footer>
       {/* Hall of Fame Modal */}

frontend/src/app/privacy/page.tsx ADDED Viewed

	@@ -0,0 +1,450 @@

+import type { Metadata } from "next";
+import Link from "next/link";
+import { ArrowLeft, Shield, Brain, FileText, Database, Cookie, UserCheck, Mail } from "lucide-react";
+export const metadata: Metadata = {
+  title: "Privacy Policy — Document AI Analyst",
+  description:
+    "How PDF-Assistant-RAG collects, uses, and protects your data. Learn about our privacy practices for document uploads, AI processing, and account information.",
+  openGraph: {
+    title: "Privacy Policy — Document AI Analyst",
+    description:
+      "How PDF-Assistant-RAG collects, uses, and protects your data.",
+  },
+};
+const sections = [
+  {
+    id: "information-we-collect",
+    icon: FileText,
+    title: "1. Information We Collect",
+    content: (
+      <>
+        <p>
+          When you use PDF-Assistant-RAG, we collect the following categories of information
+          to provide and improve our service:
+        </p>
+        <h3>Account Information</h3>
+        <ul>
+          <li>
+            <strong>Registration data:</strong> username, email address, and a securely hashed
+            password when you create an account.
+          </li>
+          <li>
+            <strong>Profile information:</strong> any optional details you choose to provide.
+          </li>
+        </ul>
+        <h3>Document Data</h3>
+        <ul>
+          <li>
+            <strong>Uploaded files:</strong> PDFs, DOCX, TXT, Markdown, and other documents you
+            upload for analysis.
+          </li>
+          <li>
+            <strong>Extracted content:</strong> text, embeddings, and metadata extracted from your
+            documents to enable semantic search and AI-powered question answering.
+          </li>
+          <li>
+            <strong>Chat history:</strong> questions you ask and the AI-generated responses, stored
+            to maintain conversation context.
+          </li>
+        </ul>
+        <h3>Usage Data</h3>
+        <ul>
+          <li>
+            <strong>Technical metadata:</strong> page views, feature interactions, query timestamps,
+            and performance metrics to improve the platform.
+          </li>
+          <li>
+            <strong>Device &amp; browser info:</strong> browser type, operating system, and basic
+            device information for compatibility optimization.
+          </li>
+        </ul>
+      </>
+    ),
+  },
+  {
+    id: "how-we-use-data",
+    icon: Brain,
+    title: "2. How We Use Your Data",
+    content: (
+      <>
+        <p>Your data is used solely for the core functionality of the platform:</p>
+        <ul>
+          <li>
+            <strong>AI-powered document analysis:</strong> Your documents are processed by
+            open-source large language models (LLMs) hosted on HuggingFace to generate insights,
+            summaries, and answers to your questions.
+          </li>
+          <li>
+            <strong>Semantic search &amp; retrieval:</strong> Document embeddings are stored in
+            vector databases (ChromaDB) to enable fast, accurate retrieval of relevant content.
+          </li>
+          <li>
+            <strong>Conversation continuity:</strong> Chat history is stored per session so you
+            can refer back to previous interactions.
+          </li>
+          <li>
+            <strong>Service improvement:</strong> Aggregated, anonymized usage patterns help us
+            identify bugs, optimize performance, and prioritize features.
+          </li>
+        </ul>
+        <p>
+          We <strong>do not</strong> use your uploaded documents or chat data to train or fine-tune
+          any AI models. Your content remains private to your account.
+        </p>
+      </>
+    ),
+  },
+  {
+    id: "data-storage-security",
+    icon: Shield,
+    // Plain JS string rendered via {section.title}: JSX only decodes HTML
+    // entities in literal JSX text, so "&amp;" here would be shown verbatim.
+    title: "3. Data Storage & Security",
+    content: (
+      <>
+        <p>We take data protection seriously and implement multiple layers of security:</p>
+        <h3>Encryption</h3>
+        <ul>
+          <li>
+            <strong>In transit:</strong> All communications between your browser and our servers
+            are encrypted using TLS 1.3.
+          </li>
+          <li>
+            <strong>At rest:</strong> Document files, embeddings, and user data are stored in
+            encrypted storage volumes.
+          </li>
+          <li>
+            <strong>Passwords:</strong> Never stored in plain text — we use bcrypt hashing with
+            per-user salts.
+          </li>
+        </ul>
+        <h3>Data Isolation</h3>
+        <ul>
+          <li>
+            Each user&apos;s documents and embeddings are stored in isolated vector collections.
+          </li>
+          <li>
+            Authentication is enforced at every API endpoint — users can only access their own
+            data.
+          </li>
+          <li>
+            JWT tokens with short expiration and refresh token rotation prevent unauthorized
+            access.
+          </li>
+        </ul>
+        <h3>Infrastructure</h3>
+        <ul>
+          <li>
+            Servers are hosted on secure cloud infrastructure with strict access controls.
+          </li>
+          <li>
+            Regular security audits and dependency updates are performed.
+          </li>
+        </ul>
+      </>
+    ),
+  },
+  {
+    id: "data-retention",
+    icon: Database,
+    title: "4. Data Retention",
+    content: (
+      <>
+        <p>We retain your data only as long as necessary to provide the service:</p>
+        <ul>
+          <li>
+            <strong>Account data:</strong> Retained until you delete your account. You can request
+            account deletion at any time.
+          </li>
+          <li>
+            <strong>Uploaded documents &amp; embeddings:</strong> Retained until you delete them
+            or close your account. Documents can be removed individually from the dashboard.
+          </li>
+          <li>
+            <strong>Chat history:</strong> Retained per conversation. You can clear individual
+            chats or your entire history from the settings page.
+          </li>
+          <li>
+            <strong>Logs &amp; analytics:</strong> Aggregated usage data may be retained longer
+            in anonymized form for service improvement.
+          </li>
+        </ul>
+        <p>
+          When you delete your account, all associated documents, embeddings, chat histories, and
+          personal information are permanently deleted within 30 days.
+        </p>
+      </>
+    ),
+  },
+  {
+    id: "third-party-services",
+    icon: Database,
+    title: "5. Third-Party Services",
+    content: (
+      <>
+        <p>
+          PDF-Assistant-RAG integrates with the following third-party services to deliver its
+          functionality:
+        </p>
+        <ul>
+          <li>
+            <strong>HuggingFace Inference API:</strong> Used to run open-source LLMs for document
+            analysis. Document snippets may be sent to HuggingFace for inference; they are not
+            stored or used for training. See{" "}
+            <a
+              href="https://huggingface.co/privacy"
+              target="_blank"
+              rel="noopener noreferrer"
+            >
+              HuggingFace&apos;s Privacy Policy
+            </a>.
+          </li>
+          <li>
+            <strong>Google OAuth (optional):</strong> If you choose to sign in with Google, we
+            receive only your name and email address from your Google profile. See{" "}
+            <a
+              href="https://policies.google.com/privacy"
+              target="_blank"
+              rel="noopener noreferrer"
+            >
+              Google&apos;s Privacy Policy
+            </a>.
+          </li>
+        </ul>
+        <p>
+          We do not sell your personal information or document data to any third party.
+        </p>
+      </>
+    ),
+  },
+  {
+    id: "cookies",
+    icon: Cookie,
+    title: "6. Cookies",
+    content: (
+      <>
+        <p>We use only essential cookies required for the platform to function:</p>
+        <ul>
+          <li>
+            <strong>Authentication cookies:</strong> JWT refresh tokens stored securely as
+            HTTP-only cookies to maintain your login session.
+          </li>
+          <li>
+            <strong>Local storage:</strong> Access tokens and UI preferences (theme, language)
+            are stored in your browser&apos;s local storage. No tracking or advertising cookies
+            are used.
+          </li>
+        </ul>
+        <p>
+          You can clear these at any time via your browser settings. Note that clearing
+          authentication data will sign you out of your session.
+        </p>
+      </>
+    ),
+  },
+  {
+    id: "your-rights",
+    icon: UserCheck,
+    title: "7. Your Rights",
+    content: (
+      <>
+        <p>You have the following rights regarding your data:</p>
+        <ul>
+          <li>
+            <strong>Access:</strong> View all documents and data associated with your account at
+            any time from your dashboard.
+          </li>
+          <li>
+            <strong>Deletion:</strong> Delete individual documents or your entire account and
+            associated data.
+          </li>
+          <li>
+            <strong>Export:</strong> Request a copy of your data in a machine-readable format.
+          </li>
+          <li>
+            <strong>Correction:</strong> Update your account information (username, email) from
+            your profile settings.
+          </li>
+          <li>
+            <strong>Withdraw consent:</strong> Stop using the service and delete your account at
+            any time.
+          </li>
+        </ul>
+        <p>
+          To exercise any of these rights, please contact us using the information in the
+          &ldquo;Contact&rdquo; section below.
+        </p>
+      </>
+    ),
+  },
+  {
+    id: "changes",
+    icon: Shield,
+    title: "8. Changes to This Policy",
+    content: (
+      <>
+        <p>
+          We may update this Privacy Policy from time to time. Changes will be communicated by:
+        </p>
+        <ul>
+          <li>Posting the updated policy on this page with a new &ldquo;Last updated&rdquo; date.</li>
+          <li>
+            Sending a notification to your registered email address for material changes.
+          </li>
+        </ul>
+        <p>
+          Your continued use of the platform after changes constitutes acceptance of the updated
+          policy. We encourage you to review this page periodically.
+        </p>
+      </>
+    ),
+  },
+  {
+    id: "contact",
+    icon: Mail,
+    title: "9. Contact Us",
+    content: (
+      <>
+        <p>
+          If you have any questions, concerns, or requests regarding this Privacy Policy or your
+          data, please reach out through the project&rsquo;s official channels:
+        </p>
+        <ul>
+          <li>
+            <strong>GitHub Issues:</strong>{" "}
+            <a
+              href="https://github.com/param20h/PDF-Assistant-RAG/issues"
+              target="_blank"
+              rel="noopener noreferrer"
+            >
+              github.com/param20h/PDF-Assistant-RAG/issues
+            </a>
+          </li>
+          <li>
+            <strong>GitHub Discussions:</strong>{" "}
+            <a
+              href="https://github.com/param20h/PDF-Assistant-RAG/discussions"
+              target="_blank"
+              rel="noopener noreferrer"
+            >
+              github.com/param20h/PDF-Assistant-RAG/discussions
+            </a>
+          </li>
+          <li>
+            <strong>LinkedIn:</strong>{" "}
+            <a
+              href="https://www.linkedin.com/in/param20h/"
+              target="_blank"
+              rel="noopener noreferrer"
+            >
+              linkedin.com/in/param20h
+            </a>
+          </li>
+        </ul>
+      </>
+    ),
+  },
+];
+export default function PrivacyPage() {
+  return (
+    <div className="min-h-screen bg-background">
+      {/* ── Header ────────────────────────────────────── */}
+      <header className="sticky top-0 z-50 border-b border-border/50 bg-card/50 backdrop-blur-md">
+        <div className="mx-auto max-w-4xl flex items-center justify-between px-6 h-14">
+          <Link
+            href="/"
+            className="flex items-center gap-2 text-sm text-muted-foreground hover:text-foreground transition-colors"
+          >
+            <ArrowLeft className="w-4 h-4" />
+            Back to Home
+          </Link>
+          <div className="flex items-center gap-2">
+            <div className="w-7 h-7 rounded-lg bg-primary/15 flex items-center justify-center">
+              <Shield className="w-4 h-4 text-primary" />
+            </div>
+            <span className="font-semibold text-sm">Privacy Policy</span>
+          </div>
+        </div>
+      </header>
+      {/* ── Hero ──────────────────────────────────────── */}
+      <section className="border-b border-border/50">
+        <div className="mx-auto max-w-4xl px-6 py-16 sm:py-20 text-center">
+          <div className="inline-flex items-center gap-2 px-4 py-1.5 rounded-full bg-primary/10 border border-primary/20 text-sm text-primary mb-6">
+            <Shield className="w-4 h-4" />
+            Your data matters
+          </div>
+          <h1 className="text-4xl sm:text-5xl font-bold tracking-tight mb-4">
+            Privacy Policy
+          </h1>
+          <p className="text-lg text-muted-foreground max-w-2xl mx-auto">
+            How we collect, use, and protect your data when you use PDF-Assistant-RAG.
+          </p>
+          <p className="mt-4 text-sm text-muted-foreground">
+            <em>Last updated: May 30, 2026</em>
+          </p>
+        </div>
+      </section>
+      {/* ── Content ───────────────────────────────────── */}
+      <div className="mx-auto max-w-4xl px-6 py-12 sm:py-16">
+        {/* Table of Contents */}
+        <nav className="mb-12 p-6 rounded-xl border border-border/50 bg-card/30" aria-label="Table of contents">
+          <h2 className="text-sm font-semibold uppercase tracking-wider text-muted-foreground mb-4">
+            On this page
+          </h2>
+          <ul className="space-y-2">
+            {sections.map((section) => (
+              <li key={section.id}>
+                <a
+                  href={`#${section.id}`}
+                  className="flex items-center gap-2 text-sm text-muted-foreground hover:text-foreground transition-colors"
+                >
+                  <section.icon className="w-3.5 h-3.5 shrink-0 text-primary" />
+                  {section.title}
+                </a>
+              </li>
+            ))}
+          </ul>
+        </nav>
+        {/* Sections */}
+        <div className="prose prose-sm sm:prose-base dark:prose-invert max-w-none prose-headings:font-semibold prose-headings:tracking-tight prose-h2:text-foreground prose-h3:text-foreground prose-p:text-muted-foreground prose-p:leading-relaxed prose-a:text-primary prose-a:no-underline hover:prose-a:underline prose-strong:text-foreground prose-li:text-muted-foreground prose-li:marker:text-primary/60">
+          {sections.map((section) => (
+            <section key={section.id} id={section.id} className="mb-12 scroll-mt-20">
+              <div className="flex items-center gap-3 mb-6">
+                <div className="w-8 h-8 rounded-lg bg-primary/10 flex items-center justify-center shrink-0">
+                  <section.icon className="w-4 h-4 text-primary" />
+                </div>
+                <h2 className="text-xl sm:text-2xl !my-0">{section.title}</h2>
+              </div>
+              {section.content}
+              <hr className="mt-8 border-border/30" />
+            </section>
+          ))}
+        </div>
+        {/* Footer note */}
+        <div className="mt-8 text-center">
+          <p className="text-sm text-muted-foreground">
+            Have questions?{" "}
+            <a
+              href="https://github.com/param20h/PDF-Assistant-RAG/discussions"
+              target="_blank"
+              rel="noopener noreferrer"
+              className="text-primary hover:underline"
+            >
+              Start a discussion
+            </a>
+          </p>
+        </div>
+      </div>
+      {/* ── Footer ────────────────────────────────── */}
+      <footer className="text-center py-6 text-xs text-muted-foreground border-t border-border/50">
+        Built with FastAPI • LangChain • ChromaDB • HuggingFace • Next.js
+      </footer>
+    </div>
+  );
+}

frontend/src/app/terms/page.tsx ADDED Viewed

	@@ -0,0 +1,435 @@

+import type { Metadata } from "next";
+import Link from "next/link";
+import {
+  ArrowLeft,
+  Shield,
+  CheckCircle,
+  FileText,
+  AlertTriangle,
+  UserCheck,
+  Scale,
+  Ban,
+  RefreshCw,
+  Mail,
+} from "lucide-react";
+const sections = [
+  {
+    id: "acceptance",
+    icon: CheckCircle,
+    title: "1. Acceptance of Terms",
+    content: (
+      <>
+        <p>
+          By accessing or using PDF-Assistant-RAG (&ldquo;the Platform&rdquo;), you agree to be
+          bound by these Terms of Service (&ldquo;Terms&rdquo;). If you do not agree to all terms,
+          you must not use the Platform.
+        </p>
+        <p>
+          These Terms apply to all visitors, users, and contributors to the Platform. By creating
+          an account, uploading documents, or interacting with the service in any way, you signify
+          your acceptance of these Terms.
+        </p>
+      </>
+    ),
+  },
+  {
+    id: "service-description",
+    icon: FileText,
+    title: "2. Description of Service",
+    content: (
+      <>
+        <p>
+          PDF-Assistant-RAG is an open-source document analysis platform that allows users to upload
+          documents (PDF, DOCX, TXT, Markdown) and interact with them through AI-powered semantic
+          search and chat, using Retrieval-Augmented Generation (RAG) and open-source large language
+          models (LLMs).
+        </p>
+        <p>The core features include:</p>
+        <ul>
+          <li>Document upload, storage, and management</li>
+          <li>AI-powered question answering and document analysis</li>
+          <li>Semantic search across uploaded documents</li>
+          <li>Conversation history and context retention</li>
+          <li>Multi-language support (English, Hindi, Spanish, French)</li>
+        </ul>
+        <p>
+          The Platform is provided &ldquo;as is&rdquo; and &ldquo;as available&rdquo; for
+          educational and productivity purposes. The maintainers make no guarantees about the
+          accuracy, completeness, or reliability of AI-generated responses.
+        </p>
+      </>
+    ),
+  },
+  {
+    id: "accounts",
+    icon: UserCheck,
+    // Plain JS string rendered via {section.title}: JSX only decodes HTML
+    // entities in literal JSX text, so "&amp;" here would be shown verbatim.
+    title: "3. User Accounts & Registration",
+    content: (
+      <>
+        <p>To use certain features of the Platform, you must register for an account:</p>
+        <ul>
+          <li>
+            <strong>Accuracy:</strong> You agree to provide accurate, current, and complete
+            information during registration and to update it as necessary.
+          </li>
+          <li>
+            <strong>Security:</strong> You are responsible for safeguarding your password and for
+            all activities under your account. Notify the maintainers immediately of any
+            unauthorized use.
+          </li>
+          <li>
+            <strong>Account types:</strong> The Platform supports email/password registration and
+            optional Google OAuth sign-in.
+          </li>
+          <li>
+            <strong>One account per person:</strong> You may not create multiple accounts for the
+            same individual unless explicitly permitted.
+          </li>
+          <li>
+            <strong>No shared accounts:</strong> Account sharing with unauthorized users is
+            prohibited.
+          </li>
+        </ul>
+      </>
+    ),
+  },
+  {
+    id: "acceptable-use",
+    icon: Ban,
+    title: "4. Acceptable Use",
+    content: (
+      <>
+        <p>You agree to use the Platform only for lawful purposes and in accordance with these Terms. Prohibited activities include:</p>
+        <ul>
+          <li>
+            Uploading malware, viruses, or any malicious code
+          </li>
+          <li>
+            Uploading illegal, obscene, defamatory, or infringing content
+          </li>
+          <li>
+            Attempting to bypass authentication, access other users&apos; data, or exploit the
+            system
+          </li>
+          <li>
+            Using the Platform for automated scraping, data mining, or high-volume API abuse
+          </li>
+          <li>
+            Reverse-engineering, decompiling, or attempting to extract the source code of
+            proprietary components
+          </li>
+          <li>
+            Interfering with the operation of the Platform or its underlying infrastructure
+          </li>
+        </ul>
+      </>
+    ),
+  },
+  {
+    id: "content-data",
+    icon: Shield,
+    // Plain JS string rendered via {section.title}: JSX only decodes HTML
+    // entities in literal JSX text, so "&amp;" here would be shown verbatim.
+    title: "5. Uploaded Content & Data",
+    content: (
+      <>
+        <p>
+          You retain full ownership of all documents and content you upload to the Platform
+          (&ldquo;Your Content&rdquo;). By uploading, you grant the Platform a limited, temporary
+          license to process, store, and analyze Your Content solely for the purpose of providing
+          the service.
+        </p>
+        <h3>Data Handling</h3>
+        <ul>
+          <li>
+            Your documents are processed by open-source LLMs hosted on HuggingFace. Document
+            snippets may be sent for inference but are not stored or used for training.
+          </li>
+          <li>
+            Document embeddings are stored in per-user isolated vector collections (ChromaDB).
+          </li>
+          <li>
+            Chat history is stored per session to maintain conversation context.
+          </li>
+        </ul>
+        <h3>Your Responsibilities</h3>
+        <ul>
+          <li>
+            You represent that you own or have the necessary rights to upload and process Your
+            Content.
+          </li>
+          <li>
+            You must not upload documents containing sensitive personal information, trade secrets,
+            or classified data unless you have the legal right to do so.
+          </li>
+          <li>
+            You are solely responsible for the legality, reliability, and accuracy of Your Content.
+          </li>
+        </ul>
+        <p>
+          See our{" "}
+          <Link href="/privacy" className="text-primary hover:underline">
+            Privacy Policy
+          </Link>{" "}
+          for more details on how we handle your data.
+        </p>
+      </>
+    ),
+  },
+  {
+    id: "intellectual-property",
+    icon: Scale,
+    title: "6. Intellectual Property",
+    content: (
+      <>
+        <p>
+          The Platform codebase is open-source and licensed under the{" "}
+          <a
+            href="https://opensource.org/licenses/MIT"
+            target="_blank"
+            rel="noopener noreferrer"
+          >
+            MIT License
+          </a>. This means:
+        </p>
+        <ul>
+          <li>
+            You may freely use, modify, and distribute the source code, subject to the terms of
+            the MIT License.
+          </li>
+          <li>
+            The name &ldquo;PDF-Assistant-RAG,&rdquo; its logo, and branding elements may not be
+            used without explicit permission.
+          </li>
+          <li>
+            AI-generated responses produced by the Platform are provided without warranty and
+            should not be considered professional advice (legal, financial, medical, etc.).
+          </li>
+        </ul>
+      </>
+    ),
+  },
+  {
+    id: "liability",
+    icon: AlertTriangle,
+    title: "7. Limitation of Liability",
+    content: (
+      <>
+        <p>
+          The Platform is provided free of charge as an open-source project. To the fullest extent
+          permitted by law:
+        </p>
+        <ul>
+          <li>
+            The maintainers shall not be liable for any indirect, incidental, special,
+            consequential, or punitive damages arising from your use of the Platform.
+          </li>
+          <li>
+            AI-generated content may contain errors, omissions, or inaccuracies. You should
+            independently verify critical information.
+          </li>
+          <li>
+            The Platform makes no guarantees about uptime, availability, or data durability,
+            though reasonable efforts are made to maintain the service.
+          </li>
+        </ul>
+      </>
+    ),
+  },
+  {
+    id: "termination",
+    icon: Ban,
+    title: "8. Termination",
+    content: (
+      <>
+        <p>
+          We reserve the right to suspend or terminate your access to the Platform at any time,
+          without prior notice, for:
+        </p>
+        <ul>
+          <li>Violation of these Terms of Service</li>
+          <li>Engaging in prohibited or illegal activities</li>
+          <li>Extended inactivity of your account</li>
+          <li>At your request via account deletion</li>
+        </ul>
+        <p>
+          Upon termination, your access to documents, chat history, and account data will be
+          revoked. You may request a data export before account deletion by contacting the
+          maintainers.
+        </p>
+      </>
+    ),
+  },
+  {
+    id: "changes-to-terms",
+    icon: RefreshCw,
+    title: "9. Changes to These Terms",
+    content: (
+      <>
+        <p>
+          We may revise these Terms from time to time. The most current version will always be
+          posted on this page. Material changes will be communicated via:
+        </p>
+        <ul>
+          <li>A notice on the Platform dashboard</li>
+          <li>Email notification to registered users (for significant changes)</li>
+        </ul>
+        <p>
+          Your continued use of the Platform after changes take effect constitutes acceptance of
+          the revised Terms.
+        </p>
+      </>
+    ),
+  },
+  {
+    id: "contact",
+    icon: Mail,
+    title: "10. Contact Us",
+    content: (
+      <>
+        <p>
+          If you have any questions about these Terms, please reach out through the project&rsquo;s
+          official channels:
+        </p>
+        <ul>
+          <li>
+            <strong>GitHub Issues:</strong>{" "}
+            <a
+              href="https://github.com/param20h/PDF-Assistant-RAG/issues"
+              target="_blank"
+              rel="noopener noreferrer"
+            >
+              github.com/param20h/PDF-Assistant-RAG/issues
+            </a>
+          </li>
+          <li>
+            <strong>GitHub Discussions:</strong>{" "}
+            <a
+              href="https://github.com/param20h/PDF-Assistant-RAG/discussions"
+              target="_blank"
+              rel="noopener noreferrer"
+            >
+              github.com/param20h/PDF-Assistant-RAG/discussions
+            </a>
+          </li>
+          <li>
+            <strong>LinkedIn:</strong>{" "}
+            <a
+              href="https://www.linkedin.com/in/param20h/"
+              target="_blank"
+              rel="noopener noreferrer"
+            >
+              linkedin.com/in/param20h
+            </a>
+          </li>
+        </ul>
+      </>
+    ),
+  },
+];
+// Route-level metadata for /terms, mirroring the privacy page so both legal
+// pages expose a title/description (and uses the otherwise-unused `Metadata`
+// type import at the top of this file).
+export const metadata: Metadata = {
+  title: "Terms of Service — Document AI Analyst",
+  description:
+    "The rules and guidelines for using PDF-Assistant-RAG, our open-source document analysis platform.",
+  openGraph: {
+    title: "Terms of Service — Document AI Analyst",
+    description:
+      "The rules and guidelines for using PDF-Assistant-RAG.",
+  },
+};
+/**
+ * Terms of Service page.
+ *
+ * Renders, top to bottom: a sticky header with a "Back to Home" link, a hero
+ * with the page title and last-updated date, a table of contents built from
+ * `sections` (each entry links to `#<section.id>`), the section bodies inside
+ * a typography (`prose`) wrapper, a "Have questions?" link, and the footer.
+ */
+export default function TermsPage() {
+  return (
+    <div className="min-h-screen bg-background">
+      {/* ── Header ────────────────────────────────────── */}
+      <header className="sticky top-0 z-50 border-b border-border/50 bg-card/50 backdrop-blur-md">
+        <div className="mx-auto max-w-4xl flex items-center justify-between px-6 h-14">
+          <Link
+            href="/"
+            className="flex items-center gap-2 text-sm text-muted-foreground hover:text-foreground transition-colors"
+          >
+            <ArrowLeft className="w-4 h-4" />
+            Back to Home
+          </Link>
+          <div className="flex items-center gap-2">
+            <div className="w-7 h-7 rounded-lg bg-primary/15 flex items-center justify-center">
+              <Scale className="w-4 h-4 text-primary" />
+            </div>
+            <span className="font-semibold text-sm">Terms of Service</span>
+          </div>
+        </div>
+      </header>
+      {/* ── Hero ──────────────────────────────────────── */}
+      <section className="border-b border-border/50">
+        <div className="mx-auto max-w-4xl px-6 py-16 sm:py-20 text-center">
+          <div className="inline-flex items-center gap-2 px-4 py-1.5 rounded-full bg-primary/10 border border-primary/20 text-sm text-primary mb-6">
+            <Scale className="w-4 h-4" />
+            Know your rights
+          </div>
+          <h1 className="text-4xl sm:text-5xl font-bold tracking-tight mb-4">
+            Terms of Service
+          </h1>
+          <p className="text-lg text-muted-foreground max-w-2xl mx-auto">
+            The rules and guidelines for using PDF-Assistant-RAG, our open-source document
+            analysis platform.
+          </p>
+          <p className="mt-4 text-sm text-muted-foreground">
+            <em>Last updated: May 30, 2026</em>
+          </p>
+        </div>
+      </section>
+      {/* ── Content ───────────────────────────────────── */}
+      <div className="mx-auto max-w-4xl px-6 py-12 sm:py-16">
+        {/* Table of Contents */}
+        <nav
+          className="mb-12 p-6 rounded-xl border border-border/50 bg-card/30"
+          aria-label="Table of contents"
+        >
+          <h2 className="text-sm font-semibold uppercase tracking-wider text-muted-foreground mb-4">
+            On this page
+          </h2>
+          <ul className="space-y-2">
+            {sections.map((section) => (
+              <li key={section.id}>
+                <a
+                  href={`#${section.id}`}
+                  className="flex items-center gap-2 text-sm text-muted-foreground hover:text-foreground transition-colors"
+                >
+                  <section.icon className="w-3.5 h-3.5 shrink-0 text-primary" />
+                  {section.title}
+                </a>
+              </li>
+            ))}
+          </ul>
+        </nav>
+        {/* Sections */}
+        <div className="prose prose-sm sm:prose-base dark:prose-invert max-w-none prose-headings:font-semibold prose-headings:tracking-tight prose-h2:text-foreground prose-h3:text-foreground prose-p:text-muted-foreground prose-p:leading-relaxed prose-a:text-primary prose-a:no-underline hover:prose-a:underline prose-strong:text-foreground prose-li:text-muted-foreground prose-li:marker:text-primary/60">
+          {sections.map((section) => (
+            <section key={section.id} id={section.id} className="mb-12 scroll-mt-20">
+              <div className="flex items-center gap-3 mb-6">
+                <div className="w-8 h-8 rounded-lg bg-primary/10 flex items-center justify-center shrink-0">
+                  <section.icon className="w-4 h-4 text-primary" />
+                </div>
+                <h2 className="text-xl sm:text-2xl !my-0">{section.title}</h2>
+              </div>
+              {section.content}
+              <hr className="mt-8 border-border/30" />
+            </section>
+          ))}
+        </div>
+        {/* Footer note */}
+        <div className="mt-8 text-center">
+          <p className="text-sm text-muted-foreground">
+            Have questions?{" "}
+            <a
+              href="https://github.com/param20h/PDF-Assistant-RAG/discussions"
+              target="_blank"
+              rel="noopener noreferrer"
+              className="text-primary hover:underline"
+            >
+              Start a discussion
+            </a>
+          </p>
+        </div>
+      </div>
+      {/* ── Footer ────────────────────────────────── */}
+      <footer className="text-center py-6 text-xs text-muted-foreground border-t border-border/50">
+        Built with FastAPI &bull; LangChain &bull; ChromaDB &bull; HuggingFace &bull; Next.js
+      </footer>
+    </div>
+  );
+}

frontend/src/components/document/PDFViewer.tsx CHANGED Viewed

@@ -3,8 +3,16 @@
 import { useState } from "react";
 import { Button } from "@/components/ui/button";
 import { Input } from "@/components/ui/input";
-import { ChevronLeft, ChevronRight, ZoomIn, ZoomOut, Loader2 } from "lucide-react";
 import { API_BASE } from "@/lib/api";
 interface Props {
   documentId: string;
@@ -15,15 +23,24 @@ interface Props {
 export default function PDFViewer({ documentId, currentPage, onPageChange, totalPages }: Props) {
   const [scale, setScale] = useState(1.0);
-  const [loading, setLoading] = useState(true);
-  // Local editable value — initialized from currentPage prop.
-  // The iframe key={documentId-currentPage} already forces remount on
-  // external page changes, so no useEffect sync is needed.
   const [pageInput, setPageInput] = useState(String(currentPage));
   const pdfUrl = `${API_BASE}/api/v1/documents/${documentId}/pdf`;
-  // Append page fragment for native viewer navigation
-  const iframeSrc = `${pdfUrl}#page=${currentPage}`;
   const handlePageSubmit = (e: React.FormEvent) => {
     e.preventDefault();
@@ -31,12 +48,10 @@ export default function PDFViewer({ documentId, currentPage, onPageChange, total
     if (!isNaN(num) && num >= 1 && num <= totalPages) {
       onPageChange(num);
     } else {
-      // Reset to the current valid page without needing a useEffect
       setPageInput(String(currentPage));
     }
   };
   return (
     <div className="h-full flex flex-col bg-background">
       {/* ── Toolbar ─────────────────────────────────── */}
@@ -46,7 +61,11 @@ export default function PDFViewer({ documentId, currentPage, onPageChange, total
             variant="ghost"
             size="icon"
             className="h-7 w-7"
-            onClick={() => onPageChange(Math.max(1, currentPage - 1))}
             disabled={currentPage <= 1}
           >
             <ChevronLeft className="w-4 h-4" />
@@ -68,7 +87,11 @@ export default function PDFViewer({ documentId, currentPage, onPageChange, total
             variant="ghost"
             size="icon"
             className="h-7 w-7"
-            onClick={() => onPageChange(Math.min(totalPages, currentPage + 1))}
             disabled={currentPage >= totalPages}
           >
             <ChevronRight className="w-4 h-4" />
@@ -99,20 +122,50 @@ export default function PDFViewer({ documentId, currentPage, onPageChange, total
       </div>
       {/* ── PDF Render ──────────────────────────────── */}
-      <div className="flex-1 overflow-auto relative">
-        {loading && (
-          <div className="absolute inset-0 flex items-center justify-center bg-background/80 z-10">
-            <Loader2 className="w-6 h-6 animate-spin text-primary" />
-          </div>
-        )}
-        <iframe
-          key={`${documentId}-${currentPage}`}
-          src={iframeSrc}
-          className="w-full h-full border-0"
-          style={{ transform: `scale(${scale})`, transformOrigin: "top left", width: `${100/scale}%`, height: `${100/scale}%` }}
-          onLoad={() => setLoading(false)}
-          title="PDF Viewer"
-        />
       </div>
     </div>
   );

 import { useMemo, useState } from "react";
 import { Button } from "@/components/ui/button";
 import { Input } from "@/components/ui/input";
+import { ChevronLeft, ChevronRight, ZoomIn, ZoomOut, Loader2, AlertCircle } from "lucide-react";
 import { API_BASE } from "@/lib/api";
+import { Document, Page, pdfjs } from "react-pdf";
+// Import styles for react-pdf layers
+import "react-pdf/dist/Page/AnnotationLayer.css";
+import "react-pdf/dist/Page/TextLayer.css";
+// Configure the PDF.js worker from the unpkg CDN, using the build that matches
+// the pdfjs version reported by react-pdf. The scheme is spelled out as https://
+// instead of a protocol-relative "//" URL so the worker script is never fetched
+// over plain HTTP and still resolves when the page origin is not http(s).
+pdfjs.GlobalWorkerOptions.workerSrc = `https://unpkg.com/pdfjs-dist@${pdfjs.version}/build/pdf.worker.min.mjs`;
 interface Props {
   documentId: string;
 export default function PDFViewer({ documentId, currentPage, onPageChange, totalPages }: Props) {
   const [scale, setScale] = useState(1.0);
+  const [, setLoading] = useState(true);
   const [pageInput, setPageInput] = useState(String(currentPage));
+  const [prevCurrentPage, setPrevCurrentPage] = useState(currentPage);
+  // Sync page input state with current page prop updates during render phase
+  if (currentPage !== prevCurrentPage) {
+    setPrevCurrentPage(currentPage);
+    setPageInput(String(currentPage));
+  }
   const pdfUrl = `${API_BASE}/api/v1/documents/${documentId}/pdf`;
+  // Read the stored auth token; guarded so the lookup is skipped where `window` is absent.
+  const token = typeof window !== "undefined" ? localStorage.getItem("token") : null;
+  // Source descriptor handed to <Document>. It is memoized because <Document>
+  // treats a new `file` object identity as a new source and reloads the PDF;
+  // an inline object literal would therefore trigger a reload on every render
+  // (page change, zoom, or typing in the page box).
+  const fileConfig = useMemo(
+    () => ({
+      url: pdfUrl,
+      // Attach the Authorization header only when a token is available.
+      httpHeaders: token ? { Authorization: `Bearer ${token}` } : undefined,
+    }),
+    [pdfUrl, token],
+  );
   const handlePageSubmit = (e: React.FormEvent) => {
     e.preventDefault();
     if (!isNaN(num) && num >= 1 && num <= totalPages) {
       onPageChange(num);
     } else {
       setPageInput(String(currentPage));
     }
   };
   return (
     <div className="h-full flex flex-col bg-background">
       {/* ── Toolbar ─────────────────────────────────── */}
             variant="ghost"
             size="icon"
             className="h-7 w-7"
+            onClick={() => {
+              const newPage = Math.max(1, currentPage - 1);
+              onPageChange(newPage);
+              setPageInput(String(newPage));
+            }}
             disabled={currentPage <= 1}
           >
             <ChevronLeft className="w-4 h-4" />
             variant="ghost"
             size="icon"
             className="h-7 w-7"
+            onClick={() => {
+              const newPage = Math.min(totalPages, currentPage + 1);
+              onPageChange(newPage);
+              setPageInput(String(newPage));
+            }}
             disabled={currentPage >= totalPages}
           >
             <ChevronRight className="w-4 h-4" />
       </div>
       {/* ── PDF Render ──────────────────────────────── */}
+      <div className="flex-1 overflow-auto bg-muted/30 flex justify-center items-start p-4 relative w-full">
+        <Document
+          file={fileConfig}
+          onLoadSuccess={() => setLoading(false)}
+          onLoadError={(err) => {
+            console.error("PDF load error:", err);
+            setLoading(false);
+          }}
+          loading={
+            <div className="absolute inset-0 flex items-center justify-center bg-background/80 z-10">
+              <Loader2 className="w-6 h-6 animate-spin text-primary" />
+            </div>
+          }
+          error={
+            <div className="flex flex-col items-center justify-center p-8 text-center bg-card border border-destructive/20 rounded-lg max-w-md mx-auto my-12 shadow-sm gap-3">
+              <AlertCircle className="w-8 h-8 text-destructive animate-pulse" />
+              <div>
+                <p className="font-semibold text-sm text-foreground mb-1">Failed to load PDF</p>
+                <p className="text-xs text-muted-foreground leading-relaxed">
+                  We encountered an error loading this PDF document. Please verify the document is ready or try refreshing the page.
+                </p>
+              </div>
+            </div>
+          }
+          noData={
+            <div className="flex flex-col items-center justify-center p-8 text-center bg-card border border-border rounded-lg max-w-md mx-auto my-12 shadow-sm gap-2">
+              <p className="font-semibold text-sm text-foreground">No PDF document selected</p>
+              <p className="text-xs text-muted-foreground">Select or upload a document to view it here.</p>
+            </div>
+          }
+          className="shadow-md border border-border bg-card max-w-full"
+        >
+          <Page
+            pageNumber={currentPage}
+            scale={scale}
+            renderAnnotationLayer={false}
+            renderTextLayer={true}
+            loading={
+              <div className="flex items-center justify-center p-8">
+                <Loader2 className="w-6 h-6 animate-spin text-primary" />
+              </div>
+            }
+          />
+        </Document>
       </div>
     </div>
   );

frontend/src/components/layout/Header.tsx CHANGED Viewed

@@ -1,5 +1,6 @@
 "use client";
 import { useAuth } from "@/lib/auth";
 import { useTranslation } from "react-i18next";
 import { useRouter } from "next/navigation";
@@ -22,28 +23,39 @@ import {
   Moon,
   Shield,
   Sun,
 } from "lucide-react";
-import { useSyncExternalStore } from "react";
 import { useTheme } from "next-themes";
 interface HeaderProps {
   sidebarOpen: boolean;
   onToggleSidebar: () => void;
   viewerOpen: boolean;
   onToggleViewer: () => void;
 }
 const subscribe = () => () => {};
 const getSnapshot = () => true;
 const getServerSnapshot = () => false;
-export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onToggleViewer }: HeaderProps) {
   const { user, logout } = useAuth();
   const { t, i18n } = useTranslation();
   const router = useRouter();
   const { theme, setTheme } = useTheme();
-  const mounted = useSyncExternalStore(subscribe, getSnapshot, getServerSnapshot); // ← replaces useState + useEffect
   const isDark = theme === "dark";
   const toggleTheme = () => setTheme(isDark ? "light" : "dark");
@@ -67,79 +79,147 @@ export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onTog
   };
   return (
-    <header className="h-14 flex items-center justify-between px-4 border-b border-border/50 bg-card/50 backdrop-blur-md flex-shrink-0 z-50">
-      {/* Left */}
-      <div className="flex items-center gap-3">
-        <Button variant="ghost" size="icon" className="h-8 w-8" onClick={onToggleSidebar} title={sidebarOpen ? t("header.closeSidebar") : t("header.openSidebar")}>
-          {sidebarOpen ? <PanelLeftClose className="w-4 h-4" /> : <PanelLeftOpen className="w-4 h-4" />}
-        </Button>
-        <div className="flex items-center gap-2">
-          <div className="w-7 h-7 rounded-lg bg-primary/15 flex items-center justify-center">
-            <Brain className="w-4 h-4 text-primary" />
           </div>
-          <span className="font-semibold text-sm hidden sm:inline">{t("common.appName")}</span>
         </div>
-      </div>
-      {/* Right */}
-      <div className="flex items-center gap-2">
-        <Button variant="ghost" size="icon" className="h-8 w-8" onClick={onToggleViewer} title={viewerOpen ? t("header.closeViewer") : t("header.openViewer")}>
-          {viewerOpen ? <PanelRightClose className="w-4 h-4" /> : <PanelRightOpen className="w-4 h-4" />}
-        </Button>
-        {mounted && (
-          <Button variant="ghost" size="icon" className="h-8 w-8" onClick={toggleTheme} title={isDark ? t("header.lightMode") : t("header.darkMode")}>
-            {isDark ? <Sun className="w-4 h-4" /> : <Moon className="w-4 h-4" />}
           </Button>
-        )}
-        <select
-          aria-label={t("common.language")}
-          value={i18n.resolvedLanguage || "en"}
-          onChange={(e) => void i18n.changeLanguage(e.target.value)}
-          className="h-8 rounded-md border border-border bg-background px-2 text-xs text-foreground"
-        >
-          <option value="en">{languageLabel("en")}</option>
-          <option value="hi">{languageLabel("hi")}</option>
-          <option value="es">{languageLabel("es")}</option>
-          <option value="fr">{languageLabel("fr")}</option>
-        </select>
-        <DropdownMenu>
-          <DropdownMenuTrigger
-            render={
-              <button className="flex items-center h-8 gap-2 px-2 rounded-md hover:bg-accent transition-colors cursor-pointer">
-                <Avatar className="w-6 h-6">
-                  <AvatarFallback className="text-[10px] bg-primary/20 text-primary">
-                    {user?.username?.slice(0, 2).toUpperCase() || "U"}
-                  </AvatarFallback>
-                </Avatar>
-                <span className="text-sm hidden sm:inline">{user?.username}</span>
-              </button>
-            }
-          />
-          <DropdownMenuContent align="end" className="w-56">
-            <div className="px-3 py-2">
-              <p className="text-sm font-medium">{user?.username}</p>
-              <p className="text-xs text-muted-foreground truncate">{user?.email}</p>
-            </div>
-            <DropdownMenuSeparator />
-            {user?.is_admin && (
-              <DropdownMenuItem className="cursor-pointer" onClick={() => router.push("/admin")}>
-                <Shield className="w-4 h-4 mr-2" />
-                Admin metrics
               </DropdownMenuItem>
-            )}
-            {user?.is_admin && <DropdownMenuSeparator />}
-            <DropdownMenuItem className="text-destructive cursor-pointer" onClick={handleLogout}>
-              <LogOut className="w-4 h-4 mr-2" />
-              {t("header.signOut")}
-            </DropdownMenuItem>
-          </DropdownMenuContent>
-        </DropdownMenu>
       </div>
-    </header>
   );
 }

 "use client";
+import { useState } from "react";
 import { useAuth } from "@/lib/auth";
 import { useTranslation } from "react-i18next";
 import { useRouter } from "next/navigation";
   Moon,
   Shield,
   Sun,
+  Menu,
+  X,
 } from "lucide-react";
 import { useTheme } from "next-themes";
+import { useSyncExternalStore } from "react";
 interface HeaderProps {
   sidebarOpen: boolean;
   onToggleSidebar: () => void;
   viewerOpen: boolean;
   onToggleViewer: () => void;
+  /** Pass DocumentSidebar JSX so the mobile sheet can render it */
+  mobileSheetContent?: React.ReactNode;
 }
 const subscribe = () => () => {};
 const getSnapshot = () => true;
 const getServerSnapshot = () => false;
+export default function Header({
+  sidebarOpen,
+  onToggleSidebar,
+  viewerOpen,
+  onToggleViewer,
+  mobileSheetContent,
+}: HeaderProps) {
   const { user, logout } = useAuth();
   const { t, i18n } = useTranslation();
   const router = useRouter();
   const { theme, setTheme } = useTheme();
+  const mounted = useSyncExternalStore(subscribe, getSnapshot, getServerSnapshot);
+  const [sheetOpen, setSheetOpen] = useState(false);
   const isDark = theme === "dark";
   const toggleTheme = () => setTheme(isDark ? "light" : "dark");
   };
   return (
+    <>
+      {/* Top bar. All user-visible labels go through t() so they follow the active locale. */}
+      <header className="h-14 flex items-center justify-between px-4 border-b border-border/50 bg-card/50 backdrop-blur-md flex-shrink-0 z-50">
+        {/* Left */}
+        <div className="flex items-center gap-3">
+          {/* Hamburger — mobile only; opens the slide-in sheet */}
+          <Button
+            variant="ghost"
+            size="icon"
+            className="h-8 w-8 md:hidden"
+            onClick={() => setSheetOpen(true)}
+            title={t("header.openSidebar")}
+          >
+            <Menu className="w-4 h-4" />
+          </Button>
+          {/* Desktop sidebar toggle — hidden on mobile */}
+          <Button
+            variant="ghost"
+            size="icon"
+            className="h-8 w-8 hidden md:inline-flex"
+            onClick={onToggleSidebar}
+            title={sidebarOpen ? t("header.closeSidebar") : t("header.openSidebar")}
+          >
+            {sidebarOpen ? (
+              <PanelLeftClose className="w-4 h-4" />
+            ) : (
+              <PanelLeftOpen className="w-4 h-4" />
+            )}
+          </Button>
+          <div className="flex items-center gap-2">
+            <div className="w-7 h-7 rounded-lg bg-primary/15 flex items-center justify-center">
+              <Brain className="w-4 h-4 text-primary" />
+            </div>
+            <span className="font-semibold text-sm hidden sm:inline">
+              {t("common.appName")}
+            </span>
           </div>
         </div>
+        {/* Right */}
+        <div className="flex items-center gap-2">
+          <Button
+            variant="ghost"
+            size="icon"
+            className="h-8 w-8"
+            onClick={onToggleViewer}
+            title={viewerOpen ? t("header.closeViewer") : t("header.openViewer")}
+          >
+            {viewerOpen ? (
+              <PanelRightClose className="w-4 h-4" />
+            ) : (
+              <PanelRightOpen className="w-4 h-4" />
+            )}
           </Button>
+          {/* Theme toggle is rendered only once `mounted` is true */}
+          {mounted && (
+            <Button
+              variant="ghost"
+              size="icon"
+              className="h-8 w-8"
+              onClick={toggleTheme}
+              title={isDark ? t("header.lightMode") : t("header.darkMode")}
+            >
+              {isDark ? <Sun className="w-4 h-4" /> : <Moon className="w-4 h-4" />}
+            </Button>
+          )}
+          <DropdownMenu>
+            <DropdownMenuTrigger className="flex items-center h-8 gap-2 px-2 rounded-md hover:bg-accent transition-colors cursor-pointer">
+              <Avatar className="w-6 h-6">
+                <AvatarFallback className="text-[10px] bg-primary/20 text-primary">
+                  {/* First two letters of the username, or "U" when no username is available */}
+                  {user?.username?.slice(0, 2).toUpperCase() || "U"}
+                </AvatarFallback>
+              </Avatar>
+              <span className="text-sm hidden sm:inline">{user?.username}</span>
+            </DropdownMenuTrigger>
+            <DropdownMenuContent align="end" className="w-48">
+              <div className="px-3 py-2">
+                <p className="text-sm font-medium">{user?.username}</p>
+                <p className="text-xs text-muted-foreground truncate">{user?.email}</p>
+              </div>
+              <DropdownMenuSeparator />
+              <DropdownMenuItem
+                className="text-destructive cursor-pointer"
+                onClick={handleLogout}
+              >
+                <LogOut className="w-4 h-4 mr-2" />
+                {t("header.signOut")}
               </DropdownMenuItem>
+            </DropdownMenuContent>
+          </DropdownMenu>
+        </div>
+      </header>
+      {/* ── Mobile Navigation Sheet ──────────────────────────────────── */}
+      {/* Backdrop */}
+      {sheetOpen && (
+        <div
+          className="fixed inset-0 z-40 bg-black/50 backdrop-blur-sm md:hidden"
+          onClick={() => setSheetOpen(false)}
+          aria-hidden="true"
+        />
+      )}
+      {/* Slide-in panel */}
+<aside
+  className={[
+    "fixed inset-y-0 left-0 z-50 w-72 flex flex-col",
+    "bg-sidebar border-r border-sidebar-border",
+    "transform transition-transform duration-300 ease-in-out md:hidden",
+    sheetOpen ? "translate-x-0" : "-translate-x-full",
+  ].join(" ")}
+  aria-label="Mobile navigation"
+  aria-hidden={!sheetOpen}
+  inert={!sheetOpen ? true : undefined}
+>
+  {/* Sheet header: app branding plus a close button for the mobile panel */}
+  <div className="h-14 flex items-center justify-between px-4 border-b border-sidebar-border flex-shrink-0">
+    <div className="flex items-center gap-2">
+      <div className="w-7 h-7 rounded-lg bg-primary/15 flex items-center justify-center">
+        <Brain className="w-4 h-4 text-primary" />
       </div>
+      {/* App name comes from the translation catalogue rather than a fixed English string */}
+      <span className="font-semibold text-sm">{t("common.appName")}</span>
+    </div>
+    <Button
+      variant="ghost"
+      size="icon"
+      className="h-8 w-8"
+      onClick={() => setSheetOpen(false)}
+      aria-label="Close navigation"
+    >
+      <X className="w-4 h-4" />
+    </Button>
+  </div>
+  {/* Sidebar content */}
+  <div className="flex-1 overflow-hidden">
+     {sheetOpen ? mobileSheetContent : null}
+  </div>
+</aside>
+    </>
   );
 }