EZOFISOCR

Running

App Files Files Community

Seth committed on Jan 3

Commit

8e8c6a4

1 Parent(s): a3239f4

Update

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.DS_Store +0 -0
.gitattributes +35 -35
API_KEY_QUICK_START.md +140 -0
Dockerfile +83 -50
EXTERNAL_API_DOCUMENTATION.md +458 -0
FIREBASE_OTP_SETUP.md +296 -0
GOOGLE_OAUTH_SETUP.md +79 -0
HUGGINGFACE_SPACES_SETUP.md +186 -0
IMPLEMENTATION_COMPLETE.md +257 -0
README.md +10 -10
backend/app/api_key_auth.py +100 -0
backend/app/apollo_service.py +444 -0
backend/app/auth.py +92 -0
backend/app/auth_routes.py +347 -0
backend/app/brevo_service.py +486 -0
backend/app/db.py +18 -18
backend/app/email_validator.py +61 -0
backend/app/firebase_auth.py +92 -0
backend/app/main.py +786 -292
backend/app/models.py +136 -32
backend/app/monday_service.py +391 -0
backend/app/openrouter_client.py +862 -627
backend/app/otp_service.py +197 -0
backend/app/schemas.py +26 -26
backend/requirements.txt +15 -11
frontend/build-env.sh +22 -0
frontend/index.html +13 -12
frontend/package.json +26 -25
frontend/postcss.config.cjs +6 -6
frontend/src/App.jsx +106 -30
frontend/src/Layout.jsx +179 -143
frontend/src/components/ErrorBoundary.jsx +72 -72
frontend/src/components/ExportButtons.jsx +692 -320
frontend/src/components/ShareLinkModal.jsx +141 -0
frontend/src/components/ShareModal.jsx +197 -0
frontend/src/components/auth/LoginForm.jsx +512 -0
frontend/src/components/ocr/DocumentPreview.jsx +229 -236
frontend/src/components/ocr/ExtractionOutput.jsx +1201 -639
frontend/src/components/ocr/ProcessingStatus.jsx +118 -111
frontend/src/components/ocr/UpgradeModal.jsx +213 -0
frontend/src/components/ocr/UploadZone.jsx +251 -147
frontend/src/components/ui/badge.jsx +24 -24
frontend/src/components/ui/button.jsx +38 -38
frontend/src/components/ui/dropdown-menu.jsx +113 -113
frontend/src/components/ui/input.jsx +14 -14
frontend/src/components/ui/select.jsx +116 -116
frontend/src/components/ui/separator.jsx +16 -0
frontend/src/components/ui/tabs.jsx +45 -45
frontend/src/config/firebase.js +30 -0
frontend/src/contexts/AuthContext.jsx +115 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

.gitattributes CHANGED Viewed

@@ -1,35 +1,35 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

API_KEY_QUICK_START.md ADDED Viewed

	@@ -0,0 +1,140 @@

+# API Key Authentication - Quick Start Guide
+## Summary
+API key authentication has been successfully implemented for external applications. The `/api/extract` endpoint now supports both JWT Bearer tokens and API keys.
+## Quick Steps to Use from External Applications
+### 1. Get an API Key
+**Option A: Via Web UI (if available)**
+- Log in to your account
+- Navigate to API Keys section
+- Create a new API key
+- Copy and store it securely
+**Option B: Via API**
+```bash
+# Step 1: Authenticate and get JWT token
+curl -X POST https://your-api-url/api/auth/otp/request \
+  -H "Content-Type: application/json" \
+  -d '{"email": "your-email@company.com"}'
+# Step 2: Verify OTP
+curl -X POST https://your-api-url/api/auth/otp/verify \
+  -H "Content-Type: application/json" \
+  -d '{"email": "your-email@company.com", "otp": "123456"}'
+# Step 3: Create API key (use token from step 2)
+curl -X POST https://your-api-url/api/auth/api-key/create \
+  -H "Authorization: Bearer YOUR_JWT_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{"name": "My App"}'
+```
+**Response:**
+```json
+{
+  "success": true,
+  "api_key": "sk_live_abc123...",  // ⚠️ SAVE THIS!
+  "key_prefix": "sk_live_abc...",
+  "message": "API key created successfully. Store this key securely - it will not be shown again!"
+}
+```
+### 2. Use API Key to Extract Documents
+```bash
+curl -X POST https://your-api-url/api/extract \
+  -H "X-API-Key: sk_live_abc123..." \
+  -F "file=@document.pdf" \
+  -F "key_fields=Invoice Number,Invoice Date,Total Amount"
+```
+## Authentication Methods
+The `/api/extract` endpoint accepts **either**:
+1. **API Key**: `X-API-Key: sk_live_...` header
+2. **JWT Token**: `Authorization: Bearer <token>` header
+## New Endpoints
+- `POST /api/auth/api-key/create` - Create new API key (requires JWT)
+- `GET /api/auth/api-keys` - List your API keys (requires JWT)
+- `DELETE /api/auth/api-key/{key_id}` - Deactivate API key (requires JWT)
+## Security Features
+- ✅ API keys are hashed (SHA-256) before storage
+- ✅ Only key prefix shown when listing keys
+- ✅ Usage tracking (`last_used_at` timestamp)
+- ✅ Soft delete (deactivation) support
+- ✅ One key per user account
+## Example Code
+### Python
+```python
+import requests
+API_KEY = "sk_live_abc123..."
+url = "https://your-api-url/api/extract"
+with open("document.pdf", "rb") as f:
+    response = requests.post(
+        url,
+        headers={"X-API-Key": API_KEY},
+        files={"file": f},
+        data={"key_fields": "Invoice Number,Invoice Date"}
+    )
+    print(response.json())
+```
+### JavaScript
+```javascript
+const FormData = require('form-data');
+const fs = require('fs');
+const axios = require('axios');
+const form = new FormData();
+form.append('file', fs.createReadStream('document.pdf'));
+form.append('key_fields', 'Invoice Number,Invoice Date');
+axios.post('https://your-api-url/api/extract', form, {
+  headers: {
+    'X-API-Key': 'sk_live_abc123...',
+    ...form.getHeaders()
+  }
+}).then(response => console.log(response.data));
+```
+## Full Documentation
+See `EXTERNAL_API_DOCUMENTATION.md` for complete documentation with:
+- Detailed API reference
+- Error handling
+- Response formats
+- Multiple language examples (Python, JavaScript, PHP)
+- Best practices
+## Database Migration
+The new `api_keys` table will be created automatically when you restart the application (SQLAlchemy's `create_all` handles this).
+## Testing
+1. Start your backend server
+2. Create an API key using the steps above
+3. Test the extraction endpoint with the API key
+4. Verify the response contains extracted data
+## Notes
+- API keys are shown **only once** when created - store them securely!
+- Business email required for account creation
+- Max file size: 4 MB
+- Supported formats: PDF, PNG, JPEG, TIFF

Dockerfile CHANGED Viewed

@@ -1,50 +1,83 @@
-# ---------- 1) Build frontend (React + Vite) ----------
-FROM node:20-alpine AS frontend-build
-WORKDIR /frontend
-# Install frontend dependencies
-COPY frontend/package*.json ./
-RUN npm install
-# Copy rest of frontend and build
-COPY frontend/ .
-RUN npm run build
-# Vite will output to /frontend/dist by default
-# ---------- 2) Backend (FastAPI + Python) ----------
-FROM python:3.11-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-ENV PYTHONUNBUFFERED=1
-WORKDIR /app
-# System deps (optional but useful for some libs)
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential \
-    && rm -rf /var/lib/apt/lists/*
-# Install backend dependencies
-COPY backend/requirements.txt ./backend/requirements.txt
-RUN pip install --no-cache-dir -r backend/requirements.txt
-# Copy backend code
-COPY backend ./backend
-# Copy built frontend into backend/frontend_dist
-# FastAPI will serve from this folder later
-RUN mkdir -p backend/frontend_dist
-COPY --from=frontend-build /frontend/dist ./backend/frontend_dist
-# Create data directory for SQLite
-RUN mkdir -p data
-# Env vars used in backend/db.py etc.
-ENV DB_PATH=/app/data/app.db
-ENV PORT=7860
-ENV PYTHONPATH=/app
-EXPOSE 7860
-# Launch FastAPI app (we'll use backend.app.main:app)
-CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "7860"]

+# ---------- 1) Build frontend (React + Vite) ----------
+FROM node:20-alpine AS frontend-build
+WORKDIR /frontend
+# Accept build arguments for Vite environment variables
+ARG VITE_FIREBASE_API_KEY
+ARG VITE_FIREBASE_AUTH_DOMAIN
+ARG VITE_FIREBASE_PROJECT_ID
+ARG VITE_FIREBASE_STORAGE_BUCKET
+ARG VITE_FIREBASE_MESSAGING_SENDER_ID
+ARG VITE_FIREBASE_APP_ID
+ARG VITE_API_BASE_URL
+# Set as environment variables so they're available to the build script
+ENV VITE_FIREBASE_API_KEY=$VITE_FIREBASE_API_KEY
+ENV VITE_FIREBASE_AUTH_DOMAIN=$VITE_FIREBASE_AUTH_DOMAIN
+ENV VITE_FIREBASE_PROJECT_ID=$VITE_FIREBASE_PROJECT_ID
+ENV VITE_FIREBASE_STORAGE_BUCKET=$VITE_FIREBASE_STORAGE_BUCKET
+ENV VITE_FIREBASE_MESSAGING_SENDER_ID=$VITE_FIREBASE_MESSAGING_SENDER_ID
+ENV VITE_FIREBASE_APP_ID=$VITE_FIREBASE_APP_ID
+ENV VITE_API_BASE_URL=$VITE_API_BASE_URL
+# Install frontend dependencies
+COPY frontend/package*.json ./
+RUN npm install
+# Copy rest of frontend
+COPY frontend/ .
+# Create .env file from environment variables and build
+# Inline the script to avoid permission issues
+RUN echo "Checking environment variables..." && \
+    [ -z "$VITE_FIREBASE_API_KEY" ] && echo "WARNING: VITE_FIREBASE_API_KEY is not set" || echo "✓ VITE_FIREBASE_API_KEY is set" && \
+    [ -z "$VITE_FIREBASE_AUTH_DOMAIN" ] && echo "WARNING: VITE_FIREBASE_AUTH_DOMAIN is not set" || echo "✓ VITE_FIREBASE_AUTH_DOMAIN is set" && \
+    [ -z "$VITE_FIREBASE_PROJECT_ID" ] && echo "WARNING: VITE_FIREBASE_PROJECT_ID is not set" || echo "✓ VITE_FIREBASE_PROJECT_ID is set" && \
+    echo "VITE_FIREBASE_API_KEY=${VITE_FIREBASE_API_KEY:-}" > .env && \
+    echo "VITE_FIREBASE_AUTH_DOMAIN=${VITE_FIREBASE_AUTH_DOMAIN:-}" >> .env && \
+    echo "VITE_FIREBASE_PROJECT_ID=${VITE_FIREBASE_PROJECT_ID:-}" >> .env && \
+    echo "VITE_FIREBASE_STORAGE_BUCKET=${VITE_FIREBASE_STORAGE_BUCKET:-}" >> .env && \
+    echo "VITE_FIREBASE_MESSAGING_SENDER_ID=${VITE_FIREBASE_MESSAGING_SENDER_ID:-}" >> .env && \
+    echo "VITE_FIREBASE_APP_ID=${VITE_FIREBASE_APP_ID:-}" >> .env && \
+    echo "VITE_API_BASE_URL=${VITE_API_BASE_URL:-}" >> .env && \
+    echo "Created .env file with environment variables" && \
+    npm run build
+# Vite will output to /frontend/dist by default
+# ---------- 2) Backend (FastAPI + Python) ----------
+FROM python:3.11-slim
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+WORKDIR /app
+# System deps (optional but useful for some libs)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+# Install backend dependencies
+COPY backend/requirements.txt ./backend/requirements.txt
+RUN pip install --no-cache-dir -r backend/requirements.txt
+# Copy backend code
+COPY backend ./backend
+# Copy built frontend into backend/frontend_dist
+# FastAPI will serve from this folder later
+RUN mkdir -p backend/frontend_dist
+COPY --from=frontend-build /frontend/dist ./backend/frontend_dist
+# Create data directory for SQLite
+RUN mkdir -p data
+# Env vars used in backend/db.py etc.
+ENV DB_PATH=/app/data/app.db
+ENV PORT=7860
+ENV PYTHONPATH=/app
+EXPOSE 7860
+# Launch FastAPI app (we'll use backend.app.main:app)
+CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "7860"]

EXTERNAL_API_DOCUMENTATION.md ADDED Viewed

	@@ -0,0 +1,458 @@

+# External API Documentation
+This document explains how to use the Document Parsing API from external applications using API key authentication.
+## Table of Contents
+1. [Overview](#overview)
+2. [Authentication](#authentication)
+3. [API Endpoints](#api-endpoints)
+4. [Usage Examples](#usage-examples)
+5. [Response Format](#response-format)
+6. [Error Handling](#error-handling)
+## Overview
+The Document Parsing API allows external applications to extract text and structured data from PDF and image files. The API supports:
+- **File Types**: PDF, PNG, JPEG, TIFF
+- **Max File Size**: 4 MB
+- **Authentication**: API Key (via `X-API-Key` header) or JWT Bearer token
+- **Response Format**: JSON
+## Authentication
+### Step 1: Create an Account
+First, you need to create an account using one of these methods:
+1. **Firebase Authentication** (via web UI)
+2. **OTP Authentication** (via API)
+#### OTP Authentication Flow
+```bash
+# 1. Request OTP
+curl -X POST https://your-api-url/api/auth/otp/request \
+  -H "Content-Type: application/json" \
+  -d '{
+    "email": "your-business-email@company.com"
+  }'
+# Response:
+# {
+#   "success": true,
+#   "message": "OTP sent to your email"
+# }
+# 2. Verify OTP and get JWT token
+curl -X POST https://your-api-url/api/auth/otp/verify \
+  -H "Content-Type: application/json" \
+  -d '{
+    "email": "your-business-email@company.com",
+    "otp": "123456"
+  }'
+# Response:
+# {
+#   "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
+#   "user": { ... }
+# }
+```
+**Note**: Only business email addresses are allowed (no Gmail, Yahoo, etc.)
+### Step 2: Create an API Key
+Once authenticated, create an API key for your external application:
+```bash
+# Create API key (requires JWT token from Step 1)
+curl -X POST https://your-api-url/api/auth/api-key/create \
+  -H "Authorization: Bearer YOUR_JWT_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "name": "My External App"
+  }'
+# Response:
+# {
+#   "success": true,
+#   "api_key": "sk_live_abc123...",  # ⚠️ SAVE THIS - shown only once!
+#   "key_id": 1,
+#   "key_prefix": "sk_live_abc...",
+#   "name": "My External App",
+#   "created_at": "2024-01-15T10:30:00",
+#   "message": "API key created successfully. Store this key securely - it will not be shown again!"
+# }
+```
+**⚠️ IMPORTANT**: The full API key is only shown once when created. Store it securely in your application's environment variables or secret management system.
+### Step 3: Use API Key for Authentication
+Use the API key in the `X-API-Key` header for all subsequent API calls:
+```bash
+curl -X POST https://your-api-url/api/extract \
+  -H "X-API-Key: sk_live_abc123..." \
+  -F "file=@document.pdf" \
+  -F "key_fields=Invoice Number,Invoice Date,Total Amount"
+```
+## API Endpoints
+### 1. Document Extraction
+**Endpoint**: `POST /api/extract`
+**Authentication**:
+- API Key: `X-API-Key: <your-api-key>`
+- OR JWT: `Authorization: Bearer <jwt-token>`
+**Parameters**:
+- `file` (required): The document file (PDF, PNG, JPEG, TIFF)
+- `key_fields` (optional): Comma-separated list of specific fields to extract
+**Example Request**:
+```bash
+curl -X POST https://your-api-url/api/extract \
+  -H "X-API-Key: sk_live_abc123..." \
+  -F "file=@invoice.pdf" \
+  -F "key_fields=Invoice Number,Invoice Date,Total Amount,PO Number"
+```
+**Example without `key_fields` (file upload only)**:
+```bash
+curl -X POST https://your-api-url/api/extract \
+  -H "X-API-Key: sk_live_abc123..." \
+  -F "file=@/path/to/document.pdf"
+```
+### 2. List API Keys
+**Endpoint**: `GET /api/auth/api-keys`
+**Authentication**: JWT Bearer token (required)
+**Example**:
+```bash
+curl -X GET https://your-api-url/api/auth/api-keys \
+  -H "Authorization: Bearer YOUR_JWT_TOKEN"
+```
+**Response**:
+```json
+{
+  "success": true,
+  "api_keys": [
+    {
+      "id": 1,
+      "name": "My External App",
+      "key_prefix": "sk_live_abc...",
+      "is_active": true,
+      "last_used_at": "2024-01-15T14:30:00",
+      "created_at": "2024-01-15T10:30:00"
+    }
+  ]
+}
+```
+### 3. Delete API Key
+**Endpoint**: `DELETE /api/auth/api-key/{key_id}`
+**Authentication**: JWT Bearer token (required)
+**Example**:
+```bash
+curl -X DELETE https://your-api-url/api/auth/api-key/1 \
+  -H "Authorization: Bearer YOUR_JWT_TOKEN"
+```
+## Usage Examples
+### Python Example
+```python
+import requests
+# API Configuration
+API_BASE_URL = "https://your-api-url"
+API_KEY = "sk_live_abc123..."  # Your API key
+# Extract document
+def extract_document(file_path, key_fields=None):
+    url = f"{API_BASE_URL}/api/extract"
+    headers = {
+        "X-API-Key": API_KEY
+    }
+    with open(file_path, 'rb') as f:
+        files = {'file': f}
+        data = {}
+        if key_fields:
+            data['key_fields'] = key_fields
+        response = requests.post(url, headers=headers, files=files, data=data)
+        response.raise_for_status()
+        return response.json()
+# Usage
+result = extract_document("invoice.pdf", key_fields="Invoice Number,Invoice Date,Total Amount")
+print(result)
+```
+### JavaScript/Node.js Example
+```javascript
+const FormData = require('form-data');
+const fs = require('fs');
+const axios = require('axios');
+// API Configuration
+const API_BASE_URL = 'https://your-api-url';
+const API_KEY = 'sk_live_abc123...'; // Your API key
+// Extract document
+async function extractDocument(filePath, keyFields = null) {
+  const form = new FormData();
+  form.append('file', fs.createReadStream(filePath));
+  if (keyFields) {
+    form.append('key_fields', keyFields);
+  }
+  try {
+    const response = await axios.post(`${API_BASE_URL}/api/extract`, form, {
+      headers: {
+        'X-API-Key': API_KEY,
+        ...form.getHeaders()
+      }
+    });
+    return response.data;
+  } catch (error) {
+    console.error('Error:', error.response?.data || error.message);
+    throw error;
+  }
+}
+// Usage
+extractDocument('invoice.pdf', 'Invoice Number,Invoice Date,Total Amount')
+  .then(result => console.log(result))
+  .catch(error => console.error(error));
+```
+### PHP Example
+```php
+<?php
+$apiBaseUrl = "https://your-api-url";
+$apiKey = "sk_live_abc123..."; // Your API key
+function extractDocument($filePath, $keyFields = null) {
+    global $apiBaseUrl, $apiKey;
+    $url = $apiBaseUrl . "/api/extract";
+    $curl = curl_init();
+    $postData = [
+        'file' => new CURLFile($filePath)
+    ];
+    if ($keyFields) {
+        $postData['key_fields'] = $keyFields;
+    }
+    curl_setopt_array($curl, [
+        CURLOPT_URL => $url,
+        CURLOPT_RETURNTRANSFER => true,
+        CURLOPT_POST => true,
+        CURLOPT_POSTFIELDS => $postData,
+        CURLOPT_HTTPHEADER => [
+            "X-API-Key: " . $apiKey
+        ]
+    ]);
+    $response = curl_exec($curl);
+    $httpCode = curl_getinfo($curl, CURLINFO_HTTP_CODE);
+    curl_close($curl);
+    if ($httpCode !== 200) {
+        throw new Exception("API request failed: " . $response);
+    }
+    return json_decode($response, true);
+}
+// Usage
+try {
+    $result = extractDocument("invoice.pdf", "Invoice Number,Invoice Date,Total Amount");
+    print_r($result);
+} catch (Exception $e) {
+    echo "Error: " . $e->getMessage();
+}
+?>
+```
+## Response Format
+### Success Response
+```json
+{
+  "id": 123,
+  "fileName": "invoice.pdf",
+  "fileType": "application/pdf",
+  "fileSize": "2.5 MB",
+  "status": "completed",
+  "confidence": 92.5,
+  "fieldsExtracted": 15,
+  "totalTime": 3500,
+  "fields": {
+    "page_1": {
+      "text": "Extracted text from page 1...",
+      "table": {
+        "row_1": {
+          "column_1": "value1",
+          "column_2": "value2"
+        }
+      },
+      "footer_notes": ["Note 1", "Note 2"]
+    }
+  },
+  "full_text": "Complete extracted text from all pages...",
+  "Fields": {
+    "Invoice Number": "INV-001",
+    "Invoice Date": "2024-01-15",
+    "Total Amount": "$1,234.56"
+  },
+  "stages": {
+    "uploading": {
+      "time": 525,
+      "status": "completed",
+      "variation": "normal"
+    },
+    "aiAnalysis": {
+      "time": 1925,
+      "status": "completed",
+      "variation": "normal"
+    },
+    "dataExtraction": {
+      "time": 700,
+      "status": "completed",
+      "variation": "fast"
+    },
+    "outputRendering": {
+      "time": 350,
+      "status": "completed",
+      "variation": "normal"
+    }
+  },
+  "errorMessage": null
+}
+```
+### Response Fields
+- `id`: Extraction record ID
+- `fileName`: Original filename
+- `fileType`: MIME type of the file
+- `fileSize`: File size as string
+- `status`: "completed" or "failed"
+- `confidence`: Extraction confidence (0-100)
+- `fieldsExtracted`: Number of fields extracted
+- `totalTime`: Total processing time in milliseconds
+- `fields`: Structured data with page-wise extraction (tables, text, metadata)
+- `full_text`: Complete extracted text from all pages
+- `Fields`: User-specified fields extracted (if `key_fields` parameter was provided)
+- `stages`: Processing stage timings
+- `errorMessage`: Error message if extraction failed
+## Error Handling
+### Authentication Errors
+**401 Unauthorized** - Invalid or missing API key:
+```json
+{
+  "detail": "Invalid API key"
+}
+```
+**401 Unauthorized** - No authentication provided:
+```json
+{
+  "detail": "Authentication required. Provide either a Bearer token or X-API-Key header."
+}
+```
+### Validation Errors
+**400 Bad Request** - File too large:
+```json
+{
+  "detail": "File size exceeds 4 MB limit. Your file is 5.2 MB."
+}
+```
+**400 Bad Request** - Invalid file type:
+```json
+{
+  "detail": "Only PDF, PNG, JPG, and TIFF files are allowed."
+}
+```
+### Processing Errors
+**500 Internal Server Error** - Extraction failed:
+```json
+{
+  "id": 123,
+  "status": "failed",
+  "confidence": 0.0,
+  "fieldsExtracted": 0,
+  "errorMessage": "OCR processing failed: ..."
+}
+```
+## Best Practices
+1. **Store API Keys Securely**: Never commit API keys to version control. Use environment variables or secret management systems.
+2. **Handle Errors Gracefully**: Always check the `status` field in the response. If `status` is "failed", check `errorMessage` for details.
+3. **Respect Rate Limits**: If rate limiting is implemented, handle 429 responses appropriately with exponential backoff.
+4. **Validate File Types**: Check file type and size before uploading to avoid unnecessary API calls.
+5. **Use Specific Fields**: When you know what fields to extract, use the `key_fields` parameter for better accuracy and faster processing.
+6. **Monitor API Key Usage**: Regularly check your API keys via the `/api/auth/api-keys` endpoint to monitor usage and detect unauthorized access.
+## Security Notes
+- API keys are hashed before storage in the database
+- Only the key prefix is shown when listing API keys
+- API keys can be deactivated (soft deleted) but not permanently deleted
+- Each API key is tied to a specific user account
+- API key usage is tracked with `last_used_at` timestamp
+## Support
+For issues or questions:
+1. Check the error message in the API response
+2. Verify your API key is active and correct
+3. Ensure your file meets the requirements (type, size)
+4. Check the API status endpoint: `GET /ping`

FIREBASE_OTP_SETUP.md ADDED Viewed

	@@ -0,0 +1,296 @@

+# Firebase Authentication + OTP Setup Guide
+This application uses Firebase Authentication for Google sign-in and Brevo for OTP email delivery. Only business email addresses are allowed.
+## Prerequisites
+1. Firebase project
+2. Brevo account (for sending OTP emails)
+3. Business email domain verification
+---
+## Step 1: Firebase Setup
+### 1.1 Create Firebase Project
+1. Go to [Firebase Console](https://console.firebase.google.com/)
+2. Click "Add project" or select an existing project
+3. Follow the setup wizard
+### 1.2 Enable Google Authentication
+1. In Firebase Console, go to **Authentication** → **Sign-in method**
+2. Click on **Google** provider
+3. Enable it and set your project support email
+4. Save the changes
+### 1.3 Get Firebase Web App Configuration
+1. In Firebase Console, go to **Project Settings** (gear icon)
+2. Scroll down to "Your apps" section
+3. Click the **Web** icon (`</>`) to add a web app
+4. Register your app (you can skip Firebase Hosting for now)
+5. Copy the Firebase configuration object
+### 1.4 Get Firebase Service Account Key
+1. In Firebase Console, go to **Project Settings** → **Service accounts**
+2. Click **Generate new private key**
+3. Download the JSON file (keep it secure!)
+### 1.5 Set Frontend Environment Variables
+Create or update `frontend/.env`:
+```bash
+VITE_FIREBASE_API_KEY=your-api-key
+VITE_FIREBASE_AUTH_DOMAIN=your-project.firebaseapp.com
+VITE_FIREBASE_PROJECT_ID=your-project-id
+VITE_FIREBASE_STORAGE_BUCKET=your-project.appspot.com
+VITE_FIREBASE_MESSAGING_SENDER_ID=your-sender-id
+VITE_FIREBASE_APP_ID=your-app-id
+```
+### 1.6 Set Backend Environment Variables
+You have two options for Firebase Admin SDK:
+**Option A: Service Account JSON File**
+```bash
+FIREBASE_SERVICE_ACCOUNT_KEY=/path/to/service-account-key.json
+```
+**Option B: Service Account JSON String (Recommended for Docker/Cloud/Hugging Face Spaces)**
+```bash
+FIREBASE_SERVICE_ACCOUNT_JSON='{"type":"service_account","project_id":"...","private_key_id":"...","private_key":"...","client_email":"...","client_id":"...","auth_uri":"...","token_uri":"...","auth_provider_x509_cert_url":"...","client_x509_cert_url":"..."}'
+```
+**For Hugging Face Spaces:**
+- Use **Option B** (JSON String) as a **Secret** (Private)
+- Copy the entire contents of your service account JSON file
+- Paste it as the value for `FIREBASE_SERVICE_ACCOUNT_JSON`
+- Make sure to keep the single quotes around the JSON if setting via command line, or just paste the raw JSON in the Spaces UI
+---
+## Step 2: Brevo Setup
+### 2.1 Create Brevo Account
+1. Go to [Brevo](https://www.brevo.com/) (formerly Sendinblue)
+2. Sign up for a free account (300 emails/day free tier)
+3. Verify your email address
+### 2.2 Get API Key
+1. Log in to Brevo
+2. Go to **Settings** → **API Keys**
+3. Click **Generate a new API key**
+4. Copy the API key (starts with `xkeysib-...`)
+### 2.3 Verify Sender Email
+1. Go to **Senders & IP** → **Senders**
+2. Click **Add a sender**
+3. Enter your sender email (e.g., `noreply@yourdomain.com`)
+4. Verify the email address (check your inbox for verification email)
+5. Once verified, you can use it to send emails
+### 2.4 Set Backend Environment Variables
+```bash
+BREVO_API_KEY=xkeysib-your-api-key-here
+BREVO_SENDER_EMAIL=noreply@yourdomain.com
+BREVO_SENDER_NAME=EZOFIS AI
+```
+---
+## Step 3: JWT Secret Key
+Generate a strong random string for JWT token signing:
+```bash
+# Generate a random secret (Linux/Mac)
+openssl rand -hex 32
+# Or use Python
+python -c "import secrets; print(secrets.token_hex(32))"
+```
+Set the environment variable:
+```bash
+JWT_SECRET_KEY=your-generated-secret-key-here
+```
+---
+## Step 4: Frontend URL
+Set the frontend URL for OAuth redirects:
+```bash
+FRONTEND_URL=http://localhost:5173  # Development
+# OR
+FRONTEND_URL=https://your-domain.com  # Production
+```
+---
+## Step 5: Install Dependencies
+### Backend
+```bash
+cd backend
+pip install -r requirements.txt
+```
+### Frontend
+```bash
+cd frontend
+npm install
+```
+---
+## Step 6: Database Migration
+The application automatically creates the new schema on startup. However, if you have existing data, choose one of the following options:
+**Option 1: Fresh Start (Recommended for Development)**
+- Delete the existing database file: `data/app.db`
+- Restart the application (tables will be recreated)
+**Option 2: Manual Migration (For Production)**
+- The new `users` table will be created automatically
+- The existing `extractions` table needs a `user_id` column added
+- You'll need to assign existing records to a default user or migrate them
+---
+## Step 7: Test the Setup
+### 7.1 Test Firebase Authentication
+1. Start the backend server
+2. Start the frontend development server
+3. Navigate to the application
+4. Click "Google Sign In"
+5. Sign in with a business Google account
+6. Verify you're redirected to the dashboard
+### 7.2 Test OTP Authentication
+1. Click on "Email / OTP" tab
+2. Enter a business email address
+3. Click "Send OTP"
+4. Check your email for the OTP code
+5. Enter the OTP and verify
+6. Verify you're redirected to the dashboard
+### 7.3 Test Business Email Validation
+1. Try to sign in with a personal Gmail account
+2. Verify you get an error message
+3. Try OTP with a personal email
+4. Verify it's blocked
+---
+## Environment Variables Summary
+### Backend (.env or environment)
+```bash
+# Firebase
+FIREBASE_SERVICE_ACCOUNT_JSON='{...}'  # OR
+FIREBASE_SERVICE_ACCOUNT_KEY=/path/to/key.json
+# Brevo
+BREVO_API_KEY=xkeysib-...
+BREVO_SENDER_EMAIL=noreply@yourdomain.com
+BREVO_SENDER_NAME=EZOFIS AI
+# JWT
+JWT_SECRET_KEY=your-secret-key
+# Frontend URL
+FRONTEND_URL=http://localhost:5173  # For local development
+# For Hugging Face Spaces: https://your-username-ezofisocr.hf.space
+```
+**For Hugging Face Spaces:**
+- Set `FIREBASE_SERVICE_ACCOUNT_JSON`, `BREVO_API_KEY`, and `JWT_SECRET_KEY` as **Secrets (Private)**
+- Set `BREVO_SENDER_EMAIL`, `BREVO_SENDER_NAME`, and `FRONTEND_URL` as **Variables (Public)**
+- See `HUGGINGFACE_SPACES_SETUP.md` for detailed instructions
+### Frontend (.env)
+```bash
+VITE_FIREBASE_API_KEY=...
+VITE_FIREBASE_AUTH_DOMAIN=...
+VITE_FIREBASE_PROJECT_ID=...
+VITE_FIREBASE_STORAGE_BUCKET=...
+VITE_FIREBASE_MESSAGING_SENDER_ID=...
+VITE_FIREBASE_APP_ID=...
+VITE_API_BASE_URL=http://localhost:7860
+```
+---
+## Troubleshooting
+### Firebase Issues
+- **"Firebase not configured"**: Check that `FIREBASE_SERVICE_ACCOUNT_JSON` or `FIREBASE_SERVICE_ACCOUNT_KEY` is set correctly
+- **"Invalid Firebase token"**: Ensure Firebase Web SDK is properly configured in frontend
+- **"Email not found"**: Make sure Google sign-in is enabled in Firebase Console
+### Brevo Issues
+- **"Failed to send email"**:
+  - Verify your API key is correct
+  - Check that sender email is verified in Brevo
+  - Ensure you haven't exceeded the free tier limit (300 emails/day)
+- **"API key not set"**: Check that `BREVO_API_KEY` environment variable is set
+### Business Email Validation
+- Personal emails (Gmail, Yahoo, etc.) are automatically blocked
+- Only business/corporate email domains are allowed
+- The validation happens on both frontend and backend
+---
+## Security Notes
+1. **Never commit** Firebase service account keys or API keys to version control
+2. Use environment variables or secure secret management
+3. JWT tokens expire after 7 days
+4. OTP codes expire after 10 minutes
+5. Maximum 5 OTP verification attempts per email
+6. All extraction records are filtered by user_id for data isolation
+---
+## Production Deployment
+1. Set all environment variables in your hosting platform
+2. Use HTTPS for both frontend and backend
+3. Update `FRONTEND_URL` to your production domain
+4. Verify sender email in Brevo with your production domain
+5. Consider using Redis for OTP storage instead of in-memory (for scalability)
+6. Set up proper error monitoring and logging
+---
+## Support
+For issues:
+- Firebase: [Firebase Documentation](https://firebase.google.com/docs)
+- Brevo: [Brevo API Documentation](https://developers.brevo.com/)

GOOGLE_OAUTH_SETUP.md ADDED Viewed

	@@ -0,0 +1,79 @@

+# Google OAuth Setup Guide
+This application uses Google OAuth for user authentication. Follow these steps to set it up:
+## 1. Create Google OAuth Credentials
+1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
+2. Create a new project or select an existing one
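+3. Configure the OAuth consent screen (the legacy Google+ API has been shut down and does not need to be enabled for Google sign-in)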
+4. Go to "Credentials" → "Create Credentials" → "OAuth client ID"
+5. Choose "Web application"
+6. Add authorized redirect URIs:
+   - For development: `http://localhost:7860/api/auth/callback`
+   - For production: `https://your-domain.com/api/auth/callback`
+7. Copy the Client ID and Client Secret
+## 2. Set Environment Variables
+Set the following environment variables:
+```bash
+# Google OAuth
+GOOGLE_CLIENT_ID=your-client-id-here
+GOOGLE_CLIENT_SECRET=your-client-secret-here
+# JWT Secret (use a strong random string)
+JWT_SECRET_KEY=your-secret-key-here
+# Frontend URL (for OAuth redirect)
+FRONTEND_URL=http://localhost:5173  # or your production URL
+```
+## 3. Database Migration
+The database will automatically create the new `users` table and add `user_id` to the `extractions` table when you start the application.
+**Note:** If you have an existing database with extraction records, you'll need to:
+1. Back up your data
+2. Delete the old database file
+3. Restart the application to recreate tables with the new schema
+Or manually migrate:
+- Add `user_id` column to `extractions` table (you may need to set a default user_id for existing records)
+## 4. Install Dependencies
+Make sure to install the new Python dependencies:
+```bash
+pip install -r backend/requirements.txt
+```
+New dependencies added:
+- `authlib` - OAuth library
+- `pyjwt` - JWT token handling
+- `python-jose[cryptography]` - JWT verification
+## 5. Start the Application
+1. Start the backend server
+2. Start the frontend development server
+3. Users will be prompted to sign in with Google when they try to access the application
+## How It Works
+1. User clicks "Sign in with Google" → redirected to Google login
+2. After authentication, Google redirects to `/api/auth/callback`
+3. Backend creates/updates user in database and generates JWT token
+4. Frontend receives token and stores it in localStorage
+5. All API requests include the JWT token in the Authorization header
+6. Backend verifies token and filters data by user_id
+## Security Notes
+- JWT tokens expire after 7 days
+- Tokens are stored in localStorage (consider httpOnly cookies for production)
+- All extraction records are filtered by user_id
+- Users can only see their own data and history

HUGGINGFACE_SPACES_SETUP.md ADDED Viewed

	@@ -0,0 +1,186 @@

+# Hugging Face Spaces Setup Guide
+This guide provides specific instructions for deploying the EZOFIS OCR application to Hugging Face Spaces.
+## Prerequisites
+1. ✅ Firebase project configured
+2. ✅ Brevo account set up
+3. ✅ Hugging Face account with a Space created
+---
+## Step 1: Frontend Environment Variables
+Set these in **Hugging Face Spaces → Settings → Variables and secrets**:
+### Variables and Secrets:
+- `VITE_FIREBASE_API_KEY` → Set as **Secret (Private)** ✅
+- `VITE_FIREBASE_AUTH_DOMAIN` → Set as **Variable (Public)**
+- `VITE_FIREBASE_PROJECT_ID` → Set as **Variable (Public)**
+- `VITE_FIREBASE_STORAGE_BUCKET` → Set as **Variable (Public)**
+- `VITE_FIREBASE_MESSAGING_SENDER_ID` → Set as **Variable (Public)**
+- `VITE_FIREBASE_APP_ID` → Set as **Variable (Public)**
+- `VITE_API_BASE_URL` → Set as **Variable (Public)** (e.g., `https://your-username-ezofisocr.hf.space`)
+**Note:** These variables are used during the Docker build process. The Dockerfile has been updated to accept them as build arguments.
+---
+## Step 2: Backend Environment Variables
+Set these in **Hugging Face Spaces → Settings → Variables and secrets**:
+### Secrets (Private):
+1. **`FIREBASE_SERVICE_ACCOUNT_JSON`**
+   - Get your Firebase service account JSON file (from Firebase Console → Project Settings → Service accounts)
+   - Copy the **entire JSON content** (all in one line or formatted)
+   - Paste it as the value for this secret
+   - Example format:
+     ```json
+     {"type":"service_account","project_id":"your-project","private_key_id":"...","private_key":"-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n","client_email":"...","client_id":"...","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_x509_cert_url":"..."}
+     ```
+2. **`BREVO_API_KEY`**
+   - Get from Brevo → Settings → API Keys
+   - Format: `xkeysib-...`
+3. **`JWT_SECRET_KEY`**
+   - Generate a secure random key:
+     ```bash
+     openssl rand -hex 32
+     ```
+   - Or use Python:
+     ```bash
+     python -c "import secrets; print(secrets.token_hex(32))"
+     ```
+### Variables (Public):
+1. **`BREVO_SENDER_EMAIL`**
+   - Your verified sender email in Brevo
+   - Example: `noreply@yourdomain.com`
+2. **`BREVO_SENDER_NAME`**
+   - Display name for emails
+   - Example: `EZOFIS AI`
+3. **`FRONTEND_URL`**
+   - Your Hugging Face Space URL
+   - Format: `https://your-username-ezofisocr.hf.space`
+   - Replace `your-username` with your actual Hugging Face username
+---
+## Step 3: Verify Dockerfile
+The Dockerfile has been updated to accept frontend environment variables as build arguments. Make sure your `Dockerfile` includes:
+```dockerfile
+# Accept build arguments for Vite environment variables
+ARG VITE_FIREBASE_API_KEY
+ARG VITE_FIREBASE_AUTH_DOMAIN
+ARG VITE_FIREBASE_PROJECT_ID
+ARG VITE_FIREBASE_STORAGE_BUCKET
+ARG VITE_FIREBASE_MESSAGING_SENDER_ID
+ARG VITE_FIREBASE_APP_ID
+ARG VITE_API_BASE_URL
+# Set as environment variables so Vite can access them during build
+ENV VITE_FIREBASE_API_KEY=$VITE_FIREBASE_API_KEY
+ENV VITE_FIREBASE_AUTH_DOMAIN=$VITE_FIREBASE_AUTH_DOMAIN
+ENV VITE_FIREBASE_PROJECT_ID=$VITE_FIREBASE_PROJECT_ID
+ENV VITE_FIREBASE_STORAGE_BUCKET=$VITE_FIREBASE_STORAGE_BUCKET
+ENV VITE_FIREBASE_MESSAGING_SENDER_ID=$VITE_FIREBASE_MESSAGING_SENDER_ID
+ENV VITE_FIREBASE_APP_ID=$VITE_FIREBASE_APP_ID
+ENV VITE_API_BASE_URL=$VITE_API_BASE_URL
+```
+---
+## Step 4: Deploy
+1. **Commit and push** your code to the Hugging Face Space repository
+   - Make sure `frontend/build-env.sh` is included in your commit
+2. **Wait for the build** to complete (check the "Logs" tab)
+   - Look for "Checking environment variables..." messages in the build logs
+   - Verify all variables show "✓ ... is set" (not "WARNING: ... is not set")
+3. **Test the deployment**:
+   - Open your Space URL
+   - Try Firebase login
+   - Try OTP authentication
+**Important:** After setting or updating environment variables in Hugging Face Spaces, you need to **rebuild** the Space for the changes to take effect. The frontend is built during the Docker build process, so environment variable changes require a rebuild.
+---
+## Troubleshooting
+### Build Fails with "VITE_* variables not found"
+- **Solution:** Make sure all `VITE_*` variables are set in Spaces → Variables and secrets
+- Hugging Face Spaces automatically makes environment variables available during build
+- The Dockerfile uses a build script to create a `.env` file from these variables
+### Firebase Authentication Not Working - "auth/invalid-api-key" Error
+- **Check:** `VITE_FIREBASE_API_KEY` is set correctly (as a Secret) and contains the full API key
+- **Check:** All other `VITE_FIREBASE_*` variables are set with correct values
+- **Check:** After updating variables, rebuild the Space (the frontend needs to be rebuilt)
+- **Check:** Firebase Console → Authentication → Sign-in method → Google is enabled
+- **Check:** The API key matches the one in Firebase Console → Project Settings → Your apps
+- **Solution:** If the error persists, check the build logs to see if the `.env` file is being created correctly
+### OTP Emails Not Sending
+- **Check:** `BREVO_API_KEY` is set correctly (as a Secret)
+- **Check:** `BREVO_SENDER_EMAIL` is verified in Brevo
+- **Check:** `BREVO_SENDER_NAME` is set
+- **Check:** You haven't exceeded Brevo free tier (300 emails/day)
+### Backend Errors
+- **Check:** `FIREBASE_SERVICE_ACCOUNT_JSON` contains the full JSON (all fields)
+- **Check:** `JWT_SECRET_KEY` is set
+- **Check:** `FRONTEND_URL` matches your Space URL exactly
+---
+## Environment Variables Checklist
+### Frontend (Build-time):
+- [ ] `VITE_FIREBASE_API_KEY` (Secret)
+- [ ] `VITE_FIREBASE_AUTH_DOMAIN` (Variable)
+- [ ] `VITE_FIREBASE_PROJECT_ID` (Variable)
+- [ ] `VITE_FIREBASE_STORAGE_BUCKET` (Variable)
+- [ ] `VITE_FIREBASE_MESSAGING_SENDER_ID` (Variable)
+- [ ] `VITE_FIREBASE_APP_ID` (Variable)
+- [ ] `VITE_API_BASE_URL` (Variable)
+### Backend (Runtime):
+- [ ] `FIREBASE_SERVICE_ACCOUNT_JSON` (Secret)
+- [ ] `BREVO_API_KEY` (Secret)
+- [ ] `JWT_SECRET_KEY` (Secret)
+- [ ] `BREVO_SENDER_EMAIL` (Variable)
+- [ ] `BREVO_SENDER_NAME` (Variable)
+- [ ] `FRONTEND_URL` (Variable)
+---
+## Notes
+1. **Build vs Runtime:** Frontend variables (`VITE_*`) are used during Docker build, backend variables are used at runtime.
+2. **Secrets vs Variables:**
+   - Use **Secrets** for sensitive data (API keys, private keys, JWT secrets)
+   - Use **Variables** for non-sensitive configuration (URLs, display names)
+3. **Firebase Service Account JSON:** When copying the JSON, make sure to include the entire content, including the `private_key` field with newlines preserved (they should be `\n` in the JSON string).
+4. **Space URL:** Your Space URL format is `https://{username}-{space-name}.hf.space`. Make sure `FRONTEND_URL` and `VITE_API_BASE_URL` match this exactly.
+---
+## Support
+If you encounter issues:
+1. Check the build logs in Hugging Face Spaces
+2. Verify all environment variables are set correctly
+3. Ensure Firebase and Brevo are properly configured
+4. Review the main setup guide: `FIREBASE_OTP_SETUP.md`

IMPLEMENTATION_COMPLETE.md ADDED Viewed

	@@ -0,0 +1,257 @@

+# ✅ Firebase + OTP Authentication Implementation Complete
+All code changes have been applied successfully! Here are the next steps you need to follow:
+## 📋 Implementation Summary
+### ✅ Backend Changes (Completed)
+- ✅ Updated `requirements.txt` with Firebase Admin SDK
+- ✅ Updated `models.py` - User model now supports Firebase and OTP auth methods
+- ✅ Created `email_validator.py` - Business email validation
+- ✅ Created `firebase_auth.py` - Firebase token verification
+- ✅ Created `brevo_service.py` - Brevo email service for OTP
+- ✅ Created `otp_service.py` - OTP generation and verification
+- ✅ Updated `auth_routes.py` - New endpoints for Firebase and OTP login
+### ✅ Frontend Changes (Completed)
+- ✅ Updated `package.json` with Firebase SDK
+- ✅ Created `config/firebase.js` - Firebase configuration
+- ✅ Updated `services/auth.js` - Firebase and OTP auth functions
+- ✅ Updated `contexts/AuthContext.jsx` - Firebase and OTP support
+- ✅ Created `components/auth/LoginForm.jsx` - Login UI with both options
+- ✅ Updated `App.jsx` - Integrated LoginForm component
+---
+## 🚀 Next Steps (YOU NEED TO DO THESE)
+### Step 1: Install Dependencies
+**Backend:**
+```bash
+cd backend
+pip install -r requirements.txt
+```
+**Frontend:**
+```bash
+cd frontend
+npm install
+```
+---
+### Step 2: Set Up Firebase
+1. **Create Firebase Project:**
+   - Go to https://console.firebase.google.com/
+   - Create a new project or use existing one
+2. **Enable Google Authentication:**
+   - In Firebase Console → Authentication → Sign-in method
+   - Enable "Google" provider
+   - Set project support email
+3. **Get Web App Config:**
+   - Project Settings → Your apps → Add Web app
+   - Copy the config values
+4. **Get Service Account Key:**
+   - Project Settings → Service accounts
+   - Click "Generate new private key"
+   - Download the JSON file
+5. **Set Frontend Environment Variables:**
+   Create `frontend/.env`:
+   ```bash
+   VITE_FIREBASE_API_KEY=your-api-key-here
+   VITE_FIREBASE_AUTH_DOMAIN=your-project.firebaseapp.com
+   VITE_FIREBASE_PROJECT_ID=your-project-id
+   VITE_FIREBASE_STORAGE_BUCKET=your-project.appspot.com
+   VITE_FIREBASE_MESSAGING_SENDER_ID=your-sender-id
+   VITE_FIREBASE_APP_ID=your-app-id
+   VITE_API_BASE_URL=http://localhost:7860
+   ```
+6. **Set Backend Environment Variables:**
+   Option A (JSON file path):
+   ```bash
+   FIREBASE_SERVICE_ACCOUNT_KEY=/path/to/service-account-key.json
+   ```
+   Option B (JSON string - recommended for Docker):
+   ```bash
+   FIREBASE_SERVICE_ACCOUNT_JSON='{"type":"service_account","project_id":"...","private_key":"...","client_email":"..."}'
+   ```
+   (Copy the entire JSON content from the downloaded file)
+---
+### Step 3: Set Up Brevo
+1. **Create Brevo Account:**
+   - Go to https://www.brevo.com/
+   - Sign up (free tier: 300 emails/day)
+2. **Get API Key:**
+   - Settings → API Keys
+   - Generate new API key
+   - Copy the key (starts with `xkeysib-`)
+3. **Verify Sender Email:**
+   - Senders & IP → Senders
+   - Add sender email (e.g., `noreply@yourdomain.com`)
+   - Verify via email
+4. **Set Backend Environment Variables:**
+   ```bash
+   BREVO_API_KEY=xkeysib-your-api-key-here
+   BREVO_SENDER_EMAIL=noreply@yourdomain.com
+   BREVO_SENDER_NAME=EZOFIS AI
+   ```
+---
+### Step 4: Set JWT Secret
+Generate a secure random key:
+```bash
+# Linux/Mac
+openssl rand -hex 32
+# Or Python
+python -c "import secrets; print(secrets.token_hex(32))"
+```
+Set environment variable:
+```bash
+JWT_SECRET_KEY=your-generated-secret-key-here
+```
+---
+### Step 5: Set Frontend URL
+```bash
+FRONTEND_URL=http://localhost:5173  # Development
+# OR
+FRONTEND_URL=https://your-domain.com  # Production
+```
+---
+### Step 6: Database Migration
+**If you have existing data:**
+- The new schema will be created automatically
+- Existing `extractions` table needs `user_id` column
+- You may need to assign existing records to a default user
+**For fresh start (recommended for development):**
+- Delete `data/app.db` (if exists)
+- Restart application - tables will be recreated
+---
+### Step 7: Test the Implementation
+1. **Start Backend:**
+   ```bash
+   cd backend
+   uvicorn app.main:app --reload --port 7860
+   ```
+2. **Start Frontend:**
+   ```bash
+   cd frontend
+   npm run dev
+   ```
+3. **Test Firebase Login:**
+   - Navigate to http://localhost:5173
+   - Click "Google Sign In" tab
+   - Sign in with business Google account
+   - Should redirect to dashboard
+4. **Test OTP Login:**
+   - Click "Email / OTP" tab
+   - Enter business email
+   - Click "Send OTP"
+   - Check email for OTP code
+   - Enter OTP and verify
+   - Should redirect to dashboard
+5. **Test Business Email Validation:**
+   - Try personal Gmail account → Should be blocked
+   - Try OTP with personal email → Should be blocked
+---
+## 📝 Environment Variables Checklist
+### Backend (.env or system environment)
+- [ ] `FIREBASE_SERVICE_ACCOUNT_JSON` or `FIREBASE_SERVICE_ACCOUNT_KEY`
+- [ ] `BREVO_API_KEY`
+- [ ] `BREVO_SENDER_EMAIL`
+- [ ] `BREVO_SENDER_NAME`
+- [ ] `JWT_SECRET_KEY`
+- [ ] `FRONTEND_URL`
+### Frontend (.env)
+- [ ] `VITE_FIREBASE_API_KEY`
+- [ ] `VITE_FIREBASE_AUTH_DOMAIN`
+- [ ] `VITE_FIREBASE_PROJECT_ID`
+- [ ] `VITE_FIREBASE_STORAGE_BUCKET`
+- [ ] `VITE_FIREBASE_MESSAGING_SENDER_ID`
+- [ ] `VITE_FIREBASE_APP_ID`
+- [ ] `VITE_API_BASE_URL`
+---
+## 🔒 Security Reminders
+1. ✅ Never commit API keys or secrets to git
+2. ✅ Use `.env` files (add to `.gitignore`)
+3. ✅ Business email validation is enforced on both frontend and backend
+4. ✅ JWT tokens expire after 7 days
+5. ✅ OTP codes expire after 10 minutes
+6. ✅ Maximum 5 OTP verification attempts
+---
+## 📚 Documentation
+- **Firebase Setup:** See `FIREBASE_OTP_SETUP.md` for detailed instructions
+- **Brevo API:** https://developers.brevo.com/reference/sendtransacemail
+---
+## ⚠️ Important Notes
+1. **Database Schema Change:**
+   - User model changed from `google_id` (required) to `firebase_uid` (optional)
+   - If you have existing users, you'll need to migrate the data
+   - For development, deleting `data/app.db` is the easiest option
+2. **Business Email Validation:**
+   - Personal email domains are blocked (Gmail, Yahoo, Outlook, etc.)
+   - Validation happens on both frontend and backend
+   - Users must use their work/corporate email addresses
+3. **OTP Storage:**
+   - Currently stored in memory (works for single server)
+   - For production with multiple servers, consider using Redis
+---
+## 🎉 You're All Set!
+Once you complete the setup steps above, your application will have:
+- ✅ Firebase Google Sign-in (no OAuth credentials needed!)
+- ✅ Email/OTP authentication via Brevo
+- ✅ Business email validation
+- ✅ User-specific data isolation
+- ✅ Secure JWT token authentication
+Good luck! 🚀

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
----
-title: EZOFISAIOCR
-emoji: 🌍
-colorFrom: blue
-colorTo: purple
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: EZOFISAIOCR
+emoji: 🌍
+colorFrom: blue
+colorTo: purple
+sdk: docker
+pinned: false
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

backend/app/api_key_auth.py ADDED Viewed

	@@ -0,0 +1,100 @@

+import os
+import secrets
+import hashlib
+from datetime import datetime
+from typing import Optional
+from fastapi import Depends, HTTPException, status, Header
+from sqlalchemy.orm import Session
+from .db import SessionLocal
+from .models import APIKey, User
+def get_db():
+    """Database dependency."""
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
+def generate_api_key() -> str:
+    """
+    Generate a secure API key.
+    Format: sk_live_<random_64_char_hex>
+    """
+    random_bytes = secrets.token_bytes(32)
+    random_hex = random_bytes.hex()
+    return f"sk_live_{random_hex}"
+def hash_api_key(key: str) -> str:
+    """Hash an API key using SHA-256."""
+    return hashlib.sha256(key.encode()).hexdigest()
+def verify_api_key(key: str, key_hash: str) -> bool:
+    """Verify an API key against its hash."""
+    return hash_api_key(key) == key_hash
+def get_api_key_prefix(key: str) -> str:
+    """Get the prefix of an API key for display purposes."""
+    return key[:12] + "..." if len(key) > 12 else key
+async def get_user_from_api_key(
+    api_key: Optional[str] = Header(None, alias="X-API-Key"),
+    db: Session = Depends(get_db)
+) -> Optional[User]:
+    """
+    Authenticate user from API key header.
+    Returns User if valid, None if no API key provided.
+    Raises HTTPException if API key is invalid.
+    """
+    if not api_key:
+        return None
+    # Hash the provided key
+    key_hash = hash_api_key(api_key)
+    # Find the API key in database
+    api_key_record = (
+        db.query(APIKey)
+        .filter(APIKey.key_hash == key_hash)
+        .filter(APIKey.is_active == True)
+        .first()
+    )
+    if not api_key_record:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Invalid API key",
+            headers={"WWW-Authenticate": "Bearer"},
+        )
+    # Update last used timestamp
+    api_key_record.last_used_at = datetime.utcnow()
+    db.commit()
+    # Get the user
+    user = db.query(User).filter(User.id == api_key_record.user_id).first()
+    if not user:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="User not found for API key",
+        )
+    return user
+async def get_current_user_or_api_key(
+    api_key_user: Optional[User] = Depends(get_user_from_api_key),
+    # JWT auth will be handled separately in main.py
+) -> Optional[User]:
+    """
+    Pass through the result of get_user_from_api_key.
+    Returns the User resolved from the API key, or None when no API key was
+    provided. This function performs no JWT check itself; per the note in the
+    signature, JWT authentication is expected to be handled by the caller.
+    """
+    return api_key_user

backend/app/apollo_service.py ADDED Viewed

	@@ -0,0 +1,444 @@

+"""
+Apollo.io API service for creating contacts, enriching contact data, and adding them to sequences.
+Reference:
+- Create contact: https://docs.apollo.io/reference/create-a-contact
+- Add to sequence: https://docs.apollo.io/reference/add-contacts-to-sequence
+- Enrich person: https://docs.apollo.io/reference/enrich-people-data
+"""
+import os
+import httpx
+from typing import Optional, Dict, Any
+# API key read once at import time; when empty, the functions below log a warning and skip the Apollo call
+APOLLO_API_KEY = os.environ.get("APOLLO_API_KEY", "")
+# Base URL that every Apollo request in this module is built from
+APOLLO_API_URL = "https://api.apollo.io/api/v1"
+# Default list name used by create_apollo_contact when no list_name is passed
+APOLLO_TRIAL_LIST_NAME = "VPR TRIAL LEADS"
+# Allow list ID to be set directly via environment variable (more reliable than lookup)
+APOLLO_TRIAL_LIST_ID = os.environ.get("APOLLO_TRIAL_LIST_ID", None)
+# Sequence ID for adding contacts to email sequences (preferred over lists)
+APOLLO_TRIAL_SEQUENCE_ID = os.environ.get("APOLLO_TRIAL_SEQUENCE_ID", None)
+async def get_list_id(list_name: Optional[str] = None) -> Optional[str]:
+    """
+    Get Apollo list ID. First tries environment variable, then attempts API lookup.
+    Args:
+        list_name: Name of the list (for lookup if env var not set)
+    Returns:
+        List ID as string if found, None otherwise
+    """
+    # First, try to use the list ID from environment variable (most reliable)
+    if APOLLO_TRIAL_LIST_ID:
+        # Apollo list IDs are typically hexadecimal strings (MongoDB ObjectIds)
+        # Accept them as strings, just strip whitespace
+        list_id = str(APOLLO_TRIAL_LIST_ID).strip()
+        if list_id:
+            print(f"[INFO] Using Apollo list ID from environment variable: {list_id}")
+            return list_id
+        else:
+            print(f"[WARNING] APOLLO_TRIAL_LIST_ID is empty")
+    # If no env var, try to look up by name (this may not work if API endpoint is different)
+    if not list_name or not APOLLO_API_KEY:
+        return None
+    # Note: The /lists endpoint may not be available in all Apollo API versions
+    # Try alternative: search for lists using a different endpoint
+    try:
+        async with httpx.AsyncClient() as client:
+            # Try the lists endpoint (may return 404 in some API versions)
+            response = await client.get(
+                f"{APOLLO_API_URL}/lists",
+                headers={
+                    "Content-Type": "application/json",
+                    "Cache-Control": "no-cache",
+                    "X-Api-Key": APOLLO_API_KEY
+                },
+                timeout=10.0
+            )
+            if response.status_code == 200:
+                data = response.json()
+                lists = data.get("lists", [])
+                for list_item in lists:
+                    if list_item.get("name") == list_name:
+                        list_id = list_item.get("id")
+                        print(f"[INFO] Found Apollo list '{list_name}' with ID: {list_id}")
+                        # Return as string (Apollo IDs are typically hex strings)
+                        return str(list_id) if list_id else None
+                print(f"[WARNING] Apollo list '{list_name}' not found in available lists")
+            else:
+                print(f"[WARNING] Apollo lists endpoint returned {response.status_code}, cannot lookup list by name")
+    except Exception as e:
+        print(f"[WARNING] Failed to fetch Apollo list ID: {str(e)}")
+    return None
+async def add_contact_to_sequence(contact_id: str, sequence_id: str) -> bool:
+    """
+    Add a contact to an Apollo.io email sequence.
+    Args:
+        contact_id: The Apollo contact ID
+        sequence_id: The Apollo sequence ID
+    Returns:
+        True if contact was successfully added to sequence, False otherwise
+    """
+    if not APOLLO_API_KEY:
+        print("[WARNING] APOLLO_API_KEY not set, skipping sequence enrollment")
+        return False
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                f"{APOLLO_API_URL}/sequence_contacts",
+                headers={
+                    "Content-Type": "application/json",
+                    "Cache-Control": "no-cache",
+                    "X-Api-Key": APOLLO_API_KEY
+                },
+                json={
+                    "sequence_id": sequence_id,
+                    "contact_id": contact_id
+                },
+                timeout=10.0
+            )
+            if response.status_code in [200, 201]:
+                print(f"[INFO] Successfully added contact {contact_id} to sequence {sequence_id}")
+                return True
+            else:
+                error_data = response.text
+                print(f"[ERROR] Failed to add contact to sequence: {response.status_code} - {error_data}")
+                return False
+    except httpx.HTTPStatusError as e:
+        print(f"[ERROR] Apollo API HTTP error adding to sequence: {e.response.status_code} - {e.response.text}")
+        return False
+    except Exception as e:
+        print(f"[ERROR] Failed to add contact to sequence: {str(e)}")
+        return False
+async def create_apollo_contact(
+    email: str,
+    first_name: Optional[str] = None,
+    last_name: Optional[str] = None,
+    organization_name: Optional[str] = None,
+    title: Optional[str] = None,
+    list_name: Optional[str] = None,
+    sequence_id: Optional[str] = None
+) -> bool:
+    """
+    Create a contact in Apollo.io and optionally add to a sequence or list.
+    Args:
+        email: Contact email address (required)
+        first_name: Contact first name
+        last_name: Contact last name
+        organization_name: Organization name
+        title: Job title
+        list_name: Name of the list to add contact to (defaults to APOLLO_TRIAL_LIST_NAME)
+        sequence_id: ID of the sequence to add contact to (preferred over list)
+    Returns:
+        True if contact created successfully, False otherwise
+    Raises:
+        ValueError: If APOLLO_API_KEY is not set
+    """
+    if not APOLLO_API_KEY:
+        print("[WARNING] APOLLO_API_KEY not set, skipping Apollo contact creation")
+        return False
+    # Use default list name if not provided
+    if list_name is None:
+        list_name = APOLLO_TRIAL_LIST_NAME
+    # Parse name if full name is provided but first/last are not
+    if not first_name and not last_name:
+        # Try to extract from email or use email prefix
+        email_prefix = email.split('@')[0]
+        if '.' in email_prefix:
+            parts = email_prefix.split('.')
+            first_name = parts[0].capitalize() if parts else None
+            last_name = parts[1].capitalize() if len(parts) > 1 else None
+        else:
+            first_name = email_prefix.capitalize()
+    # Extract organization domain from email
+    organization_domain = None
+    if '@' in email:
+        organization_domain = email.split('@')[1]
+    # Prepare contact data
+    contact_data: Dict[str, Any] = {
+        "email": email.lower(),
+        "run_dedupe": True  # Prevent duplicate contacts
+    }
+    if first_name:
+        contact_data["first_name"] = first_name
+    if last_name:
+        contact_data["last_name"] = last_name
+    if organization_name:
+        contact_data["organization_name"] = organization_name
+    if organization_domain:
+        contact_data["organization_domain"] = organization_domain
+    if title:
+        contact_data["title"] = title
+    try:
+        async with httpx.AsyncClient() as client:
+            # Get the list ID if list_name is provided
+            list_ids = []
+            target_list_id = None  # Store for later use
+            if list_name:
+                list_id = await get_list_id(list_name)
+                if list_id:
+                    target_list_id = list_id  # Store for verification later
+                    # Apollo API accepts list_ids as an array of strings (hex IDs)
+                    list_ids = [str(list_id)]
+                    contact_data["list_ids"] = list_ids
+                    print(f"[INFO] Adding contact to list ID: {list_id}")
+                else:
+                    print(f"[WARNING] Could not find list '{list_name}'. Set APOLLO_TRIAL_LIST_ID environment variable with the list ID, or create contact without list assignment")
+            # Log the payload being sent (for debugging)
+            print(f"[DEBUG] Creating Apollo contact with payload: {contact_data}")
+            # Create the contact
+            response = await client.post(
+                f"{APOLLO_API_URL}/contacts",
+                headers={
+                    "Content-Type": "application/json",
+                    "Cache-Control": "no-cache",
+                    "X-Api-Key": APOLLO_API_KEY
+                },
+                json=contact_data,
+                timeout=10.0
+            )
+            # Log the full response for debugging
+            print(f"[DEBUG] Apollo API response status: {response.status_code}")
+            try:
+                response_json = response.json()
+                print(f"[DEBUG] Apollo API response (full): {response_json}")
+            except:
+                print(f"[DEBUG] Apollo API response body (text): {response.text[:1000]}")  # First 1000 chars
+            if response.status_code == 200 or response.status_code == 201:
+                result = response.json()
+                contact = result.get("contact", {})
+                contact_id = contact.get("id")
+                print(f"[INFO] Successfully created Apollo contact: {email} (ID: {contact_id})")
+                # Priority: Add to sequence if sequence_id is provided (this is supported by API)
+                target_sequence_id = sequence_id or APOLLO_TRIAL_SEQUENCE_ID
+                if contact_id and target_sequence_id:
+                    print(f"[INFO] Adding contact to sequence: {target_sequence_id}")
+                    sequence_success = await add_contact_to_sequence(contact_id, target_sequence_id)
+                    if sequence_success:
+                        print(f"[INFO] ✓ Contact successfully enrolled in sequence")
+                    else:
+                        print(f"[WARNING] Failed to add contact to sequence, but contact was created")
+                # Fallback: Try to add to list (API limitation - may not work)
+                if list_ids and contact_id and target_list_id and not target_sequence_id:
+                    print(f"[INFO] Contact created with list_ids parameter: {list_ids}")
+                    print(f"[INFO] ⚠️  Apollo.io API Limitation: The API does not return list_ids in responses,")
+                    print(f"[INFO]    so we cannot verify if the contact was added to the list via API.")
+                    print(f"[INFO]    Please verify manually in Apollo.io that contact '{email}' is in list '{list_name or target_list_id}'")
+                    print(f"[INFO]    Consider using sequences instead (APOLLO_TRIAL_SEQUENCE_ID) for better API support.")
+                return True
+            else:
+                error_data = response.text
+                print(f"[ERROR] Failed to create Apollo contact: {response.status_code} - {error_data}")
+                return False
+    except httpx.HTTPStatusError as e:
+        print(f"[ERROR] Apollo API HTTP error: {e.response.status_code} - {e.response.text}")
+        return False
+    except Exception as e:
+        print(f"[ERROR] Failed to create Apollo contact: {str(e)}")
+        return False
+async def enrich_contact_by_email(email: str) -> Optional[Dict[str, Any]]:
+    """
+    Enrich contact data from Apollo.io using email address.
+    Args:
+        email: Contact email address
+    Returns:
+        Dictionary with enriched contact data, or None if not found
+    """
+    if not APOLLO_API_KEY:
+        print("[WARNING] APOLLO_API_KEY not set, skipping Apollo enrichment")
+        return None
+    try:
+        async with httpx.AsyncClient() as client:
+            # Try people/match endpoint first (for exact email match)
+            print(f"[DEBUG] Attempting Apollo.io enrichment for {email} via /people/match endpoint")
+            response = await client.post(
+                f"{APOLLO_API_URL}/people/match",
+                headers={
+                    "Content-Type": "application/json",
+                    "Cache-Control": "no-cache",
+                    "X-Api-Key": APOLLO_API_KEY
+                },
+                json={
+                    "email": email.lower()
+                    # Note: reveal_phone_number requires webhook_url, so we skip it for now
+                },
+                timeout=10.0
+            )
+            print(f"[DEBUG] Apollo.io /people/match response status: {response.status_code}")
+            if response.status_code == 200:
+                data = response.json()
+                print(f"[DEBUG] Apollo.io /people/match response data keys: {list(data.keys())}")
+                person = data.get("person", {})
+                if person:
+                    print(f"[DEBUG] Found person data in Apollo.io response")
+                    # Extract enriched data
+                    enriched_data = {
+                        "first_name": person.get("first_name"),
+                        "last_name": person.get("last_name"),
+                        "title": person.get("title"),
+                        "phone_number": person.get("phone_numbers", [{}])[0].get("raw_number") if person.get("phone_numbers") else None,
+                        "linkedin_url": person.get("linkedin_url"),
+                        "headline": person.get("headline"),
+                        "organization_name": person.get("organization", {}).get("name") if person.get("organization") else None,
+                        "organization_website": person.get("organization", {}).get("website_url") if person.get("organization") else None,
+                        "organization_address": None,  # May need to parse from organization data
+                    }
+                    # Try to get organization address
+                    if person.get("organization"):
+                        org = person.get("organization", {})
+                        address_parts = []
+                        if org.get("street_address"):
+                            address_parts.append(org.get("street_address"))
+                        if org.get("city"):
+                            address_parts.append(org.get("city"))
+                        if org.get("state"):
+                            address_parts.append(org.get("state"))
+                        if org.get("postal_code"):
+                            address_parts.append(org.get("postal_code"))
+                        if org.get("country"):
+                            address_parts.append(org.get("country"))
+                        if address_parts:
+                            enriched_data["organization_address"] = ", ".join(address_parts)
+                    print(f"[INFO] Successfully enriched contact data for {email} from Apollo.io")
+                    return enriched_data
+                else:
+                    print(f"[DEBUG] Apollo.io /people/match returned 200 but no person data found")
+            elif response.status_code == 404:
+                print(f"[DEBUG] Apollo.io /people/match returned 404 - contact not found in database")
+            elif response.status_code == 401:
+                print(f"[ERROR] Apollo.io API authentication failed - check your API key")
+                try:
+                    error_data = response.json()
+                    print(f"[ERROR] Apollo.io error details: {error_data}")
+                except:
+                    print(f"[ERROR] Apollo.io error response: {response.text}")
+            else:
+                print(f"[DEBUG] Apollo.io /people/match returned status {response.status_code}")
+                try:
+                    error_data = response.json()
+                    print(f"[DEBUG] Apollo.io response: {error_data}")
+                except:
+                    print(f"[DEBUG] Apollo.io response text: {response.text[:500]}")
+            # If match fails, try the new search endpoint (api_search)
+            print(f"[DEBUG] Attempting Apollo.io enrichment for {email} via /mixed_people/api_search endpoint")
+            search_response = await client.post(
+                f"{APOLLO_API_URL}/mixed_people/api_search",
+                headers={
+                    "Content-Type": "application/json",
+                    "Cache-Control": "no-cache",
+                    "X-Api-Key": APOLLO_API_KEY
+                },
+                json={
+                    "email": email.lower(),
+                    "per_page": 1
+                },
+                timeout=10.0
+            )
+            print(f"[DEBUG] Apollo.io /mixed_people/api_search response status: {search_response.status_code}")
+            if search_response.status_code == 200:
+                search_data = search_response.json()
+                print(f"[DEBUG] Apollo.io /mixed_people/api_search response data keys: {list(search_data.keys())}")
+                people = search_data.get("people", [])
+                print(f"[DEBUG] Found {len(people)} people in search results")
+                if people:
+                    person = people[0]
+                    # Extract enriched data (same structure as above)
+                    enriched_data = {
+                        "first_name": person.get("first_name"),
+                        "last_name": person.get("last_name"),
+                        "title": person.get("title"),
+                        "phone_number": person.get("phone_numbers", [{}])[0].get("raw_number") if person.get("phone_numbers") else None,
+                        "linkedin_url": person.get("linkedin_url"),
+                        "headline": person.get("headline"),
+                        "organization_name": person.get("organization", {}).get("name") if person.get("organization") else None,
+                        "organization_website": person.get("organization", {}).get("website_url") if person.get("organization") else None,
+                        "organization_address": None,
+                    }
+                    if person.get("organization"):
+                        org = person.get("organization", {})
+                        address_parts = []
+                        if org.get("street_address"):
+                            address_parts.append(org.get("street_address"))
+                        if org.get("city"):
+                            address_parts.append(org.get("city"))
+                        if org.get("state"):
+                            address_parts.append(org.get("state"))
+                        if org.get("postal_code"):
+                            address_parts.append(org.get("postal_code"))
+                        if org.get("country"):
+                            address_parts.append(org.get("country"))
+                        if address_parts:
+                            enriched_data["organization_address"] = ", ".join(address_parts)
+                    print(f"[INFO] Successfully enriched contact data for {email} from Apollo.io (via search)")
+                    return enriched_data
+                else:
+                    print(f"[DEBUG] Apollo.io /mixed_people/api_search returned 200 but no people in results")
+            elif search_response.status_code == 404:
+                print(f"[DEBUG] Apollo.io /mixed_people/api_search returned 404 - contact not found")
+            elif search_response.status_code == 401:
+                print(f"[ERROR] Apollo.io API authentication failed on search - check your API key")
+                try:
+                    error_data = search_response.json()
+                    print(f"[ERROR] Apollo.io search error details: {error_data}")
+                except:
+                    print(f"[ERROR] Apollo.io search error response: {search_response.text}")
+            else:
+                print(f"[DEBUG] Apollo.io /mixed_people/api_search returned status {search_response.status_code}")
+                try:
+                    error_data = search_response.json()
+                    print(f"[DEBUG] Apollo.io search response: {error_data}")
+                except:
+                    print(f"[DEBUG] Apollo.io search response text: {search_response.text[:500]}")
+            print(f"[INFO] No contact data found in Apollo.io for {email} - contact may not exist in Apollo's database")
+            return None
+    except httpx.HTTPStatusError as e:
+        print(f"[ERROR] Apollo API HTTP error during enrichment: {e.response.status_code} - {e.response.text}")
+        return None
+    except Exception as e:
+        print(f"[ERROR] Failed to enrich contact from Apollo.io: {str(e)}")
+        return None

backend/app/auth.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import os
+import jwt
+from datetime import datetime, timedelta
+from typing import Optional
+from fastapi import Depends, HTTPException, status
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+from sqlalchemy.orm import Session
+from .db import SessionLocal
+from .models import User
+# JWT Configuration
+# NOTE(review): when JWT_SECRET_KEY is unset, tokens are signed with the hard-coded
+# fallback string below -- confirm every deployment sets the environment variable.
+SECRET_KEY = os.environ.get("JWT_SECRET_KEY", "your-secret-key-change-in-production")
+# Signing algorithm passed to jwt.encode / jwt.decode
+ALGORITHM = "HS256"
+# Default token lifetime used by create_access_token when no expires_delta is given
+ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7  # 7 days
+# Bearer-credentials dependency consumed by get_current_user
+security = HTTPBearer()
+def get_db():
+    """
+    Database dependency.
+    Yields a session created by SessionLocal() and closes it when the
+    generator is finalized, whether or not the consumer raised.
+    """
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        # Always release the session, even on error
+        db.close()
+def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
+    """Create a JWT access token."""
+    to_encode = data.copy()
+    # Ensure 'sub' (subject) is a string, not an integer
+    if "sub" in to_encode:
+        to_encode["sub"] = str(to_encode["sub"])
+    if expires_delta:
+        expire = datetime.utcnow() + expires_delta
+    else:
+        expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
+    to_encode.update({"exp": expire})
+    encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
+    return encoded_jwt
+def verify_token(token: str) -> dict:
+    """Verify and decode a JWT token."""
+    try:
+        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
+        return payload
+    except jwt.ExpiredSignatureError:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Token has expired",
+        )
+    except jwt.InvalidTokenError:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Could not validate credentials",
+        )
+def get_current_user(
+    credentials: HTTPAuthorizationCredentials = Depends(security),
+    db: Session = Depends(get_db)
+) -> User:
+    """Get the current authenticated user from JWT token."""
+    token = credentials.credentials
+    payload = verify_token(token)
+    user_id: int = payload.get("sub")
+    if user_id is None:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Could not validate credentials",
+        )
+    # Convert user_id back to integer for database query
+    try:
+        user_id_int = int(user_id)
+    except (ValueError, TypeError):
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Invalid user ID in token",
+        )
+    user = db.query(User).filter(User.id == user_id_int).first()
+    if user is None:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="User not found",
+        )
+    return user

backend/app/auth_routes.py ADDED Viewed

	@@ -0,0 +1,347 @@

+import os
+from fastapi import APIRouter, Depends, HTTPException, Body
+from pydantic import BaseModel, EmailStr
+from sqlalchemy.orm import Session
+from .models import User, APIKey
+from .auth import create_access_token, get_current_user
+from .firebase_auth import verify_firebase_token
+from .otp_service import request_otp, verify_otp
+from .email_validator import validate_business_email, is_business_email
+from .api_key_auth import generate_api_key, hash_api_key, get_api_key_prefix
+from .db import SessionLocal
+def get_db():
+    """
+    Database dependency.
+    Yields a session created by SessionLocal() and closes it when the
+    generator is finalized, whether or not the consumer raised.
+    """
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        # Always release the session, even on error
+        db.close()
+# Router on which the authentication and API-key endpoints below are registered
+router = APIRouter()
+# Request body for POST /api/auth/firebase/login
+class FirebaseLoginRequest(BaseModel):
+    id_token: str
+# Request body for POST /api/auth/otp/request
+class OTPRequestRequest(BaseModel):
+    email: EmailStr
+# Request body for POST /api/auth/otp/verify
+class OTPVerifyRequest(BaseModel):
+    email: EmailStr
+    otp: str
+# Request body for POST /api/auth/api-key/create
+class CreateAPIKeyRequest(BaseModel):
+    name: str  # User-friendly name for the API key
+@router.post("/api/auth/firebase/login")
+async def firebase_login(
+    request: FirebaseLoginRequest,
+    db: Session = Depends(get_db)
+):
+    """
+    Login with Firebase ID token.
+    Validates business email and creates/updates user.
+    """
+    try:
+        # Verify Firebase token
+        user_info = await verify_firebase_token(request.id_token)
+        email = user_info.get('email')
+        if not email:
+            raise HTTPException(status_code=400, detail="Email not found in Firebase token")
+        # Validate business email
+        if not is_business_email(email):
+            raise HTTPException(
+                status_code=400,
+                detail="Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."
+            )
+        # Get or create user
+        user = db.query(User).filter(
+            (User.email == email.lower()) | (User.firebase_uid == user_info['uid'])
+        ).first()
+        if not user:
+            user = User(
+                email=email.lower(),
+                name=user_info.get('name'),
+                picture=user_info.get('picture'),
+                firebase_uid=user_info['uid'],
+                auth_method='firebase',
+                email_verified=True
+            )
+            db.add(user)
+            db.commit()
+            db.refresh(user)
+            print(f"[INFO] New user created via Firebase: {email}")
+            # Enrich contact data from Apollo.io and update Brevo + Monday.com
+            try:
+                from .apollo_service import enrich_contact_by_email
+                from .brevo_service import create_brevo_contact, BREVO_TRIAL_LIST_ID
+                from .monday_service import create_monday_lead
+                # Enrich contact data from Apollo.io
+                enriched_data = await enrich_contact_by_email(email)
+                # Use enriched data if available, otherwise use basic data
+                first_name = enriched_data.get("first_name") if enriched_data else None
+                last_name = enriched_data.get("last_name") if enriched_data else None
+                org_name = enriched_data.get("organization_name") if enriched_data else None
+                # Fallback to Firebase data if Apollo didn't provide it
+                if not first_name or not last_name:
+                    full_name = user_info.get('name', '')
+                    if full_name:
+                        name_parts = full_name.strip().split(' ', 1)
+                        first_name = first_name or (name_parts[0] if name_parts else None)
+                        last_name = last_name or (name_parts[1] if len(name_parts) > 1 else None)
+                if not org_name:
+                    org_domain = email.split('@')[1] if '@' in email else None
+                    org_name = org_domain.split('.')[0].capitalize() if org_domain else None
+                # Update Brevo contact with enriched data
+                await create_brevo_contact(
+                    email=email,
+                    first_name=first_name,
+                    last_name=last_name,
+                    organization_name=org_name or (enriched_data.get("organization_name") if enriched_data else None),
+                    phone_number=enriched_data.get("phone_number") if enriched_data else None,
+                    linkedin_url=enriched_data.get("linkedin_url") if enriched_data else None,
+                    title=enriched_data.get("title") if enriched_data else None,
+                    headline=enriched_data.get("headline") if enriched_data else None,
+                    organization_website=enriched_data.get("organization_website") if enriched_data else None,
+                    organization_address=enriched_data.get("organization_address") if enriched_data else None,
+                    list_id=BREVO_TRIAL_LIST_ID
+                )
+                # Create lead in Monday.com
+                await create_monday_lead(
+                    email=email,
+                    first_name=first_name,
+                    last_name=last_name,
+                    phone_number=enriched_data.get("phone_number") if enriched_data else None,
+                    linkedin_url=enriched_data.get("linkedin_url") if enriched_data else None,
+                    title=enriched_data.get("title") if enriched_data else None,
+                    headline=enriched_data.get("headline") if enriched_data else None,
+                    organization_name=org_name or (enriched_data.get("organization_name") if enriched_data else None),
+                    organization_website=enriched_data.get("organization_website") if enriched_data else None,
+                    organization_address=enriched_data.get("organization_address") if enriched_data else None,
+                )
+            except Exception as e:
+                # Don't fail user creation if integrations fail
+                print(f"[WARNING] Failed to enrich/update contact for {email}: {str(e)}")
+        else:
+            # Update user info
+            user.firebase_uid = user_info['uid']
+            user.email_verified = True
+            user.name = user_info.get('name', user.name)
+            user.picture = user_info.get('picture', user.picture)
+            if user.auth_method != 'firebase':
+                user.auth_method = 'firebase'
+            db.commit()
+            print(f"[INFO] User logged in via Firebase: {email}")
+        # Generate JWT token
+        token = create_access_token(data={"sub": user.id})
+        return {
+            "token": token,
+            "user": {
+                "id": user.id,
+                "email": user.email,
+                "name": user.name,
+                "picture": user.picture,
+                "auth_method": user.auth_method
+            }
+        }
+    except HTTPException:
+        raise
+    except Exception as e:
+        print(f"[ERROR] Firebase login failed: {str(e)}")
+        raise HTTPException(status_code=400, detail=f"Authentication failed: {str(e)}")
+@router.post("/api/auth/otp/request")
+async def request_otp_endpoint(
+    request: OTPRequestRequest,
+    db: Session = Depends(get_db)
+):
+    """
+    Request OTP for email login.
+    Validates business email before sending OTP.
+    """
+    try:
+        # Validate business email
+        validate_business_email(request.email)
+        # Request OTP
+        result = await request_otp(request.email, db)
+        return result
+    except HTTPException:
+        raise
+    except Exception as e:
+        print(f"[ERROR] OTP request failed: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Failed to send OTP: {str(e)}")
+@router.post("/api/auth/otp/verify")
+async def verify_otp_endpoint(
+    request: OTPVerifyRequest,
+    db: Session = Depends(get_db)
+):
+    """
+    Verify OTP and login.
+    Validates business email and OTP code.
+    """
+    try:
+        # Validate business email
+        validate_business_email(request.email)
+        # Verify OTP
+        user = await verify_otp(request.email, request.otp, db)
+        # Generate JWT token
+        token = create_access_token(data={"sub": user.id})
+        return {
+            "token": token,
+            "user": {
+                "id": user.id,
+                "email": user.email,
+                "name": user.name,
+                "picture": user.picture,
+                "auth_method": user.auth_method
+            }
+        }
+    except HTTPException:
+        raise
+    except Exception as e:
+        print(f"[ERROR] OTP verification failed: {str(e)}")
+        raise HTTPException(status_code=400, detail=f"OTP verification failed: {str(e)}")
+@router.get("/api/auth/me")
+async def get_current_user_info(current_user: User = Depends(get_current_user)):
+    """Get current user information."""
+    return {
+        "id": current_user.id,
+        "email": current_user.email,
+        "name": current_user.name,
+        "picture": current_user.picture,
+        "auth_method": current_user.auth_method,
+    }
+@router.post("/api/auth/api-key/create")
+async def create_api_key(
+    request: CreateAPIKeyRequest,
+    current_user: User = Depends(get_current_user),
+    db: Session = Depends(get_db)
+):
+    """
+    Create a new API key for the current user.
+    Returns the API key (only shown once - store it securely!).
+    """
+    if not request.name or not request.name.strip():
+        raise HTTPException(status_code=400, detail="API key name is required")
+    # Generate new API key
+    api_key = generate_api_key()
+    key_hash = hash_api_key(api_key)
+    key_prefix = get_api_key_prefix(api_key)
+    # Create API key record
+    api_key_record = APIKey(
+        user_id=current_user.id,
+        name=request.name.strip(),
+        key_hash=key_hash,
+        key_prefix=key_prefix,
+        is_active=True
+    )
+    db.add(api_key_record)
+    db.commit()
+    db.refresh(api_key_record)
+    print(f"[INFO] API key created for user {current_user.email}: {key_prefix}")
+    return {
+        "success": True,
+        "api_key": api_key,  # Only returned once - user must save this!
+        "key_id": api_key_record.id,
+        "key_prefix": key_prefix,
+        "name": api_key_record.name,
+        "created_at": api_key_record.created_at.isoformat() if api_key_record.created_at else None,
+        "message": "API key created successfully. Store this key securely - it will not be shown again!"
+    }
+@router.get("/api/auth/api-keys")
+async def list_api_keys(
+    current_user: User = Depends(get_current_user),
+    db: Session = Depends(get_db)
+):
+    """
+    List all API keys for the current user.
+    Only shows key prefix, not the full key for security.
+    """
+    api_keys = (
+        db.query(APIKey)
+        .filter(APIKey.user_id == current_user.id)
+        .order_by(APIKey.created_at.desc())
+        .all()
+    )
+    return {
+        "success": True,
+        "api_keys": [
+            {
+                "id": key.id,
+                "name": key.name,
+                "key_prefix": key.key_prefix,
+                "is_active": key.is_active,
+                "last_used_at": key.last_used_at.isoformat() if key.last_used_at else None,
+                "created_at": key.created_at.isoformat() if key.created_at else None,
+            }
+            for key in api_keys
+        ]
+    }
+@router.delete("/api/auth/api-key/{key_id}")
+async def delete_api_key(
+    key_id: int,
+    current_user: User = Depends(get_current_user),
+    db: Session = Depends(get_db)
+):
+    """
+    Delete (deactivate) an API key.
+    """
+    api_key = (
+        db.query(APIKey)
+        .filter(APIKey.id == key_id)
+        .filter(APIKey.user_id == current_user.id)
+        .first()
+    )
+    if not api_key:
+        raise HTTPException(status_code=404, detail="API key not found")
+    # Soft delete by deactivating
+    api_key.is_active = False
+    db.commit()
+    print(f"[INFO] API key {api_key.key_prefix} deactivated for user {current_user.email}")
+    return {
+        "success": True,
+        "message": "API key deactivated successfully"
+    }

backend/app/brevo_service.py ADDED Viewed

	@@ -0,0 +1,486 @@

+"""
+Brevo (formerly Sendinblue) email service for sending transactional emails.
+Reference: https://developers.brevo.com/reference/sendtransacemail
+"""
+import os
+import httpx
+from typing import Optional, Dict, Any
+from difflib import SequenceMatcher
+# Brevo credentials and sender identity, read from the environment at import time
+BREVO_API_KEY = os.environ.get("BREVO_API_KEY", "")
+# Transactional-email endpoint (see the reference link in the module docstring)
+BREVO_API_URL = "https://api.brevo.com/v3/smtp/email"
+BREVO_SENDER_EMAIL = os.environ.get("BREVO_SENDER_EMAIL", "noreply@yourdomain.com")
+BREVO_SENDER_NAME = os.environ.get("BREVO_SENDER_NAME", "EZOFIS AI")
+# NOTE: int() raises ValueError at import if BREVO_TRIAL_LIST_ID is set to a non-integer value
+BREVO_TRIAL_LIST_ID = int(os.environ.get("BREVO_TRIAL_LIST_ID", "5"))  # Default to 5 for "VRP Trials"
+# Maps lowercase field names (and common aliases) to Brevo contact attribute names
+BREVO_ATTRIBUTE_MAP = {
+    "first_name": "FIRSTNAME",
+    "last_name": "LASTNAME",
+    "organization_name": "COMPANY",
+    "phone_number": "SMS",
+    "linkedin_url": "LINKEDIN",
+    "title": "JOB_TITLE",
+    "headline": "HEADLINE",
+    "organization_website": "WEBSITE",
+    "organization_address": "ADDRESS",
+    # Common variations
+    "firstname": "FIRSTNAME",
+    "fname": "FIRSTNAME",
+    "given_name": "FIRSTNAME",
+    "lastname": "LASTNAME",
+    "lname": "LASTNAME",
+    "surname": "LASTNAME",
+    "family_name": "LASTNAME",
+    "company": "COMPANY",
+    "org": "COMPANY",
+    "organization": "COMPANY",
+    "phone": "SMS",
+    "mobile": "SMS",
+    "telephone": "SMS",
+    "linkedin": "LINKEDIN",
+    "linkedin_profile": "LINKEDIN",
+    "job_title": "JOB_TITLE",
+    "position": "JOB_TITLE",
+    "role": "JOB_TITLE",
+    "website": "WEBSITE",
+    "url": "WEBSITE",
+    "web": "WEBSITE",
+    "address": "ADDRESS",
+    "location": "ADDRESS",
+}
+def _get_brevo_attribute_name(field_name: str) -> Optional[str]:
+    """
+    Get Brevo attribute name for a given field name using semantic matching.
+    Args:
+        field_name: Field name (e.g., "first_name", "email")
+    Returns:
+        Brevo attribute name (e.g., "FIRSTNAME") or None if not found
+    """
+    # Normalize field name
+    normalized = field_name.lower().replace("_", "").replace("-", "")
+    # Direct lookup first
+    if field_name.lower() in BREVO_ATTRIBUTE_MAP:
+        return BREVO_ATTRIBUTE_MAP[field_name.lower()]
+    if normalized in BREVO_ATTRIBUTE_MAP:
+        return BREVO_ATTRIBUTE_MAP[normalized]
+    # Semantic matching using similarity
+    best_match = None
+    best_score = 0.0
+    for key, value in BREVO_ATTRIBUTE_MAP.items():
+        score = SequenceMatcher(None, normalized, key.lower()).ratio()
+        if score > best_score:
+            best_score = score
+            best_match = value
+    # Only return if similarity is high enough
+    if best_score >= 0.6:
+        return best_match
+    return None
+async def send_otp_email(email: str, otp: str) -> bool:
+    """
+    Send OTP email using Brevo transactional email API.
+    The message is sent with both an HTML body and a plain-text body that
+    carry the same content.
+    Args:
+        email: Recipient email address
+        otp: One-time password code
+    Returns:
+        True if email sent successfully
+    Raises:
+        ValueError: If BREVO_API_KEY is not set
+        Exception: If email sending fails (HTTP error status, timeout, or any
+            other error; each is logged and re-raised as a plain Exception)
+    """
+    if not BREVO_API_KEY:
+        raise ValueError("BREVO_API_KEY environment variable is not set")
+    # Brevo API payload structure
+    # NOTE(review): the "10 minutes" expiry below is display text only; confirm it
+    # matches the lifetime enforced where OTPs are verified.
+    payload = {
+        "sender": {
+            "name": BREVO_SENDER_NAME,
+            "email": BREVO_SENDER_EMAIL
+        },
+        "to": [
+            {
+                "email": email
+            }
+        ],
+        "subject": "Your OTP Code for EZOFIS AI",
+        # Doubled braces in the <style> rules are literal CSS braces escaped for the f-string
+        "htmlContent": f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <meta charset="utf-8">
+            <meta name="viewport" content="width=device-width, initial-scale=1.0">
+            <style>
+                body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; line-height: 1.6; color: #333; margin: 0; padding: 0; background-color: #f4f4f4; }}
+                .container {{ max-width: 600px; margin: 20px auto; background: white; border-radius: 10px; overflow: hidden; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }}
+                .content {{ padding: 40px 30px; }}
+                .content p {{ margin: 0 0 15px 0; color: #555; }}
+                .otp-box {{ background: #f8f9fa; border: 2px dashed #667eea; padding: 30px; text-align: center; margin: 30px 0; border-radius: 8px; }}
+                .otp-label {{ font-size: 14px; color: #666; margin-bottom: 10px; }}
+                .otp-code {{ font-size: 36px; font-weight: bold; color: #667eea; letter-spacing: 8px; font-family: 'Courier New', monospace; }}
+                .expiry {{ color: #888; font-size: 14px; margin-top: 20px; }}
+                .footer {{ text-align: center; margin-top: 30px; padding-top: 20px; border-top: 1px solid #eee; color: #999; font-size: 12px; }}
+                .warning {{ background: #fff3cd; border-left: 4px solid #ffc107; padding: 15px; margin: 20px 0; border-radius: 4px; font-size: 14px; color: #856404; }}
+            </style>
+        </head>
+        <body>
+            <div class="container">
+                <div class="content">
+                    <p>Hello,</p>
+                    <p>You requested a one-time password (OTP) to sign in to your EZOFIS account.</p>
+                    <div class="otp-box">
+                        <div class="otp-label">Your OTP code is:</div>
+                        <div class="otp-code">{otp}</div>
+                    </div>
+                    <p class="expiry">This code will expire in <strong>10 minutes</strong>.</p>
+                    <div class="warning">
+                        <strong>⚠️ Security Notice:</strong> If you didn't request this code, please ignore this email. Do not share this code with anyone.
+                    </div>
+                    <div class="footer">
+                        <p>© EZOFIS - Agentic Intelligence Platform</p>
+                        <p>This is an automated message, please do not reply.</p>
+                    </div>
+                </div>
+            </div>
+        </body>
+        </html>
+        """,
+        "textContent": f"""
+Your OTP Code for EZOFIS AI
+Hello,
+You requested a one-time password (OTP) to sign in to your EZOFIS account.
+Your OTP code is: {otp}
+This code will expire in 10 minutes.
+⚠️ Security Notice: If you didn't request this code, please ignore this email. Do not share this code with anyone.
+© EZOFIS - Agentic Intelligence Platform
+This is an automated message, please do not reply.
+        """
+    }
+    headers = {
+        "accept": "application/json",
+        "api-key": BREVO_API_KEY,
+        "content-type": "application/json"
+    }
+    try:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.post(BREVO_API_URL, json=payload, headers=headers)
+            # Turns 4xx/5xx replies into httpx.HTTPStatusError, handled below
+            response.raise_for_status()
+            result = response.json()
+            message_id = result.get('messageId', 'N/A')
+            print(f"[INFO] Brevo email sent successfully to {email}. Message ID: {message_id}")
+            return True
+    except httpx.HTTPStatusError as e:
+        # Prefer the "message" field of Brevo's JSON error body; fall back to the status code
+        error_detail = {}
+        try:
+            error_detail = e.response.json() if e.response else {}
+        except:
+            error_detail = {"message": str(e)}
+        error_msg = error_detail.get('message', f'HTTP {e.response.status_code}' if e.response else 'Unknown error')
+        print(f"[ERROR] Brevo API error: {e.response.status_code if e.response else 'N/A'} - {error_msg}")
+        raise Exception(f"Failed to send email via Brevo: {error_msg}")
+    except httpx.TimeoutException:
+        print(f"[ERROR] Brevo API request timed out")
+        raise Exception("Email service timeout. Please try again.")
+    except Exception as e:
+        # Catch-all for anything raised inside the try body that is not handled above
+        print(f"[ERROR] Brevo email sending failed: {str(e)}")
+        raise Exception(f"Failed to send email: {str(e)}")
+async def send_share_email(recipient_email: str, sender_email: str, share_link: str, sender_name: str = None) -> bool:
+    """
+    Send a "document shared with you" email using the Brevo transactional email API.
+    The sender's display name and the share link are HTML-escaped before being
+    interpolated into the HTML body, because both can carry user-controlled text.
+    The subject line and the plain-text body use the raw (unescaped) values.
+    Args:
+        recipient_email: Recipient email address
+        sender_email: Email address of the user who is sharing (used to derive a
+            display name when sender_name is empty)
+        share_link: Share link URL placed in the button and the plain-text body
+        sender_name: Sender's display name (optional, falls back to a name derived
+            from the local part of sender_email)
+    Returns:
+        True if email sent successfully
+    Raises:
+        ValueError: If BREVO_API_KEY is not set
+        Exception: If email sending fails (HTTP error, timeout, or any other error)
+    """
+    import html  # local import: only needed to escape values placed in the HTML body
+    if not BREVO_API_KEY:
+        raise ValueError("BREVO_API_KEY environment variable is not set")
+    # Determine sender display name: use sender_name if available, otherwise extract from email
+    # This is the logged-in user's name, NOT the email sender name (BREVO_SENDER_NAME)
+    # BREVO_SENDER_NAME is only used for the "From" field, not the email body
+    if sender_name and sender_name.strip():
+        # Use the actual logged-in user's name
+        sender_display = sender_name.strip()
+        print(f"[INFO] Using user's name from database: {sender_display}")
+    else:
+        # Extract name from email (part before @) and format it nicely:
+        # "seth.smith" -> "Seth Smith", "seth_smith" -> "Seth Smith", "seth" -> "Seth"
+        # Dots take precedence over underscores; only one separator kind is split on.
+        email_name = sender_email.split('@')[0]
+        separator = '.' if '.' in email_name else '_'
+        sender_display = ' '.join(part.capitalize() for part in email_name.split(separator))
+        print(f"[INFO] Extracted name from email: {sender_display} (from {sender_email})")
+    # Escaped copies for the HTML body only; a name such as "<b>x</b>" or a link
+    # containing quotes must not be able to break out of the markup.
+    safe_sender = html.escape(sender_display)
+    safe_link = html.escape(share_link, quote=True)
+    # Brevo API payload structure
+    # Note: BREVO_SENDER_NAME is used only for the "From" field in the email header
+    # The email body uses sender_display (the logged-in user's name)
+    payload = {
+        "sender": {
+            "name": BREVO_SENDER_NAME,
+            "email": BREVO_SENDER_EMAIL
+        },
+        "to": [
+            {
+                "email": recipient_email
+            }
+        ],
+        "subject": f"{sender_display} shared a document extraction with you",
+        "htmlContent": f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <meta charset="utf-8">
+            <meta name="viewport" content="width=device-width, initial-scale=1.0">
+            <style>
+                body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; line-height: 1.6; color: #333; margin: 0; padding: 0; background-color: #f4f4f4; }}
+                .container {{ max-width: 600px; margin: 20px auto; background: white; border-radius: 10px; overflow: hidden; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }}
+                .content {{ padding: 40px 30px; }}
+                .content p {{ margin: 0 0 15px 0; color: #555; }}
+                .share-box {{ background: #f8f9fa; border: 2px solid #667eea; padding: 30px; text-align: center; margin: 30px 0; border-radius: 8px; }}
+                .share-button {{ display: inline-block; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: #ffffff !important; padding: 15px 30px; text-decoration: none; border-radius: 8px; font-weight: 600; margin-top: 20px; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; }}
+                .share-button:hover {{ color: #ffffff !important; }}
+                .footer {{ text-align: center; margin-top: 30px; padding-top: 20px; border-top: 1px solid #eee; color: #999; font-size: 12px; }}
+            </style>
+        </head>
+        <body>
+            <div class="container">
+                <div class="content">
+                    <p>Hello,</p>
+                    <p><strong>{safe_sender}</strong> wants you to take a look at a document extraction output.</p>
+                    <div class="share-box">
+                        <p style="margin-bottom: 20px; color: #666;">Click the button below to view the shared extraction:</p>
+                        <a href="{safe_link}" class="share-button">View Shared Extraction</a>
+                    </div>
+                    <p style="color: #888; font-size: 14px;">You'll need to sign in to your EZOFIS account to view this extraction. If you don't have an account, you can create one using the link above.</p>
+                    <div class="footer">
+                        <p>© EZOFIS - Agentic Intelligence Platform</p>
+                        <p>This is an automated message, please do not reply.</p>
+                    </div>
+                </div>
+            </div>
+        </body>
+        </html>
+        """,
+        "textContent": f"""
+{sender_display} shared a document extraction with you
+Hello,
+{sender_display} wants you to take a look at a document extraction output.
+View the shared extraction: {share_link}
+You'll need to sign in to your EZOFIS account to view this extraction. If you don't have an account, you can create one using the link above.
+© EZOFIS - Agentic Intelligence Platform
+This is an automated message, please do not reply.
+        """
+    }
+    headers = {
+        "accept": "application/json",
+        "api-key": BREVO_API_KEY,
+        "content-type": "application/json"
+    }
+    try:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.post(BREVO_API_URL, json=payload, headers=headers)
+            response.raise_for_status()
+            result = response.json()
+            message_id = result.get('messageId', 'N/A')
+            print(f"[INFO] Brevo share email sent successfully to {recipient_email}. Message ID: {message_id}")
+            return True
+    except httpx.HTTPStatusError as e:
+        error_detail = {}
+        try:
+            error_detail = e.response.json() if e.response else {}
+        except Exception:
+            # Error body was not valid JSON; fall back to the exception text.
+            # (Exception rather than a bare except so task cancellation is not swallowed.)
+            error_detail = {"message": str(e)}
+        error_msg = error_detail.get('message', f'HTTP {e.response.status_code}' if e.response else 'Unknown error')
+        print(f"[ERROR] Brevo API error: {e.response.status_code if e.response else 'N/A'} - {error_msg}")
+        raise Exception(f"Failed to send email via Brevo: {error_msg}") from e
+    except httpx.TimeoutException as e:
+        print("[ERROR] Brevo API request timed out")
+        raise Exception("Email service timeout. Please try again.") from e
+    except Exception as e:
+        print(f"[ERROR] Brevo email sending failed: {str(e)}")
+        raise Exception(f"Failed to send email: {str(e)}") from e
+async def create_brevo_contact(
+    email: str,
+    first_name: Optional[str] = None,
+    last_name: Optional[str] = None,
+    organization_name: Optional[str] = None,
+    phone_number: Optional[str] = None,
+    linkedin_url: Optional[str] = None,
+    title: Optional[str] = None,
+    headline: Optional[str] = None,
+    organization_website: Optional[str] = None,
+    organization_address: Optional[str] = None,
+    list_id: Optional[int] = None
+) -> bool:
+    """
+    Create a contact in Brevo and optionally add to a list.
+    This is best-effort: every failure is logged and reported as False, nothing is raised.
+    If the create call answers 400 with an "already exists" message, the contact is
+    updated with a PUT on its (URL-encoded) email identifier instead.
+    Args:
+        email: Contact email address (required); lower-cased before sending
+        first_name: Contact first name
+        last_name: Contact last name
+        organization_name: Organization name
+        phone_number: Phone number
+        linkedin_url: LinkedIn profile URL
+        title: Job title
+        headline: Professional headline
+        organization_website: Company website
+        organization_address: Company address
+        list_id: ID of the list to add contact to (e.g., 5 for "VRP Trials")
+    Returns:
+        True if contact created or updated successfully, False otherwise
+        (including when BREVO_API_KEY is not configured)
+    """
+    from urllib.parse import quote  # local import: used to encode the email in the update URL
+    if not BREVO_API_KEY:
+        print("[WARNING] BREVO_API_KEY not set, skipping Brevo contact creation")
+        return False
+    # Prepare contact attributes using automatic field mapping
+    attributes = {}
+    # Map all fields automatically
+    field_mappings = {
+        "first_name": first_name,
+        "last_name": last_name,
+        "organization_name": organization_name,
+        "phone_number": phone_number,
+        "linkedin_url": linkedin_url,
+        "title": title,
+        "headline": headline,
+        "organization_website": organization_website,
+        "organization_address": organization_address,
+    }
+    for field_name, field_value in field_mappings.items():
+        if field_value:
+            brevo_attr = _get_brevo_attribute_name(field_name)
+            if brevo_attr:
+                attributes[brevo_attr] = str(field_value).strip()  # Ensure it's a string and trimmed
+                print(f"[DEBUG] Mapped '{field_name}' ({field_value}) to Brevo attribute '{brevo_attr}'")
+            else:
+                print(f"[DEBUG] No Brevo attribute mapping found for '{field_name}'")
+        else:
+            print(f"[DEBUG] Skipping '{field_name}' - value is empty/None")
+    print(f"[DEBUG] Final Brevo attributes to send: {attributes}")
+    normalized_email = email.lower()
+    # Prepare contact data
+    contact_data = {
+        "email": normalized_email,
+        "updateEnabled": True  # Update existing contact if email already exists
+    }
+    if attributes:
+        contact_data["attributes"] = attributes
+    # Add to list if list_id is provided
+    if list_id:
+        contact_data["listIds"] = [list_id]
+    headers = {
+        "accept": "application/json",
+        "api-key": BREVO_API_KEY,
+        "content-type": "application/json"
+    }
+    try:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.post(
+                "https://api.brevo.com/v3/contacts",
+                json=contact_data,
+                headers=headers
+            )
+            if response.status_code in [200, 201, 204]:
+                print(f"[INFO] Successfully created Brevo contact: {email}" +
+                      (f" and added to list {list_id}" if list_id else ""))
+                return True
+            elif response.status_code == 400:
+                # Contact might already exist, try to update it
+                try:
+                    error_data = response.json()
+                    if "already exists" in str(error_data).lower():
+                        print(f"[INFO] Contact {email} already exists in Brevo, updating...")
+                        # Use PUT to update existing contact. The email is a path segment,
+                        # so it is percent-encoded ("@", "+", "/" would otherwise be
+                        # interpreted as URL syntax).
+                        update_response = await client.put(
+                            f"https://api.brevo.com/v3/contacts/{quote(normalized_email, safe='')}",
+                            json=contact_data,
+                            headers=headers
+                        )
+                        if update_response.status_code in [200, 204]:
+                            print(f"[INFO] Successfully updated Brevo contact: {email}" +
+                                  (f" and added to list {list_id}" if list_id else ""))
+                            return True
+                        print(f"[ERROR] Brevo contact update failed: {update_response.status_code} - {update_response.text}")
+                except Exception as update_err:
+                    # Still best-effort, but no longer silent, and task cancellation
+                    # (a BaseException) is no longer swallowed by a bare except.
+                    print(f"[ERROR] Brevo contact update attempt failed: {str(update_err)}")
+                error_data = response.text
+                print(f"[ERROR] Failed to create Brevo contact: {response.status_code} - {error_data}")
+                return False
+            else:
+                error_data = response.text
+                print(f"[ERROR] Failed to create Brevo contact: {response.status_code} - {error_data}")
+                return False
+    except httpx.HTTPStatusError as e:
+        print(f"[ERROR] Brevo API HTTP error: {e.response.status_code} - {e.response.text}")
+        return False
+    except Exception as e:
+        print(f"[ERROR] Failed to create Brevo contact: {str(e)}")
+        return False

backend/app/db.py CHANGED Viewed

@@ -1,18 +1,18 @@
-import os
-from sqlalchemy import create_engine
-from sqlalchemy.orm import sessionmaker, declarative_base
-# SQLite DB path. In Docker / HF we’ll set DB_PATH env, default is local "data/app.db"
-DB_PATH = os.environ.get("DB_PATH", "data/app.db")
-# Create SQLAlchemy engine
-engine = create_engine(
-    f"sqlite:///{DB_PATH}",
-    connect_args={"check_same_thread": False},
-)
-# Session factory
-SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
-# Base model class
-Base = declarative_base()

+import os
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker, declarative_base
+# SQLite DB path. In Docker / HF we’ll set DB_PATH env, default is local "data/app.db"
+DB_PATH = os.environ.get("DB_PATH", "data/app.db")
+# Create SQLAlchemy engine
+# check_same_thread=False is passed through to the SQLite driver so a connection
+# may be used from a thread other than the one that created it.
+# NOTE(review): presumably required because request handlers run on several threads - confirm.
+engine = create_engine(
+    f"sqlite:///{DB_PATH}",
+    connect_args={"check_same_thread": False},
+)
+# Session factory
+# Sessions neither autocommit nor autoflush; callers commit explicitly.
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+# Base model class
+# Declarative base that the ORM models are expected to subclass.
+Base = declarative_base()

backend/app/email_validator.py ADDED Viewed

	@@ -0,0 +1,61 @@

+"""
+Email validation utilities to ensure only business emails are allowed.
+"""
+from fastapi import HTTPException
+# List of personal email domains to block
+PERSONAL_EMAIL_DOMAINS = {
+    'gmail.com', 'yahoo.com', 'hotmail.com', 'outlook.com',
+    'aol.com', 'icloud.com', 'mail.com', 'protonmail.com',
+    'yandex.com', 'zoho.com', 'gmx.com', 'live.com', 'msn.com',
+    'me.com', 'mac.com', 'yahoo.co.uk', 'yahoo.co.jp', 'yahoo.fr',
+    'yahoo.de', 'yahoo.it', 'yahoo.es', 'yahoo.in', 'yahoo.com.au',
+    'gmail.co.uk', 'gmail.fr', 'gmail.de', 'gmail.it', 'gmail.es',
+    'gmail.in', 'gmail.com.au', 'hotmail.co.uk', 'hotmail.fr',
+    'hotmail.de', 'hotmail.it', 'hotmail.es', 'outlook.co.uk',
+    'outlook.fr', 'outlook.de', 'outlook.it', 'outlook.es',
+    'rediffmail.com', 'sina.com', 'qq.com', '163.com', '126.com',
+    'mail.ru', 'inbox.com', 'fastmail.com', 'tutanota.com',
+    'hey.com', 'pm.me'
+}
+def is_business_email(email: str) -> bool:
+    """
+    Check if email is a business email (not personal).
+    The domain is everything after the LAST '@', compared case-insensitively
+    against PERSONAL_EMAIL_DOMAINS after trimming whitespace and a trailing dot.
+    Args:
+        email: Email address to validate
+    Returns:
+        True if business email, False if personal email or if the address has
+        no '@', an empty local part, or an empty domain
+    """
+    if not email or '@' not in email:
+        return False
+    # rpartition splits on the last '@', so "a@corp.com@gmail.com" is judged by
+    # "gmail.com" and cannot slip past the block list via an earlier segment.
+    local_part, _, domain = email.strip().rpartition('@')
+    # "gmail.com." (fully-qualified form) is the same domain as "gmail.com".
+    domain = domain.strip().lower().rstrip('.')
+    if not local_part or not domain:
+        # "user@" or "@corp.com" is not a usable address, let alone a business one.
+        return False
+    return domain not in PERSONAL_EMAIL_DOMAINS
+def validate_business_email(email: str) -> None:
+    """
+    Reject missing or personal email addresses with an HTTP 400.
+    Args:
+        email: Email address to validate
+    Returns:
+        None when the address is present and is_business_email() accepts it
+    Raises:
+        HTTPException: 400 if the email is empty/None, or if is_business_email()
+            classifies it as not a business address
+    """
+    # Happy path first: a non-empty address that passes the business check.
+    if email and is_business_email(email):
+        return
+    if not email:
+        problem = "Email address is required"
+    else:
+        problem = "Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."
+    raise HTTPException(status_code=400, detail=problem)

backend/app/firebase_auth.py ADDED Viewed

	@@ -0,0 +1,92 @@

+"""
+Firebase Authentication utilities.
+"""
+import os
+import json
+import firebase_admin
+from firebase_admin import auth, credentials
+from fastapi import HTTPException
+# Initialize Firebase Admin SDK
+_firebase_initialized = False
+def initialize_firebase():
+    """
+    Initialize the Firebase Admin SDK once per process.
+    Credential sources are tried in this order:
+      1. FIREBASE_SERVICE_ACCOUNT_JSON - service account as a JSON string
+      2. FIREBASE_SERVICE_ACCOUNT_KEY  - path to a service account file
+      3. firebase_admin.initialize_app() with no explicit credentials
+    A malformed or invalid value in (1) is logged and the next source is tried.
+    Raises:
+        HTTPException: 500 if initialization with default credentials fails
+    """
+    global _firebase_initialized
+    if _firebase_initialized:
+        return
+    if firebase_admin._apps:
+        # An app was already registered (e.g. by another module); remember that
+        # so later calls take the fast path above.
+        _firebase_initialized = True
+        return
+    # Try to get service account from environment variable (JSON string)
+    service_account_json = os.environ.get("FIREBASE_SERVICE_ACCOUNT_JSON")
+    if service_account_json:
+        try:
+            service_account_info = json.loads(service_account_json)
+            cred = credentials.Certificate(service_account_info)
+            firebase_admin.initialize_app(cred)
+            _firebase_initialized = True
+            print("[INFO] Firebase Admin SDK initialized from environment variable")
+            return
+        except ValueError as e:
+            # json.JSONDecodeError is a ValueError, and a well-formed JSON document
+            # that is not a valid service account also surfaces as ValueError.
+            # Either way, fall through to the next credential source.
+            print(f"[WARNING] Failed to parse FIREBASE_SERVICE_ACCOUNT_JSON: {e}")
+    # Try to get service account from file path
+    service_account_path = os.environ.get("FIREBASE_SERVICE_ACCOUNT_KEY")
+    if service_account_path and os.path.exists(service_account_path):
+        cred = credentials.Certificate(service_account_path)
+        firebase_admin.initialize_app(cred)
+        _firebase_initialized = True
+        print(f"[INFO] Firebase Admin SDK initialized from file: {service_account_path}")
+        return
+    # Try to use default credentials (for Google Cloud environments)
+    try:
+        firebase_admin.initialize_app()
+        _firebase_initialized = True
+        print("[INFO] Firebase Admin SDK initialized with default credentials")
+        return
+    except Exception as e:
+        print(f"[WARNING] Firebase initialization failed: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail="Firebase not configured. Please set FIREBASE_SERVICE_ACCOUNT_JSON or FIREBASE_SERVICE_ACCOUNT_KEY environment variable."
+        )
+async def verify_firebase_token(id_token: str) -> dict:
+    """
+    Validate a Firebase ID token and extract the caller's basic profile.
+    Args:
+        id_token: Firebase ID token from client
+    Returns:
+        Dict with keys 'uid', 'email', 'name', 'picture'; the last three are
+        None when the decoded token does not carry them
+    Raises:
+        HTTPException: 401 if verification fails for any reason; whatever
+            initialize_firebase() raises is propagated unchanged
+    """
+    # SDK setup happens outside the try block so its errors are not turned into 401s.
+    initialize_firebase()
+    try:
+        claims = auth.verify_id_token(id_token)
+        profile = {'uid': claims['uid']}
+        for optional_key in ('email', 'name', 'picture'):
+            profile[optional_key] = claims.get(optional_key)
+    except ValueError as e:
+        raise HTTPException(
+            status_code=401,
+            detail=f"Invalid Firebase token: {str(e)}"
+        )
+    except Exception as e:
+        raise HTTPException(
+            status_code=401,
+            detail=f"Firebase authentication failed: {str(e)}"
+        )
+    return profile

backend/app/main.py CHANGED Viewed

@@ -1,292 +1,786 @@
-import os
-import time
-from typing import List, Dict
-from fastapi import FastAPI, UploadFile, File, Depends
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.staticfiles import StaticFiles
-from sqlalchemy.orm import Session
-from .db import Base, engine, SessionLocal
-from .models import ExtractionRecord
-from .schemas import ExtractionRecordBase, ExtractionStage
-from .openrouter_client import extract_fields_from_document
-# Ensure data dir exists for SQLite
-os.makedirs("data", exist_ok=True)
-# Create tables
-Base.metadata.create_all(bind=engine)
-app = FastAPI(title="Document Capture Demo – Backend")
-# CORS (for safety we allow all; you can tighten later)
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-def get_db():
-    db = SessionLocal()
-    try:
-        yield db
-    finally:
-        db.close()
-@app.get("/ping")
-def ping():
-    """Healthcheck."""
-    return {"status": "ok", "message": "backend alive"}
-def make_stages(total_ms: int, status: str) -> Dict[str, ExtractionStage]:
-    """
-    Build synthetic stage timing data for the History UI.
-    For now we just split total_ms into 4 stages.
-    """
-    if total_ms <= 0:
-        total_ms = 1000
-    return {
-        "uploading": ExtractionStage(
-            time=int(total_ms * 0.15),
-            status="completed",
-            variation="normal",
-        ),
-        "aiAnalysis": ExtractionStage(
-            time=int(total_ms * 0.55),
-            status="completed" if status == "completed" else "failed",
-            variation="normal",
-        ),
-        "dataExtraction": ExtractionStage(
-            time=int(total_ms * 0.2),
-            status="completed" if status == "completed" else "skipped",
-            variation="fast",
-        ),
-        "outputRendering": ExtractionStage(
-            time=int(total_ms * 0.1),
-            status="completed" if status == "completed" else "skipped",
-            variation="normal",
-        ),
-    }
-@app.post("/api/extract")
-async def extract_document(
-    file: UploadFile = File(...),
-    db: Session = Depends(get_db),
-):
-    """
-    Main extraction endpoint used by the Dashboard.
-    1) Read the uploaded file
-    2) Call OpenRouter + Qwen3-VL
-    3) Store a record in SQLite
-    4) Return extraction result + metadata
-    """
-    start = time.time()
-    content = await file.read()
-    content_type = file.content_type or "application/octet-stream"
-    size_mb = len(content) / 1024 / 1024
-    size_str = f"{size_mb:.2f} MB"
-    try:
-        print(f"[INFO] Starting extraction for file: {file.filename}, type: {content_type}, size: {size_str}")
-        extracted = await extract_fields_from_document(content, content_type, file.filename)
-        total_ms = int((time.time() - start) * 1000)
-        print(f"[INFO] Extraction completed. Response keys: {list(extracted.keys())}")
-        print(f"[INFO] Fields extracted: {extracted.get('fields', {})}")
-        confidence = float(extracted.get("confidence", 90))
-        fields = extracted.get("fields", {})
-        # Get full_text for text output
-        full_text = extracted.get("full_text", "")
-        if full_text:
-            full_text_words = len(str(full_text).split())
-            print(f"[INFO] Full text extracted: {full_text_words} words")
-        # Check if fields contain structured data (from table parsing)
-        # If fields is a dict with page_X keys, it's already structured
-        # If fields is empty or simple, add full_text and pages for text display
-        if not fields or (isinstance(fields, dict) and not any(k.startswith("page_") for k in fields.keys())):
-            if full_text:
-                fields["full_text"] = full_text
-            # Also check for pages array
-            pages_data = extracted.get("pages", [])
-            if pages_data and isinstance(pages_data, list):
-                print(f"[INFO] Extracted text from {len(pages_data)} page(s)")
-                fields["pages"] = pages_data
-        # Count fields - if structured data exists, count table rows + metadata
-        if isinstance(fields, dict):
-            # Check if it's structured page data
-            if any(k.startswith("page_") for k in fields.keys()):
-                # Count structured fields (metadata keys + table rows)
-                page_data = list(fields.values())[0] if len(fields) == 1 else fields
-                if isinstance(page_data, dict):
-                    table_rows = page_data.get("table", [])
-                    metadata_keys = len(page_data.get("metadata", {}))
-                    fields_extracted = len(table_rows) + metadata_keys
-                    print(f"[INFO] Structured data: {len(table_rows)} table rows, {metadata_keys} metadata fields")
-                else:
-                    fields_extracted = len(fields)
-            else:
-                # Regular fields count (excluding full_text and pages)
-                fields_extracted = len([k for k in fields.keys() if k not in ["full_text", "pages"]])
-        else:
-            fields_extracted = 0
-        print(f"[INFO] Final stats - confidence: {confidence}, fields_count: {fields_extracted}")
-        status = "completed"
-        error_message = None
-    except Exception as e:
-        import traceback
-        total_ms = int((time.time() - start) * 1000)
-        confidence = 0.0
-        fields = {}
-        fields_extracted = 0
-        status = "failed"
-        error_message = str(e)
-        print(f"[ERROR] Extraction failed: {error_message}")
-        print(f"[ERROR] Traceback: {traceback.format_exc()}")
-    # Save record to DB
-    rec = ExtractionRecord(
-        file_name=file.filename,
-        file_type=content_type,
-        file_size=size_str,
-        status=status,
-        confidence=confidence,
-        fields_extracted=fields_extracted,
-        total_time_ms=total_ms,
-        raw_output=str(fields),
-        error_message=error_message,
-    )
-    db.add(rec)
-    db.commit()
-    db.refresh(rec)
-    stages = make_stages(total_ms, status)
-    # Response shape that frontend will consume
-    return {
-        "id": rec.id,
-        "fileName": rec.file_name,
-        "fileType": rec.file_type,
-        "fileSize": rec.file_size,
-        "status": status,
-        "confidence": confidence,
-        "fieldsExtracted": fields_extracted,
-        "totalTime": total_ms,
-        "fields": fields,
-        "stages": {k: v.dict() for k, v in stages.items()},
-        "errorMessage": error_message,
-    }
-@app.get("/api/history", response_model=List[ExtractionRecordBase])
-def get_history(db: Session = Depends(get_db)):
-    """
-    Used by the History page.
-    Returns last 100 records, with synthetic stage data.
-    """
-    recs = (
-        db.query(ExtractionRecord)
-        .order_by(ExtractionRecord.created_at.desc())
-        .limit(100)
-        .all()
-    )
-    output: List[ExtractionRecordBase] = []
-    for r in recs:
-        stages = make_stages(r.total_time_ms or 1000, r.status or "completed")
-        output.append(
-            ExtractionRecordBase(
-                id=r.id,
-                fileName=r.file_name,
-                fileType=r.file_type or "",
-                fileSize=r.file_size or "",
-                extractedAt=r.created_at,
-                status=r.status or "completed",
-                confidence=r.confidence or 0.0,
-                fieldsExtracted=r.fields_extracted or 0,
-                totalTime=r.total_time_ms or 0,
-                stages=stages,
-                errorMessage=r.error_message,
-            )
-        )
-    return output
-# Static frontend mounting (used after we build React)
-# Dockerfile copies the Vite build into backend/frontend_dist
-# IMPORTANT: API routes must be defined BEFORE this so they take precedence
-frontend_dir = os.path.join(
-    os.path.dirname(os.path.dirname(__file__)), "frontend_dist"
-)
-if os.path.isdir(frontend_dir):
-    # Serve static files (JS, CSS, images, etc.) from assets directory
-    assets_dir = os.path.join(frontend_dir, "assets")
-    if os.path.isdir(assets_dir):
-        app.mount(
-            "/assets",
-            StaticFiles(directory=assets_dir),
-            name="assets",
-        )
-    # Serve static files from root (logo.png, favicon.ico, etc.)
-    # Files in public/ directory are copied to dist/ root during Vite build
-    # These routes must be defined BEFORE the catch-all route
-    @app.get("/logo.png")
-    async def serve_logo():
-        """Serve logo.png from frontend_dist root."""
-        from fastapi.responses import FileResponse
-        logo_path = os.path.join(frontend_dir, "logo.png")
-        if os.path.exists(logo_path):
-            return FileResponse(logo_path, media_type="image/png")
-        from fastapi import HTTPException
-        raise HTTPException(status_code=404)
-    @app.get("/favicon.ico")
-    async def serve_favicon():
-        """Serve favicon.ico from frontend_dist root."""
-        from fastapi.responses import FileResponse
-        favicon_path = os.path.join(frontend_dir, "favicon.ico")
-        if os.path.exists(favicon_path):
-            return FileResponse(favicon_path, media_type="image/x-icon")
-        from fastapi import HTTPException
-        raise HTTPException(status_code=404)
-    # Catch-all route to serve index.html for React Router
-    # This must be last so API routes and static files are matched first
-    @app.get("/{full_path:path}")
-    async def serve_frontend(full_path: str):
-        """
-        Serve React app for all non-API routes.
-        React Router will handle client-side routing.
-        """
-        # Skip API routes, docs, static assets, and known static files
-        if (full_path.startswith("api/") or
-            full_path.startswith("docs") or
-            full_path.startswith("openapi.json") or
-            full_path.startswith("assets/") or
-            full_path in ["logo.png", "favicon.ico"]):
-            from fastapi import HTTPException
-            raise HTTPException(status_code=404)
-        # Serve index.html for all other routes (React Router will handle routing)
-        from fastapi.responses import FileResponse
-        index_path = os.path.join(frontend_dir, "index.html")
-        if os.path.exists(index_path):
-            return FileResponse(index_path)
-        from fastapi import HTTPException
-        raise HTTPException(status_code=404)

+import os
+import time
+from typing import List, Dict, Optional
+from fastapi import FastAPI, UploadFile, File, Depends, Form, HTTPException, Body
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+from sqlalchemy.orm import Session
+from pydantic import BaseModel
+from .db import Base, engine, SessionLocal
+from .models import ExtractionRecord, User, ShareToken
+from .schemas import ExtractionRecordBase, ExtractionStage
+from .openrouter_client import extract_fields_from_document
+from .auth import get_current_user, get_db, verify_token
+from .auth_routes import router as auth_router
+from .api_key_auth import get_user_from_api_key
+from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
+from typing import Optional
+# Allowed file types
+# MIME types accepted for uploads: PDF plus PNG / JPEG / TIFF images.
+# NOTE(review): "image/jpg" and "image/tif" look like tolerated non-standard
+# aliases of "image/jpeg" / "image/tiff" - confirm they are intentional.
+ALLOWED_CONTENT_TYPES = [
+    "application/pdf",
+    "image/png",
+    "image/jpeg",
+    "image/jpg",
+    "image/tiff",
+    "image/tif"
+]
+# Allowed file extensions (for fallback validation)
+ALLOWED_EXTENSIONS = [".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"]
+# Maximum file size: 4 MB
+MAX_FILE_SIZE = 4 * 1024 * 1024  # 4 MB in bytes
+# Ensure data dir exists for SQLite
+# NOTE(review): only the relative "data" directory is created here; if the
+# DB_PATH environment variable points somewhere else, that directory is not
+# created by this line - confirm it exists in deployment.
+os.makedirs("data", exist_ok=True)
+# Create tables
+# Creates any tables declared on Base that do not exist yet.
+Base.metadata.create_all(bind=engine)
+app = FastAPI(title="Document Capture Demo – Backend")
+# Include auth routes
+app.include_router(auth_router)
+# CORS (for safety we allow all; you can tighten later)
+# NOTE(review): allow_origins=["*"] together with allow_credentials=True is the
+# most permissive setting; restrict origins before handling real user sessions.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+def get_db():
+    """
+    Yield a database session and close it once the consumer is finished.
+    Used through Depends(get_db) by the route handlers in this file.
+    """
+    # NOTE(review): this definition rebinds the `get_db` name imported from .auth
+    # at the top of this file; confirm both behave the same and keep only one.
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        # Runs whether the consumer finished normally or raised, so the session is always closed.
+        db.close()
+async def get_current_user_or_api_key_user(
+    api_key_user: Optional[User] = Depends(get_user_from_api_key),
+    credentials: Optional[HTTPAuthorizationCredentials] = Depends(HTTPBearer(auto_error=False)),
+    db: Session = Depends(get_db),
+) -> User:
+    """
+    Flexible authentication: supports both JWT Bearer token and API key.
+    Tries API key first, then falls back to JWT if no API key provided.
+    """
+    # If API key authentication succeeded, use that
+    if api_key_user:
+        return api_key_user
+    # Otherwise, try JWT authentication
+    if credentials:
+        try:
+            from .auth import verify_token
+            token = credentials.credentials
+            payload = verify_token(token)
+            user_id = int(payload.get("sub"))
+            user = db.query(User).filter(User.id == user_id).first()
+            if user:
+                return user
+        except Exception:
+            pass  # Will raise HTTPException below
+    # If neither worked, raise authentication error
+    raise HTTPException(
+        status_code=401,
+        detail="Authentication required. Provide either a Bearer token or X-API-Key header.",
+        headers={"WWW-Authenticate": "Bearer"},
+    )
+@app.get("/ping")
+def ping():
+    """Healthcheck."""
+    return {"status": "ok", "message": "backend alive"}
+def make_stages(total_ms: int, status: str) -> Dict[str, ExtractionStage]:
+    """
+    Build synthetic stage timing data for the History UI.
+    For now we just split total_ms into 4 stages.
+    """
+    if total_ms <= 0:
+        total_ms = 1000
+    return {
+        "uploading": ExtractionStage(
+            time=int(total_ms * 0.15),
+            status="completed",
+            variation="normal",
+        ),
+        "aiAnalysis": ExtractionStage(
+            time=int(total_ms * 0.55),
+            status="completed" if status == "completed" else "failed",
+            variation="normal",
+        ),
+        "dataExtraction": ExtractionStage(
+            time=int(total_ms * 0.2),
+            status="completed" if status == "completed" else "skipped",
+            variation="fast",
+        ),
+        "outputRendering": ExtractionStage(
+            time=int(total_ms * 0.1),
+            status="completed" if status == "completed" else "skipped",
+            variation="normal",
+        ),
+    }
+@app.post("/api/extract")
+async def extract_document(
+    file: UploadFile = File(...),
+    key_fields: Optional[str] = Form(None),
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user_or_api_key_user),
+):
+    """
+    Main extraction endpoint for document parsing.
+    Supports both JWT Bearer token and API key authentication.
+    Authentication methods:
+    1. JWT Bearer token: Header "Authorization: Bearer <token>"
+    2. API Key: Header "X-API-Key: <api_key>"
+    Parameters:
+    - file: Document file (PDF, PNG, JPEG, TIFF) - max 4MB
+    - key_fields: Optional comma-separated list of specific fields to extract (e.g., "Invoice Number,Invoice Date")
+    Returns JSON with extracted fields, text, confidence, and metadata.
+    """
+    start = time.time()
+    content = await file.read()
+    content_type = file.content_type or "application/octet-stream"
+    file_size = len(content)
+    size_mb = file_size / 1024 / 1024
+    size_str = f"{size_mb:.2f} MB"
+    # Convert file content to base64 for storage
+    import base64
+    file_base64 = base64.b64encode(content).decode("utf-8")
+    # Validate file size
+    if file_size > MAX_FILE_SIZE:
+        raise HTTPException(
+            status_code=400,
+            detail=f"File size exceeds 4 MB limit. Your file is {size_mb:.2f} MB."
+        )
+    # Validate file type
+    file_extension = ""
+    if file.filename:
+        file_extension = "." + file.filename.split(".")[-1].lower()
+    is_valid_type = (
+        content_type in ALLOWED_CONTENT_TYPES or
+        file_extension in ALLOWED_EXTENSIONS
+    )
+    if not is_valid_type:
+        raise HTTPException(
+            status_code=400,
+            detail="Only PDF, PNG, JPG, and TIFF files are allowed."
+        )
+    try:
+        print(f"[INFO] Starting extraction for file: {file.filename}, type: {content_type}, size: {size_str}")
+        if key_fields:
+            print(f"[INFO] Key fields requested: {key_fields}")
+        extracted = await extract_fields_from_document(content, content_type, file.filename, key_fields)
+        total_ms = int((time.time() - start) * 1000)
+        print(f"[INFO] Extraction completed. Response keys: {list(extracted.keys())}")
+        print(f"[INFO] Fields extracted: {extracted.get('fields', {})}")
+        confidence = float(extracted.get("confidence", 90))
+        fields = extracted.get("fields", {})
+        # Get Fields from root level (if user provided key_fields)
+        root_fields = extracted.get("Fields", {})
+        # Get full_text for text output
+        full_text = extracted.get("full_text", "")
+        if full_text:
+            full_text_words = len(str(full_text).split())
+            print(f"[INFO] Full text extracted: {full_text_words} words")
+        # Check if fields contain structured data (from table parsing)
+        # If fields is a dict with page_X keys, it's already structured
+        # If fields is empty or simple, add full_text and pages for text display
+        if not fields or (isinstance(fields, dict) and not any(k.startswith("page_") for k in fields.keys())):
+            if full_text:
+                fields["full_text"] = full_text
+            # Also check for pages array
+            pages_data = extracted.get("pages", [])
+            if pages_data and isinstance(pages_data, list):
+                print(f"[INFO] Extracted text from {len(pages_data)} page(s)")
+                fields["pages"] = pages_data
+        # Add Fields at root level if it exists
+        if root_fields:
+            fields["Fields"] = root_fields
+        # Count fields - if structured data exists, count table rows + root Fields
+        if isinstance(fields, dict):
+            # Check if it's structured page data
+            if any(k.startswith("page_") for k in fields.keys()):
+                # Count table rows from all pages
+                table_rows_count = 0
+                for page_key, page_data in fields.items():
+                    if page_key.startswith("page_") and isinstance(page_data, dict):
+                        table_rows = page_data.get("table", [])
+                        if isinstance(table_rows, list):
+                            table_rows_count += len(table_rows)
+                # Count Fields from root level
+                fields_keys = 0
+                if isinstance(root_fields, dict):
+                    fields_keys = len(root_fields)
+                fields_extracted = table_rows_count + fields_keys
+                print(f"[INFO] Structured data: {table_rows_count} table rows, {fields_keys} extracted fields")
+            else:
+                # Regular fields count (excluding full_text, pages, and Fields)
+                fields_extracted = len([k for k in fields.keys() if k not in ["full_text", "pages", "Fields"]])
+                # Add Fields count if it exists
+                if isinstance(root_fields, dict):
+                    fields_extracted += len(root_fields)
+        else:
+            fields_extracted = 0
+        print(f"[INFO] Final stats - confidence: {confidence}, fields_count: {fields_extracted}")
+        status = "completed"
+        error_message = None
+    except Exception as e:
+        import traceback
+        total_ms = int((time.time() - start) * 1000)
+        confidence = 0.0
+        fields = {}
+        fields_extracted = 0
+        status = "failed"
+        error_message = str(e)
+        print(f"[ERROR] Extraction failed: {error_message}")
+        print(f"[ERROR] Traceback: {traceback.format_exc()}")
+    # Save record to DB
+    import json
+    import base64
+    rec = ExtractionRecord(
+        user_id=current_user.id,
+        file_name=file.filename,
+        file_type=content_type,
+        file_size=size_str,
+        status=status,
+        confidence=confidence,
+        fields_extracted=fields_extracted,
+        total_time_ms=total_ms,
+        raw_output=json.dumps(fields),  # Use JSON instead of str() to preserve structure
+        file_base64=file_base64,  # Store base64 encoded file for preview
+        error_message=error_message,
+    )
+    db.add(rec)
+    db.commit()
+    db.refresh(rec)
+    stages = make_stages(total_ms, status)
+    # Response shape that frontend will consume
+    return {
+        "id": rec.id,
+        "fileName": rec.file_name,
+        "fileType": rec.file_type,
+        "fileSize": rec.file_size,
+        "status": status,
+        "confidence": confidence,
+        "fieldsExtracted": fields_extracted,
+        "totalTime": total_ms,
+        "fields": fields,
+        "stages": {k: v.dict() for k, v in stages.items()},
+        "errorMessage": error_message,
+    }
+@app.get("/api/history", response_model=List[ExtractionRecordBase])
+def get_history(
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """
+    Used by the History page.
+    Returns last 100 records for the current user, with synthetic stage data.
+    """
+    recs = (
+        db.query(ExtractionRecord)
+        .filter(ExtractionRecord.user_id == current_user.id)
+        .order_by(ExtractionRecord.created_at.desc())
+        .limit(100)
+        .all()
+    )
+    # Deduplicate: if multiple extractions share the same shared_from_extraction_id,
+    # keep only the most recent one (to prevent duplicates when same extraction is shared multiple times)
+    seen_shared_ids = set()
+    deduplicated_recs = []
+    for rec in recs:
+        if rec.shared_from_extraction_id:
+            # This is a shared extraction
+            if rec.shared_from_extraction_id not in seen_shared_ids:
+                seen_shared_ids.add(rec.shared_from_extraction_id)
+                deduplicated_recs.append(rec)
+            # Skip duplicates
+        else:
+            # Original extraction (not shared), always include
+            deduplicated_recs.append(rec)
+    recs = deduplicated_recs
+    output: List[ExtractionRecordBase] = []
+    for r in recs:
+        stages = make_stages(r.total_time_ms or 1000, r.status or "completed")
+        output.append(
+            ExtractionRecordBase(
+                id=r.id,
+                fileName=r.file_name,
+                fileType=r.file_type or "",
+                fileSize=r.file_size or "",
+                extractedAt=r.created_at,
+                status=r.status or "completed",
+                confidence=r.confidence or 0.0,
+                fieldsExtracted=r.fields_extracted or 0,
+                totalTime=r.total_time_ms or 0,
+                stages=stages,
+                errorMessage=r.error_message,
+            )
+        )
+    return output
+@app.get("/api/extraction/{extraction_id}")
+def get_extraction(
+    extraction_id: int,
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """
+    Get a specific extraction by ID with full fields data.
+    Used when viewing output from History page.
+    """
+    import json
+    rec = (
+        db.query(ExtractionRecord)
+        .filter(
+            ExtractionRecord.id == extraction_id,
+            ExtractionRecord.user_id == current_user.id
+        )
+        .first()
+    )
+    if not rec:
+        from fastapi import HTTPException
+        raise HTTPException(status_code=404, detail="Extraction not found")
+    # Parse the raw_output JSON string back to dict
+    fields = {}
+    if rec.raw_output:
+        try:
+            # Try parsing as JSON first (new format)
+            fields = json.loads(rec.raw_output)
+        except (json.JSONDecodeError, TypeError):
+            # If that fails, try using ast.literal_eval for old str() format (backward compatibility)
+            try:
+                import ast
+                # Only use literal_eval if it looks like a Python dict string
+                if rec.raw_output.strip().startswith('{'):
+                    fields = ast.literal_eval(rec.raw_output)
+                else:
+                    fields = {}
+            except:
+                fields = {}
+    stages = make_stages(rec.total_time_ms or 1000, rec.status or "completed")
+    return {
+        "id": rec.id,
+        "fileName": rec.file_name,
+        "fileType": rec.file_type or "",
+        "fileSize": rec.file_size or "",
+        "status": rec.status or "completed",
+        "confidence": rec.confidence or 0.0,
+        "fieldsExtracted": rec.fields_extracted or 0,
+        "totalTime": rec.total_time_ms or 0,
+        "fields": fields,
+        "fileBase64": rec.file_base64,  # Include base64 encoded file for preview
+        "stages": {k: v.dict() for k, v in stages.items()},
+        "errorMessage": rec.error_message,
+    }
+@app.post("/api/share")
+async def share_extraction(
+    extraction_id: int = Body(...),
+    recipient_emails: List[str] = Body(...),
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """
+    Share an extraction with one or more users via email.
+    Creates share tokens and sends emails to recipients.
+    """
+    import secrets
+    from datetime import datetime, timedelta
+    from .brevo_service import send_share_email
+    from .email_validator import validate_business_email
+    # Validate recipient emails list
+    if not recipient_emails or len(recipient_emails) == 0:
+        raise HTTPException(status_code=400, detail="At least one recipient email is required")
+    # Validate each recipient email is a business email
+    for email in recipient_emails:
+        try:
+            validate_business_email(email)
+        except HTTPException:
+            raise  # Re-raise HTTPException from validate_business_email
+    # Get the extraction record
+    extraction = (
+        db.query(ExtractionRecord)
+        .filter(
+            ExtractionRecord.id == extraction_id,
+            ExtractionRecord.user_id == current_user.id
+        )
+        .first()
+    )
+    if not extraction:
+        raise HTTPException(status_code=404, detail="Extraction not found")
+    # Generate share link base URL
+    base_url = os.environ.get("VITE_API_BASE_URL", "https://seth0330-ezofisocr.hf.space")
+    # Process each recipient email
+    successful_shares = []
+    failed_shares = []
+    share_records = []
+    for recipient_email in recipient_emails:
+        recipient_email = recipient_email.strip().lower()
+        # Generate secure share token for this recipient
+        share_token = secrets.token_urlsafe(32)
+        # Create share token record (expires in 30 days)
+        expires_at = datetime.utcnow() + timedelta(days=30)
+        share_record = ShareToken(
+            token=share_token,
+            extraction_id=extraction_id,
+            sender_user_id=current_user.id,
+            recipient_email=recipient_email,
+            expires_at=expires_at,
+        )
+        db.add(share_record)
+        share_records.append((share_record, share_token, recipient_email))
+    # Commit all share tokens
+    try:
+        db.commit()
+        for share_record, share_token, recipient_email in share_records:
+            db.refresh(share_record)
+    except Exception as e:
+        db.rollback()
+        raise HTTPException(status_code=500, detail=f"Failed to create share tokens: {str(e)}")
+    # Send emails to all recipients
+    for share_record, share_token, recipient_email in share_records:
+        share_link = f"{base_url}/share/{share_token}"
+        try:
+            # Get sender's name from current_user, fallback to None if not available
+            sender_name = current_user.name if current_user.name else None
+            await send_share_email(recipient_email, current_user.email, share_link, sender_name)
+            successful_shares.append(recipient_email)
+        except Exception as e:
+            # Log error but continue with other emails
+            print(f"[ERROR] Failed to send share email to {recipient_email}: {str(e)}")
+            failed_shares.append(recipient_email)
+            # Optionally, you could delete the share token if email fails
+            # db.delete(share_record)
+    # Build response message
+    if len(failed_shares) == 0:
+        message = f"Extraction shared successfully with {len(successful_shares)} recipient(s)"
+    elif len(successful_shares) == 0:
+        raise HTTPException(status_code=500, detail=f"Failed to send share emails to all recipients")
+    else:
+        message = f"Extraction shared with {len(successful_shares)} recipient(s). Failed to send to: {', '.join(failed_shares)}"
+    return {
+        "success": True,
+        "message": message,
+        "successful_count": len(successful_shares),
+        "failed_count": len(failed_shares),
+        "successful_emails": successful_shares,
+        "failed_emails": failed_shares if failed_shares else None
+    }
+class ShareLinkRequest(BaseModel):
+    """Request body for POST /api/share/link."""
+    # ID of the extraction record the share link is created for.
+    extraction_id: int
+@app.post("/api/share/link")
+async def create_share_link(
+    request: ShareLinkRequest,
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """
+    Create a shareable link for an extraction without requiring recipient emails.
+    Returns a share link that can be copied and shared manually.
+    """
+    import secrets
+    from datetime import datetime, timedelta
+    # Get the extraction record
+    extraction = (
+        db.query(ExtractionRecord)
+        .filter(
+            ExtractionRecord.id == request.extraction_id,
+            ExtractionRecord.user_id == current_user.id
+        )
+        .first()
+    )
+    if not extraction:
+        raise HTTPException(status_code=404, detail="Extraction not found")
+    # Generate secure share token
+    share_token = secrets.token_urlsafe(32)
+    # Create share token record (expires in 30 days, no specific recipient)
+    expires_at = datetime.utcnow() + timedelta(days=30)
+    share_record = ShareToken(
+        token=share_token,
+        extraction_id=request.extraction_id,
+        sender_user_id=current_user.id,
+        recipient_email=None,  # None for public share links (copyable links)
+        expires_at=expires_at,
+    )
+    db.add(share_record)
+    db.commit()
+    db.refresh(share_record)
+    # Generate share link
+    base_url = os.environ.get("VITE_API_BASE_URL", "https://seth0330-ezofisocr.hf.space")
+    share_link = f"{base_url}/share/{share_token}"
+    return {
+        "success": True,
+        "share_link": share_link,
+        "share_token": share_token,
+        "expires_at": expires_at.isoformat() if expires_at else None
+    }
+@app.get("/api/share/{token}")
+async def access_shared_extraction(
+    token: str,
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """
+    Access a shared extraction and copy it to the current user's account.
+    This endpoint is called after the user logs in via the share link.
+    """
+    from datetime import datetime
+    import json
+    # Find the share token
+    share = (
+        db.query(ShareToken)
+        .filter(ShareToken.token == token)
+        .first()
+    )
+    if not share:
+        raise HTTPException(status_code=404, detail="Share link not found or expired")
+    # Check if token is expired
+    if share.expires_at and share.expires_at < datetime.utcnow():
+        raise HTTPException(status_code=410, detail="Share link has expired")
+    # Get the original extraction
+    original_extraction = (
+        db.query(ExtractionRecord)
+        .filter(ExtractionRecord.id == share.extraction_id)
+        .first()
+    )
+    if not original_extraction:
+        raise HTTPException(status_code=404, detail="Original extraction not found")
+    # Check if already copied for this user (check by share token to prevent duplicates from same share)
+    # Also check if this specific share token was already used by this user
+    if share.accessed and share.accessed_by_user_id == current_user.id:
+        # This share token was already used by this user, find the extraction
+        existing_copy = (
+            db.query(ExtractionRecord)
+            .filter(
+                ExtractionRecord.user_id == current_user.id,
+                ExtractionRecord.shared_from_extraction_id == original_extraction.id
+            )
+            .order_by(ExtractionRecord.created_at.desc())
+            .first()
+        )
+        if existing_copy:
+            return {
+                "success": True,
+                "extraction_id": existing_copy.id,
+                "message": "Extraction already shared with you"
+            }
+    # Also check if any copy exists for this user from this original extraction
+    existing_copy = (
+        db.query(ExtractionRecord)
+        .filter(
+            ExtractionRecord.user_id == current_user.id,
+            ExtractionRecord.shared_from_extraction_id == original_extraction.id
+        )
+        .first()
+    )
+    if existing_copy:
+        # Already copied, mark this share as accessed and return existing extraction ID
+        share.accessed = True
+        share.accessed_at = datetime.utcnow()
+        share.accessed_by_user_id = current_user.id
+        db.commit()
+        return {
+            "success": True,
+            "extraction_id": existing_copy.id,
+            "message": "Extraction already shared with you"
+        }
+    # Copy extraction to current user's account
+    # Parse the raw_output JSON string back to dict
+    fields = {}
+    if original_extraction.raw_output:
+        try:
+            fields = json.loads(original_extraction.raw_output)
+        except (json.JSONDecodeError, TypeError):
+            try:
+                import ast
+                if original_extraction.raw_output.strip().startswith('{'):
+                    fields = ast.literal_eval(original_extraction.raw_output)
+                else:
+                    fields = {}
+            except:
+                fields = {}
+    # Create new extraction record for the recipient
+    new_extraction = ExtractionRecord(
+        user_id=current_user.id,
+        file_name=original_extraction.file_name,
+        file_type=original_extraction.file_type,
+        file_size=original_extraction.file_size,
+        status=original_extraction.status or "completed",
+        confidence=original_extraction.confidence or 0.0,
+        fields_extracted=original_extraction.fields_extracted or 0,
+        total_time_ms=original_extraction.total_time_ms or 0,
+        raw_output=original_extraction.raw_output,  # Copy the JSON string
+        file_base64=original_extraction.file_base64,  # Copy the base64 file
+        shared_from_extraction_id=original_extraction.id,
+        shared_by_user_id=share.sender_user_id,
+    )
+    db.add(new_extraction)
+    # Mark share as accessed
+    share.accessed = True
+    share.accessed_at = datetime.utcnow()
+    share.accessed_by_user_id = current_user.id
+    db.commit()
+    db.refresh(new_extraction)
+    return {
+        "success": True,
+        "extraction_id": new_extraction.id,
+        "message": "Extraction shared successfully"
+    }
+# Static frontend mounting (used after we build React)
+# Dockerfile copies the Vite build into backend/frontend_dist
+# IMPORTANT: API routes must be defined BEFORE this so they take precedence
+# frontend_dist is looked up one directory above the directory containing this file.
+frontend_dir = os.path.join(
+    os.path.dirname(os.path.dirname(__file__)), "frontend_dist"
+)
+# Nothing below is registered when the build output directory is absent.
+if os.path.isdir(frontend_dir):
+    # Serve static files (JS, CSS, images, etc.) from assets directory
+    assets_dir = os.path.join(frontend_dir, "assets")
+    if os.path.isdir(assets_dir):
+        app.mount(
+            "/assets",
+            StaticFiles(directory=assets_dir),
+            name="assets",
+        )
+    # Serve static files from root (logo.png, favicon.ico, etc.)
+    # Files in public/ directory are copied to dist/ root during Vite build
+    # These routes must be defined BEFORE the catch-all route
+    @app.get("/logo.png")
+    async def serve_logo():
+        """Serve logo.png from frontend_dist root; responds 404 when the file is missing."""
+        from fastapi.responses import FileResponse
+        logo_path = os.path.join(frontend_dir, "logo.png")
+        if os.path.exists(logo_path):
+            return FileResponse(logo_path, media_type="image/png")
+        from fastapi import HTTPException
+        raise HTTPException(status_code=404)
+    @app.get("/favicon.ico")
+    async def serve_favicon():
+        """Serve favicon.ico from frontend_dist root; responds 404 when the file is missing."""
+        from fastapi.responses import FileResponse
+        favicon_path = os.path.join(frontend_dir, "favicon.ico")
+        if os.path.exists(favicon_path):
+            return FileResponse(favicon_path, media_type="image/x-icon")
+        from fastapi import HTTPException
+        raise HTTPException(status_code=404)
+    # Catch-all route to serve index.html for React Router
+    # This must be last so API routes and static files are matched first
+    @app.get("/{full_path:path}")
+    async def serve_frontend(full_path: str):
+        """
+        Serve React app for all non-API routes.
+        React Router will handle client-side routing.
+        Responds 404 for API, docs, OpenAPI and static-asset paths that reach this
+        handler, and when index.html is missing from the build output.
+        """
+        # Skip API routes, docs, static assets, and known static files
+        # (the checks are plain prefix matches on the path without its leading slash)
+        if (full_path.startswith("api/") or
+            full_path.startswith("docs") or
+            full_path.startswith("openapi.json") or
+            full_path.startswith("assets/") or
+            full_path in ["logo.png", "favicon.ico"]):
+            from fastapi import HTTPException
+            raise HTTPException(status_code=404)
+        # Serve index.html for all other routes (React Router will handle routing)
+        from fastapi.responses import FileResponse
+        index_path = os.path.join(frontend_dir, "index.html")
+        if os.path.exists(index_path):
+            return FileResponse(index_path)
+        from fastapi import HTTPException
+        raise HTTPException(status_code=404)

backend/app/models.py CHANGED Viewed

@@ -1,32 +1,136 @@
-from sqlalchemy import Column, Integer, String, Float, DateTime, Text
-from sqlalchemy.sql import func
-from .db import Base
-class ExtractionRecord(Base):
-    """
-    Stores one extraction run so the History page can show past jobs.
-    We’ll fill it from the /api/extract endpoint later.
-    """
-    __tablename__ = "extractions"
-    id = Column(Integer, primary_key=True, index=True)
-    file_name = Column(String, index=True)
-    file_type = Column(String)
-    file_size = Column(String)
-    status = Column(String)              # "completed" | "failed"
-    confidence = Column(Float)           # overall confidence (0–100)
-    fields_extracted = Column(Integer)   # number of fields extracted
-    total_time_ms = Column(Integer)      # total processing time in ms
-    raw_output = Column(Text)            # JSON string from the model
-    error_message = Column(Text, nullable=True)
-    created_at = Column(
-        DateTime(timezone=True),
-        server_default=func.now(),
-    )

+from sqlalchemy import Column, Integer, String, Float, DateTime, Text, ForeignKey, Boolean
+from sqlalchemy.orm import relationship
+from sqlalchemy.sql import func
+from .db import Base
+class User(Base):
+    """
+    Stores user information from Firebase or OTP authentication.
+    One row per account, mapped to the "users" table. The e-mail address is
+    mandatory and unique; the Firebase UID is optional but unique when set.
+    """
+    __tablename__ = "users"
+    # Surrogate primary key
+    id = Column(Integer, primary_key=True, index=True)
+    # Required, unique and indexed
+    email = Column(String, unique=True, index=True, nullable=False)
+    # Optional display name
+    name = Column(String, nullable=True)
+    # Optional; presumably a profile picture URL - confirm against the auth code
+    picture = Column(String, nullable=True)
+    # Auth method: 'firebase' or 'otp' (defaults to 'firebase' on insert)
+    auth_method = Column(String, default='firebase')
+    # Firebase-specific: unique UID, left NULL for users without one
+    firebase_uid = Column(String, unique=True, index=True, nullable=True)
+    # OTP-specific: defaults to False on insert
+    email_verified = Column(Boolean, default=False)
+    # Timestamp filled in by the database (timezone-aware)
+    created_at = Column(
+        DateTime(timezone=True),
+        server_default=func.now(),
+    )
+    # Relationship to extraction records (explicitly specify user_id as the foreign key)
+    # Note: primaryjoin must be specified because ExtractionRecord has multiple foreign keys to User
+    # (user_id and shared_by_user_id), so the join cannot be inferred automatically
+    extractions = relationship(
+        "ExtractionRecord",
+        back_populates="user",
+        primaryjoin="User.id == ExtractionRecord.user_id"
+    )
+    # Relationship to API keys; the cascade removes a user's keys together with
+    # the user and deletes keys detached from this collection
+    api_keys = relationship(
+        "APIKey",
+        back_populates="user",
+        cascade="all, delete-orphan"
+    )
+class ExtractionRecord(Base):
+    """
+    Stores one extraction run so the History page can show past jobs.
+    We'll fill it from the /api/extract endpoint later.
+    Each row belongs to exactly one user (user_id is required). Rows that were
+    created by sharing can additionally point at the original extraction and
+    at the user who shared it.
+    """
+    __tablename__ = "extractions"
+    # Surrogate primary key
+    id = Column(Integer, primary_key=True, index=True)
+    # Owner of this record; required
+    user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
+    file_name = Column(String, index=True)
+    file_type = Column(String)
+    # Note: stored as a string, not as a numeric byte count
+    file_size = Column(String)
+    status = Column(String)              # "completed" | "failed"
+    confidence = Column(Float)           # overall confidence (0–100)
+    fields_extracted = Column(Integer)   # number of fields extracted
+    total_time_ms = Column(Integer)      # total processing time in ms
+    raw_output = Column(Text)            # JSON string from the model
+    file_base64 = Column(Text, nullable=True)  # Base64 encoded original file for preview
+    error_message = Column(Text, nullable=True)
+    # Timestamp filled in by the database (timezone-aware)
+    created_at = Column(
+        DateTime(timezone=True),
+        server_default=func.now(),
+    )
+    # Relationship to user (explicitly specify user_id as the foreign key)
+    # Note: primaryjoin must be specified because ExtractionRecord has multiple foreign keys to User
+    # (user_id and shared_by_user_id)
+    user = relationship(
+        "User",
+        back_populates="extractions",
+        primaryjoin="ExtractionRecord.user_id == User.id"
+    )
+    # Track if this extraction was shared (original extraction ID); self-referencing
+    # foreign key, NULL when no original extraction is recorded
+    shared_from_extraction_id = Column(Integer, ForeignKey("extractions.id"), nullable=True, index=True)
+    # User who shared the extraction; NULL when not recorded
+    shared_by_user_id = Column(Integer, ForeignKey("users.id"), nullable=True, index=True)
+class ShareToken(Base):
+    """
+    Stores share tokens for sharing extractions with other users.
+    A token references the shared extraction and the sending user; the
+    recipient e-mail, the expiry and all access-tracking fields are optional.
+    """
+    __tablename__ = "share_tokens"
+    # Surrogate primary key
+    id = Column(Integer, primary_key=True, index=True)
+    token = Column(String, unique=True, index=True, nullable=False)  # Unique share token
+    # Extraction being shared; required
+    extraction_id = Column(Integer, ForeignKey("extractions.id"), nullable=False, index=True)
+    # User who created the share; required
+    sender_user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
+    recipient_email = Column(String, nullable=True, index=True)  # Nullable for public share links
+    expires_at = Column(DateTime(timezone=True), nullable=True)  # Optional expiration
+    accessed = Column(Boolean, default=False)  # Track if link was accessed
+    # When and by whom the link was accessed; both NULL until set by the application
+    accessed_at = Column(DateTime(timezone=True), nullable=True)
+    accessed_by_user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
+    # Timestamp filled in by the database (timezone-aware)
+    created_at = Column(
+        DateTime(timezone=True),
+        server_default=func.now(),
+    )
+class APIKey(Base):
+    """
+    Stores API keys for external application authentication.
+    API keys are hashed before storage for security.
+    This table only holds the hash (unique) and a short display prefix; the
+    hashing itself is not performed in this model.
+    """
+    __tablename__ = "api_keys"
+    # Surrogate primary key
+    id = Column(Integer, primary_key=True, index=True)
+    # Owner of the key; required
+    user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
+    name = Column(String, nullable=False)  # User-friendly name for the API key
+    key_hash = Column(String, unique=True, index=True, nullable=False)  # Hashed API key
+    key_prefix = Column(String, nullable=False)  # First 8 chars of key for display (e.g., "sk_live_")
+    # Defaults to True on insert; presumably cleared to revoke a key - confirm in api_key_auth
+    is_active = Column(Boolean, default=True, nullable=False)
+    # NULL until a value is written by the application
+    last_used_at = Column(DateTime(timezone=True), nullable=True)
+    # Timestamp filled in by the database (timezone-aware)
+    created_at = Column(
+        DateTime(timezone=True),
+        server_default=func.now(),
+    )
+    # Relationship to user (mirror of User.api_keys)
+    user = relationship(
+        "User",
+        back_populates="api_keys"
+    )

backend/app/monday_service.py ADDED Viewed

	@@ -0,0 +1,391 @@

+"""
+Monday.com API service for creating leads with automatic field matching.
+Reference: https://developer.monday.com/api-reference/docs
+"""
+import os
+import httpx
+import json
+from typing import Optional, Dict, Any, List, Tuple
+from difflib import SequenceMatcher
+MONDAY_API_KEY = os.environ.get("MONDAY_API_KEY", "")
+MONDAY_API_URL = "https://api.monday.com/v2"
+MONDAY_BOARD_ID = os.environ.get("MONDAY_BOARD_ID", None)  # Your "New Leads" board ID
+# Cache for board columns to avoid repeated API calls
+_board_columns_cache: Dict[str, List[Dict[str, Any]]] = {}
+def _calculate_similarity(str1: str, str2: str) -> float:
+    """
+    Calculate similarity between two strings using SequenceMatcher.
+    Returns a value between 0.0 and 1.0.
+    """
+    return SequenceMatcher(None, str1.lower(), str2.lower()).ratio()
+def _find_best_column_match(
+    field_name: str,
+    available_columns: List[Dict[str, Any]],
+    min_similarity: float = 0.3
+) -> Optional[Tuple[str, str, float]]:
+    """
+    Find the best matching column for a field name using semantic similarity.
+    Args:
+        field_name: The field name to match (e.g., "first_name", "email")
+        available_columns: List of column dicts with 'id' and 'title' keys
+        min_similarity: Minimum similarity threshold (0.0 to 1.0)
+    Returns:
+        Tuple of (column_id, column_title, similarity_score) or None if no match found
+    """
+    best_match = None
+    best_score = 0.0
+    # Normalize field name for matching
+    normalized_field = field_name.lower().replace("_", " ").replace("-", " ")
+    # Common field name variations
+    field_variations = [
+        normalized_field,
+        field_name.lower(),
+        field_name.replace("_", ""),
+    ]
+    # Add common synonyms
+    synonyms = {
+        "first_name": ["first name", "firstname", "fname", "given name"],
+        "last_name": ["last name", "lastname", "lname", "surname", "family name"],
+        "email": ["email address", "email", "e-mail", "mail"],
+        "phone_number": ["phone", "phone number", "telephone", "mobile", "cell"],
+        "linkedin_url": ["linkedin", "linkedin profile", "linkedin url", "linkedin link"],
+        "title": ["job title", "position", "role", "job"],
+        "headline": ["headline", "tagline", "bio"],
+        "organization_name": ["company", "organization", "org", "company name", "employer"],
+        "organization_website": ["website", "company website", "url", "web"],
+        "organization_address": ["address", "company address", "location"],
+    }
+    if field_name in synonyms:
+        field_variations.extend(synonyms[field_name])
+    for column in available_columns:
+        column_title = column.get("title", "").lower()
+        column_id = column.get("id", "")
+        if not column_title or not column_id:
+            continue
+        # Calculate similarity for each variation
+        for variation in field_variations:
+            score = _calculate_similarity(variation, column_title)
+            if score > best_score:
+                best_score = score
+                best_match = (column_id, column.get("title", ""), score)
+    if best_match and best_score >= min_similarity:
+        return best_match
+    return None
+async def _get_board_columns(board_id: str) -> List[Dict[str, Any]]:
+    """
+    Fetch board columns from Monday.com API.
+    Args:
+        board_id: Monday.com board ID
+    Returns:
+        List of column dictionaries with 'id', 'title', and 'type' keys
+    """
+    # Check cache first
+    if board_id in _board_columns_cache:
+        return _board_columns_cache[board_id]
+    if not MONDAY_API_KEY:
+        print("[WARNING] MONDAY_API_KEY not set, cannot fetch board columns")
+        return []
+    query = """
+    query ($boardId: ID!) {
+        boards(ids: [$boardId]) {
+            columns {
+                id
+                title
+                type
+            }
+        }
+    }
+    """
+    headers = {
+        "Authorization": MONDAY_API_KEY,
+        "Content-Type": "application/json"
+    }
+    try:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.post(
+                MONDAY_API_URL,
+                json={
+                    "query": query,
+                    "variables": {"boardId": board_id}
+                },
+                headers=headers
+            )
+            if response.status_code == 200:
+                result = response.json()
+                if result.get("data") and result["data"].get("boards"):
+                    boards = result["data"]["boards"]
+                    if boards and boards[0].get("columns"):
+                        columns = boards[0]["columns"]
+                        # Cache the result
+                        _board_columns_cache[board_id] = columns
+                        print(f"[INFO] Fetched {len(columns)} columns from Monday.com board {board_id}")
+                        return columns
+                elif result.get("errors"):
+                    print(f"[ERROR] Failed to fetch board columns: {result['errors']}")
+            else:
+                print(f"[ERROR] Failed to fetch board columns: {response.status_code} - {response.text}")
+    except Exception as e:
+        print(f"[ERROR] Exception while fetching board columns: {str(e)}")
+    return []
+def _format_column_value(value: Any, column_type: str, column_id: Optional[str] = None) -> Any:
+    """
+    Format a value according to Monday.com column type.
+    Args:
+        value: The value to format
+        column_type: Monday.com column type (email, phone, link, text, etc.)
+        column_id: Column ID (for special handling)
+    Returns:
+        For email/phone/link: Python dict object
+        For text/other types: Plain string
+    """
+    if value is None:
+        return ""
+    value_str = str(value)
+    if column_type == "email":
+        # Monday.com email format requires dict object (will be JSON encoded later)
+        return {"email": value_str, "text": value_str}
+    elif column_type == "phone":
+        return {"phone": value_str, "countryShortName": "US"}
+    elif column_type == "link":
+        # If it's already a URL, use it; otherwise create a link
+        if value_str.startswith("http://") or value_str.startswith("https://"):
+            return {"url": value_str, "text": value_str}
+        else:
+            return {"url": f"https://{value_str}", "text": value_str}
+    else:
+        # Text, status, and other types - just return the string
+        return value_str
+async def create_monday_lead(
+    email: str,
+    first_name: Optional[str] = None,
+    last_name: Optional[str] = None,
+    phone_number: Optional[str] = None,
+    linkedin_url: Optional[str] = None,
+    title: Optional[str] = None,
+    headline: Optional[str] = None,
+    organization_name: Optional[str] = None,
+    organization_website: Optional[str] = None,
+    organization_address: Optional[str] = None,
+    board_id: Optional[str] = None
+) -> bool:
+    """
+    Create a new lead item in Monday.com board.
+    Args:
+        email: Contact email address (required)
+        first_name: Contact first name
+        last_name: Contact last name
+        phone_number: Phone number
+        linkedin_url: LinkedIn profile URL
+        title: Job title
+        headline: Professional headline
+        organization_name: Company name
+        organization_website: Company website
+        organization_address: Company address
+        board_id: Monday.com board ID as string (defaults to MONDAY_BOARD_ID env var)
+    Returns:
+        True if lead created successfully, False otherwise
+    """
+    if not MONDAY_API_KEY:
+        print("[WARNING] MONDAY_API_KEY not set, skipping Monday.com lead creation")
+        return False
+    target_board_id = board_id or MONDAY_BOARD_ID
+    if not target_board_id:
+        print("[WARNING] MONDAY_BOARD_ID not set, skipping Monday.com lead creation")
+        return False
+    # Prepare item name (use full name or email)
+    item_name = email
+    if first_name and last_name:
+        item_name = f"{first_name} {last_name}"
+    elif first_name:
+        item_name = first_name
+    elif last_name:
+        item_name = last_name
+    # Fetch board columns to automatically match fields
+    print(f"[INFO] Fetching Monday.com board columns for automatic field matching...")
+    board_columns = await _get_board_columns(str(target_board_id))
+    if not board_columns:
+        print("[WARNING] Could not fetch board columns, skipping Monday.com lead creation")
+        return False
+    # Create a mapping of column IDs to column types for formatting
+    column_types = {col["id"]: col.get("type", "text") for col in board_columns}
+    # Prepare data fields to map
+    data_fields = {
+        "email": email,
+        "first_name": first_name,
+        "last_name": last_name,
+        "phone_number": phone_number,
+        "linkedin_url": linkedin_url,
+        "title": title,
+        "headline": headline,
+        "organization_name": organization_name,
+        "organization_website": organization_website,
+        "organization_address": organization_address,
+    }
+    # Automatically match fields to columns using semantic similarity
+    column_values = {}
+    matched_fields = []
+    # Track which columns have been matched to handle duplicates (e.g., first_name and last_name -> Name)
+    column_matches = {}  # column_id -> (field_name, value)
+    for field_name, field_value in data_fields.items():
+        if not field_value:
+            continue
+        match = _find_best_column_match(field_name, board_columns)
+        if match:
+            column_id, column_title, similarity = match
+            column_type = column_types.get(column_id, "text")
+            # Handle special case: if first_name and last_name both match to the same "Name" column
+            if column_id in column_matches:
+                existing_field, existing_value = column_matches[column_id]
+                # If both first_name and last_name match to the same column, combine them
+                if (field_name in ["first_name", "last_name"] and
+                    existing_field in ["first_name", "last_name"] and
+                    field_name != existing_field):
+                    # Combine first and last name
+                    if field_name == "first_name":
+                        combined_value = f"{field_value} {existing_value}"
+                    else:
+                        combined_value = f"{existing_value} {field_value}"
+                    formatted_value = _format_column_value(combined_value, column_type, column_id)
+                    column_values[column_id] = formatted_value
+                    matched_fields.append(f"{existing_field}+{field_name} -> {column_title} (combined)")
+                    print(f"[INFO] Combined '{existing_field}' and '{field_name}' to column '{column_title}' (ID: {column_id})")
+                    continue
+                else:
+                    # Different fields matching to same column - use the one with higher similarity
+                    print(f"[DEBUG] Column '{column_title}' already matched to '{existing_field}', skipping '{field_name}'")
+                    continue
+            formatted_value = _format_column_value(field_value, column_type, column_id)
+            column_values[column_id] = formatted_value
+            column_matches[column_id] = (field_name, field_value)
+            matched_fields.append(f"{field_name} -> {column_title} (similarity: {similarity:.2f})")
+            print(f"[INFO] Matched '{field_name}' to column '{column_title}' (ID: {column_id}, type: {column_type}, value: {formatted_value[:100] if len(str(formatted_value)) > 100 else formatted_value})")
+        else:
+            print(f"[DEBUG] No suitable column match found for '{field_name}' (skipping)")
+    if not column_values:
+        print("[WARNING] No fields could be matched to board columns")
+        return False
+    print(f"[INFO] Successfully matched {len(matched_fields)} fields to Monday.com columns")
+    # Convert column_values to JSON string for GraphQL mutation
+    # Monday.com expects column values as a JSON string where:
+    # - Text columns: plain string values
+    # - Email/Phone/Link columns: dict objects (properly JSON encoded)
+    column_values_json = json.dumps(column_values)
+    print(f"[DEBUG] Monday.com column_values JSON: {column_values_json[:500]}")
+    # GraphQL mutation
+    # Note: Monday.com uses ID! (string) type for board_id, not Int!
+    mutation = """
+    mutation ($boardId: ID!, $itemName: String!, $columnValues: JSON!) {
+        create_item (board_id: $boardId, item_name: $itemName, column_values: $columnValues) {
+            id
+        }
+    }
+    """
+    # Convert board_id to string (Monday.com expects ID! which is a string)
+    board_id_str = str(target_board_id)
+    variables = {
+        "boardId": board_id_str,
+        "itemName": item_name,
+        "columnValues": column_values_json
+    }
+    headers = {
+        "Authorization": MONDAY_API_KEY,
+        "Content-Type": "application/json"
+    }
+    try:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.post(
+                MONDAY_API_URL,
+                json={
+                    "query": mutation,
+                    "variables": variables
+                },
+                headers=headers
+            )
+            if response.status_code == 200:
+                result = response.json()
+                if result.get("data") and result["data"].get("create_item"):
+                    item_id = result["data"]["create_item"].get("id")
+                    print(f"[INFO] Successfully created Monday.com lead: {item_name} (ID: {item_id})")
+                    return True
+                elif result.get("errors"):
+                    errors = result.get("errors", [])
+                    for error in errors:
+                        error_msg = error.get("message", "Unknown error")
+                        error_path = error.get("path", [])
+                        print(f"[ERROR] Monday.com API error: {error_msg}")
+                        if error_path:
+                            print(f"[ERROR] Error path: {error_path}")
+                    # Log full error for debugging
+                    print(f"[DEBUG] Full Monday.com error response: {json.dumps(errors, indent=2)}")
+                    return False
+                else:
+                    print(f"[ERROR] Unexpected Monday.com API response: {result}")
+                    return False
+            else:
+                error_data = response.text
+                print(f"[ERROR] Failed to create Monday.com lead: {response.status_code} - {error_data}")
+                return False
+    except httpx.HTTPStatusError as e:
+        print(f"[ERROR] Monday.com API HTTP error: {e.response.status_code} - {e.response.text}")
+        return False
+    except Exception as e:
+        print(f"[ERROR] Failed to create Monday.com lead: {str(e)}")
+        return False

backend/app/openrouter_client.py CHANGED Viewed

@@ -1,627 +1,862 @@
-import os
-import base64
-import json
-import re
-from io import BytesIO
-from typing import Any, Dict, List, Optional, Tuple
-from openai import OpenAI
-try:
-    import fitz  # PyMuPDF
-    from PIL import Image
-    PDF_SUPPORT = True
-except ImportError as e:
-    PDF_SUPPORT = False
-    print(f"[WARNING] PDF support libraries not available: {e}. PDF conversion will not work.")
-# OCR Model Configuration (from sample code)
-OCR_BASE_URL = os.environ.get("OCR_BASE_URL", "https://od5yev2behke5u-8000.proxy.runpod.net/v1")
-OCR_API_KEY = os.environ.get("OCR_API_KEY", "Ezofis@123")
-OCR_MODEL_NAME = os.environ.get("OCR_MODEL_NAME", "EZOFISOCR")
-# Initialize OpenAI client with OCR endpoint
-ocr_client = OpenAI(
-    base_url=OCR_BASE_URL,
-    api_key=OCR_API_KEY,
-)
-def _pdf_to_images(pdf_bytes: bytes) -> List[bytes]:
-    """
-    Convert PDF pages to PNG images.
-    Returns a list of PNG image bytes, one per page.
-    """
-    if not PDF_SUPPORT:
-        raise RuntimeError("PyMuPDF not installed. Cannot convert PDF to images.")
-    pdf_doc = fitz.open(stream=pdf_bytes, filetype="pdf")
-    images = []
-    print(f"[INFO] PDF has {len(pdf_doc)} page(s)")
-    for page_num in range(len(pdf_doc)):
-        page = pdf_doc[page_num]
-        # Render page to image (zoom factor 2 for better quality)
-        mat = fitz.Matrix(2.0, 2.0)  # 2x zoom for better quality
-        pix = page.get_pixmap(matrix=mat)
-        # Convert to PIL Image then to JPEG bytes (better compression)
-        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
-        img_bytes = BytesIO()
-        img.save(img_bytes, format="JPEG", quality=95)
-        images.append(img_bytes.getvalue())
-        print(f"[INFO] Converted page {page_num + 1} to image ({pix.width}x{pix.height})")
-    pdf_doc.close()
-    return images
-def _image_bytes_to_base64(image_bytes: bytes) -> str:
-    """Convert image bytes to base64 data URL (JPEG format)."""
-    b64 = base64.b64encode(image_bytes).decode("utf-8")
-    data_url = f"data:image/jpeg;base64,{b64}"
-    print(f"[DEBUG] Base64 encoded image: {len(image_bytes)} bytes -> {len(data_url)} chars")
-    return data_url
-def _parse_markdown_table(text: str) -> Optional[Tuple[List[str], List[List[str]]]]:
-    """
-    Parse a markdown table from text.
-    Returns (headers, rows) if table found, None otherwise.
-    Handles various table formats including malformed ones.
-    """
-    lines = [line.strip() for line in text.split('\n')]
-    # Find potential table start (line with multiple | and actual text content)
-    table_start = None
-    for i, line in enumerate(lines):
-        if '|' in line and line.count('|') >= 2:
-            # Skip separator lines (only |, -, :, spaces)
-            if re.match(r'^[\s\|\-:]+$', line):
-                continue
-            # Check if line has meaningful text (not just | characters)
-            cells = [cell.strip() for cell in line.split('|')]
-            if cells and not cells[0]:
-                cells = cells[1:]
-            if cells and not cells[-1]:
-                cells = cells[:-1]
-            # Must have at least 2 columns with some text
-            meaningful_cells = [c for c in cells if len(c) > 0]
-            if len(meaningful_cells) >= 2:
-                table_start = i
-                break
-    if table_start is None:
-        return None
-    # Find table end (first non-empty line without | after table start)
-    table_end = None
-    for i in range(table_start + 1, len(lines)):
-        line = lines[i]
-        if not line:  # Empty line, continue
-            continue
-        if '|' not in line:
-            # Non-empty line without | means table ended
-            table_end = i
-            break
-    if table_end is None:
-        table_end = len(lines)
-    table_lines = lines[table_start:table_end]
-    # Find the actual header row (should have meaningful text, not just | or separators)
-    headers = None
-    header_idx = None
-    for i, line in enumerate(table_lines):
-        if not line or '|' not in line:
-            continue
-        # Skip separator lines (lines with only |, -, :, spaces)
-        if re.match(r'^[\s\|\-:]+$', line):
-            continue
-        # Check if this line has meaningful content (not just | characters)
-        cells = [cell.strip() for cell in line.split('|')]
-        # Remove empty cells at start/end
-        if cells and not cells[0]:
-            cells = cells[1:]
-        if cells and not cells[-1]:
-            cells = cells[:-1]
-        # Header should have at least 3 columns and meaningful text
-        if len(cells) >= 3:
-            # Check if cells have actual text (not just empty or single char)
-            meaningful_cells = [c for c in cells if len(c) > 1]
-            if len(meaningful_cells) >= 3:
-                headers = cells
-                header_idx = i
-                break
-    if not headers or header_idx is None:
-        return None
-    # Parse data rows (skip separator line after header if present)
-    rows = []
-    num_columns = len(headers)
-    for i in range(header_idx + 1, len(table_lines)):
-        line = table_lines[i]
-        if not line:
-            continue
-        # Skip separator lines
-        if re.match(r'^[\s\|\-:]+$', line):
-            continue
-        if '|' not in line:
-            # No more table rows
-            break
-        cells = [cell.strip() for cell in line.split('|')]
-        # Remove empty cells at start/end
-        if cells and not cells[0]:
-            cells = cells[1:]
-        if cells and not cells[-1]:
-            cells = cells[:-1]
-        # Only add rows that match header column count (allow some flexibility)
-        if len(cells) == num_columns or (len(cells) >= num_columns - 1 and len(cells) <= num_columns + 1):
-            # Pad or trim to match header count
-            if len(cells) < num_columns:
-                cells.extend([''] * (num_columns - len(cells)))
-            elif len(cells) > num_columns:
-                cells = cells[:num_columns]
-            # Only add if row has at least one non-empty cell
-            if any(cell for cell in cells):
-                rows.append(cells)
-    if not rows:
-        return None
-    return (headers, rows)
-def _extract_metadata(text: str) -> Dict[str, str]:
-    """
-    Extract metadata from document header text.
-    Looks for title, office, notice number, and description.
-    """
-    metadata = {
-        "title": "",
-        "office": "",
-        "notice_no": "",
-        "description": ""
-    }
-    lines = [line.strip() for line in text.split('\n') if line.strip()]
-    # Extract office (usually first non-empty line)
-    if lines:
-        metadata["office"] = lines[0]
-    # Look for notice number pattern (like "पत्रक सं- 1239" or "सं- 1239")
-    notice_pattern = r'(?:पत्रक\s+)?सं[-\s:]*(\d+)'
-    for line in lines[:10]:  # Check first 10 lines
-        match = re.search(notice_pattern, line)
-        if match:
-            metadata["notice_no"] = match.group(1)
-            break
-    # Look for title - usually in quotes or contains specific keywords
-    # Check for quoted text first
-    quoted_title = re.search(r'["""]([^"""]+)["""]', text[:1000])
-    if quoted_title:
-        metadata["title"] = quoted_title.group(1).strip()
-    else:
-        # Look for title patterns
-        title_keywords = ['सम्पत्ति', 'सूचना', 'विज्ञप्ति', 'नाम परिवर्तन']
-        for line in lines[:5]:
-            if any(keyword in line for keyword in title_keywords):
-                # Extract the title phrase
-                title_match = re.search(r'(सम्पत्ति[^।]*|सूचना[^।]*|विज्ञप्ति[^।]*)', line)
-                if title_match:
-                    metadata["title"] = title_match.group(1).strip()
-                    break
-    # Extract description (text before table, usually contains key phrases)
-    description_keywords = ['नाम परिवर्तन', 'अधिनियम', 'धारा', 'प्रकाशन', 'आवेदन']
-    description_parts = []
-    for i, line in enumerate(lines[:15]):  # Check first 15 lines
-        if any(keyword in line for keyword in description_keywords):
-            description_parts.append(line)
-            # Get a few surrounding lines for context
-            if i > 0:
-                description_parts.insert(0, lines[i-1])
-            if i < len(lines) - 1:
-                description_parts.append(lines[i+1])
-            break
-    if description_parts:
-        description = ' '.join(description_parts).strip()
-        if len(description) > 30:  # Only if substantial
-            # Clean up and limit length
-            description = re.sub(r'\s+', ' ', description)
-            metadata["description"] = description[:300]  # Limit length
-    return metadata
-def _extract_footer_notes(text: str) -> List[str]:
-    """
-    Extract footer notes from document.
-    Usually appears after the table.
-    """
-    notes = []
-    # Find table end
-    lines = text.split('\n')
-    table_end_idx = len(lines)
-    for i, line in enumerate(lines):
-        if '|' in line:
-            # Find last table line
-            j = i + 1
-            while j < len(lines) and ('|' in lines[j] or re.match(r'^[\s\|\-:]+$', lines[j])):
-                j += 1
-            table_end_idx = j
-            break
-    # Extract footer text (after table)
-    footer_lines = lines[table_end_idx:]
-    footer_text = '\n'.join(footer_lines).strip()
-    # Split into sentences/notes
-    # Look for sentences ending with period, exclamation, or specific keywords
-    sentences = re.split(r'[।\.!]\s+', footer_text)
-    for sentence in sentences:
-        sentence = sentence.strip()
-        if len(sentence) > 20:  # Only substantial notes
-            # Clean up
-            sentence = re.sub(r'\s+', ' ', sentence)
-            if sentence:
-                notes.append(sentence)
-    # Limit to most relevant notes (usually 2-4)
-    return notes[:5]
-def _parse_text_with_tables(text: str) -> Dict[str, Any]:
-    """
-    Parse OCR text of one page into metadata, table rows and footer notes.
-    Args:
-        text: Page text, possibly containing a markdown table.
-    Returns:
-        Dict with keys "text" (the input, unchanged), "metadata", "table" and
-        "footer_notes". When a table is found, "table" is a dict mapping
-        "row_1", "row_2", ... to {column_key: cell}; otherwise it stays an
-        empty list.
-    """
-    result = {
-        "text": text,  # Keep original text
-        "metadata": _extract_metadata(text),
-        "table": [],
-        "footer_notes": _extract_footer_notes(text)
-    }
-    table_data = _parse_markdown_table(text)
-    if not table_data:
-        # No table found: metadata and footer notes are all we can offer
-        return result
-    headers, rows = table_data
-    print(f"[INFO] Found table with {len(headers)} columns and {len(rows)} rows")
-    # Build one key per column from the original header text (original language kept).
-    # Keys are sanitised FIRST and de-duplicated AFTERWARDS, so headers that only
-    # differ in punctuation (e.g. "Amt." and "Amt") no longer collapse to the same
-    # key and silently overwrite each other's column.
-    header_mapping = {}
-    used_keys = set()
-    for i, header in enumerate(headers):
-        base_key = re.sub(r'[^\w\s\u0900-\u097F]', '', header.strip())  # Keep Unicode Hindi chars
-        base_key = re.sub(r'\s+', '_', base_key)  # Replace spaces with underscores
-        if not base_key:
-            # Nothing usable left in the header: fall back to the column position
-            base_key = f"column_{i+1}"
-        header_key = base_key
-        occurrence = 1
-        while header_key in used_keys:
-            occurrence += 1
-            header_key = f"{base_key}_{occurrence}"
-        used_keys.add(header_key)
-        header_mapping[i] = header_key
-    # Each row becomes its own section: row_1, row_2, ...
-    table_rows_dict = {}
-    for idx, row in enumerate(rows, start=1):
-        row_dict = {key: row[i].strip() for i, key in header_mapping.items() if i < len(row)}
-        if row_dict:
-            table_rows_dict[f"row_{idx}"] = row_dict
-    # Rows are stored as named sections rather than as an array
-    result["table"] = table_rows_dict
-    return result
-async def _extract_text_with_ocr(image_bytes: bytes, page_num: int, total_pages: int) -> Dict[str, Any]:
-    """
-    Send one page image to the OCR model and package the reply.
-    Args:
-        image_bytes: Encoded image of the page.
-        page_num: 1-based page number, used for logging and error messages.
-        total_pages: Total page count, used for logging only.
-    Returns:
-        Dict with "doc_type" (always "other"), "confidence", "full_text" (the
-        model output, "" when the model returned nothing) and "fields" (always {}).
-    Raises:
-        RuntimeError: if the model call or the handling of its response fails.
-    """
-    data_url = _image_bytes_to_base64(image_bytes)
-    print(f"[INFO] OCR: Processing page {page_num}/{total_pages} with model {OCR_MODEL_NAME}")
-    # One user message: the instruction text plus the page image as a data URL
-    user_message = {
-        "role": "user",
-        "content": [
-            {"type": "text", "text": "Extract all text from this image"},
-            {"type": "image_url", "image_url": {"url": data_url}},
-        ],
-    }
-    def _request_completion():
-        # Synchronous client call; handed to the default executor below
-        return ocr_client.chat.completions.create(
-            model=OCR_MODEL_NAME,
-            messages=[user_message],
-        )
-    try:
-        import asyncio
-        event_loop = asyncio.get_event_loop()
-        response = await event_loop.run_in_executor(None, _request_completion)
-        extracted_text = response.choices[0].message.content or ""
-        print(f"[INFO] OCR: Extracted {len(extracted_text)} characters from page {page_num}")
-        page_payload = {
-            "doc_type": "other",
-            "confidence": _calculate_ocr_confidence(response, extracted_text),
-            "full_text": extracted_text,
-            "fields": {}  # Structured fields are not produced at this stage
-        }
-        return page_payload
-    except Exception as e:
-        error_msg = str(e)
-        print(f"[ERROR] OCR API error for page {page_num}: {error_msg}")
-        raise RuntimeError(f"OCR API error for page {page_num}: {error_msg}")
-def _calculate_ocr_confidence(response, extracted_text: str) -> float:
-    """
-    Estimate a confidence score (0-95) for an OCR response using heuristics.
-    The base score comes from the first choice's finish_reason ("stop" -> 85,
-    "length" -> 70, anything else -> 75; 85 when it cannot be read). It is then
-    adjusted by the amount of text, the presence of table pipes and the share
-    of non-whitespace characters.
-    Args:
-        response: Completion response object; only response.choices[0].finish_reason is read.
-        extracted_text: Text returned by the model.
-    Returns:
-        0.0 for empty/whitespace-only text, otherwise a score between 30.0 and 95.0.
-    """
-    # The previous no-op inspection of response.usage was removed: it had no effect
-    # on the score, but an unexpected usage value (e.g. completion_tokens=None)
-    # raised inside the try block and masked the finish_reason below.
-    base_confidence = 85.0
-    try:
-        first_choice = response.choices[0]
-        if hasattr(first_choice, 'finish_reason'):
-            finish_reason = first_choice.finish_reason
-            if finish_reason == "stop":
-                # Normal completion - good sign
-                base_confidence = 85.0
-            elif finish_reason == "length":
-                # Response was truncated - lower confidence
-                base_confidence = 70.0
-            else:
-                base_confidence = 75.0
-    except Exception:
-        # Response shape not as expected: fall back to the default base score
-        base_confidence = 85.0
-    # Very short outputs are judged on length alone
-    text_length = len(extracted_text.strip())
-    if text_length == 0:
-        return 0.0
-    if text_length < 10:
-        # Very short text - might be error or empty
-        return max(30.0, base_confidence - 30.0)
-    if text_length < 50:
-        return max(50.0, base_confidence - 15.0)
-    # Long text - likely good extraction
-    confidence = min(95.0, base_confidence + 10.0) if text_length > 1000 else base_confidence
-    # More than five pipes is taken as a sign of a table, i.e. structured output
-    if extracted_text.count('|') > 5:
-        confidence = min(95.0, confidence + 5.0)
-    # Share of non-whitespace characters relative to the stripped length
-    non_whitespace = sum(1 for ch in extracted_text if not ch.isspace())
-    content_ratio = non_whitespace / text_length
-    if content_ratio > 0.8:
-        confidence = min(95.0, confidence + 3.0)
-    elif content_ratio < 0.3:
-        # Mostly whitespace
-        confidence = max(50.0, confidence - 10.0)
-    return round(confidence, 1)
-async def extract_fields_from_document(
-    file_bytes: bytes,
-    content_type: str,
-    filename: str,
-) -> Dict[str, Any]:
-    """
-    Run OCR over a document and return per-page and combined results.
-    PDFs are rendered to one image per page. Any other content type is treated
-    as a single image, converted to RGB JPEG and downscaled so its longest side
-    is at most 1920 px. Pages are sent to the OCR model one at a time; a page
-    that fails is recorded with an "error" entry instead of aborting the run.
-    Args:
-        file_bytes: Raw bytes of the uploaded file.
-        content_type: MIME type; "application/pdf" or anything ending in "/pdf" selects the PDF path.
-        filename: Not used by this function.
-    Returns:
-        Dict with "doc_type", "confidence" (mean of the non-zero page
-        confidences, 0 if there are none), "full_text" (page texts joined under
-        "=== PAGE n ===" headings), "fields" (structured data keyed "page_<n>",
-        or {} when no page produced text) and "pages" (raw per-page results).
-    Raises:
-        RuntimeError: for a PDF when PDF support is unavailable.
-    """
-    # Get raw image bytes for processing
-    if content_type == "application/pdf" or content_type.endswith("/pdf"):
-        if not PDF_SUPPORT:
-            raise RuntimeError("PDF support requires PyMuPDF. Please install it.")
-        # For PDFs, convert to images
-        image_bytes_list = _pdf_to_images(file_bytes)
-    else:
-        # For regular images, convert to JPEG for consistency
-        try:
-            img = Image.open(BytesIO(file_bytes))
-            if img.mode != "RGB":
-                img = img.convert("RGB")
-            # Resize if too large (max 1920px on longest side)
-            max_size = 1920
-            w, h = img.size
-            if w > max_size or h > max_size:
-                if w > h:
-                    new_w = max_size
-                    new_h = int(h * (max_size / w))
-                else:
-                    new_h = max_size
-                    new_w = int(w * (max_size / h))
-                img = img.resize((new_w, new_h), Image.LANCZOS)
-                print(f"[INFO] Resized image from {w}x{h} to {new_w}x{new_h}")
-            # Convert to JPEG bytes
-            img_bytes = BytesIO()
-            img.save(img_bytes, format="JPEG", quality=95)
-            image_bytes_list = [img_bytes.getvalue()]
-        except Exception as e:
-            # Fallback: use original file bytes.
-            # This assignment must live inside the handler: placed after the
-            # try/except it replaced the converted JPEG on every call.
-            print(f"[WARNING] Could not process image with PIL: {e}. Using original bytes.")
-            image_bytes_list = [file_bytes]
-    total_pages = len(image_bytes_list)
-    print(f"[INFO] Processing {total_pages} page(s) with OCR model...")
-    # Process each page separately so one bad page does not lose the others
-    page_results = []
-    for page_num, img_bytes in enumerate(image_bytes_list, start=1):
-        print(f"[INFO] Processing page {page_num}/{total_pages}...")
-        try:
-            page_result = await _extract_text_with_ocr(img_bytes, page_num, total_pages)
-            page_results.append({
-                "page_number": page_num,
-                "text": page_result.get("full_text", ""),
-                "fields": page_result.get("fields", {}),
-                "confidence": page_result.get("confidence", 0),
-                "doc_type": page_result.get("doc_type", "other"),
-            })
-            print(f"[INFO] Page {page_num} processed successfully")
-        except Exception as e:
-            print(f"[ERROR] Failed to process page {page_num}: {e}")
-            page_results.append({
-                "page_number": page_num,
-                "text": "",
-                "fields": {},
-                "confidence": 0,
-                "error": str(e)
-            })
-    # Combine results from all pages that produced text
-    combined_full_text = "\n\n".join([f"=== PAGE {p['page_number']} ===\n\n{p['text']}" for p in page_results if p.get("text")])
-    # Parse each page for tables and structure the output
-    structured_pages = {}
-    for page_result in page_results:
-        if page_result.get("text"):
-            page_num = page_result.get("page_number", 1)
-            page_text = page_result.get("text", "")
-            parsed_data = _parse_text_with_tables(page_text)
-            # Pages are always keyed page_<n>, even for a single-page document
-            structured_pages[f"page_{page_num}"] = {
-                "text": parsed_data["text"],
-                "metadata": parsed_data["metadata"],
-                "table": parsed_data["table"],
-                "footer_notes": parsed_data["footer_notes"],
-                "confidence": page_result.get("confidence", 0),
-                "doc_type": page_result.get("doc_type", "other")
-            }
-    # structured_pages is already {} when no page produced text
-    combined_fields = structured_pages
-    # Average over pages that reported a non-zero confidence
-    confidences = [p.get("confidence", 0) for p in page_results if p.get("confidence", 0) > 0]
-    avg_confidence = sum(confidences) / len(confidences) if confidences else 0
-    # Use the first page whose doc_type is something other than "other"
-    doc_type = "other"
-    for page_result in page_results:
-        if page_result.get("doc_type") and page_result["doc_type"] != "other":
-            doc_type = page_result["doc_type"]
-            break
-    return {
-        "doc_type": doc_type,
-        "confidence": avg_confidence,
-        "full_text": combined_full_text,
-        "fields": combined_fields,  # Structured data with tables
-        "pages": page_results
-    }

+import os
+import base64
+import json
+import re
+import time
+import asyncio
+from io import BytesIO
+from typing import Any, Dict, List, Optional, Tuple
+import httpx
+try:
+    import fitz  # PyMuPDF
+    from PIL import Image
+    PDF_SUPPORT = True
+except ImportError as e:
+    PDF_SUPPORT = False
+    print(f"[WARNING] PDF support libraries not available: {e}. PDF conversion will not work.")
+# RunPod Serverless OCR Configuration
+RUNPOD_ENDPOINT = os.environ.get("RUNPOD_ENDPOINT", "https://api.runpod.ai/v2/j2jvf8t6n0rk5c/run")
+# SECURITY: the API key is read from the environment only. A real-looking key was
+# previously committed here as the fallback value; treat that key as compromised
+# and rotate it. Never put credentials in source as defaults.
+RUNPOD_API_KEY = os.environ.get("RUNPOD_API_KEY", "")
+if not RUNPOD_API_KEY:
+    print("[WARNING] RUNPOD_API_KEY is not set. Requests to the RunPod endpoint will be sent without a valid bearer token.")
+# Extract endpoint ID from endpoint URL for status polling
+# URL format: https://api.runpod.ai/v2/{endpoint_id}/run
+_endpoint_id = (RUNPOD_ENDPOINT.split("/v2/")[1].split("/")[0] or None) if "/v2/" in RUNPOD_ENDPOINT else None
+# None when the endpoint URL does not contain an ID; callers must check before polling
+RUNPOD_STATUS_ENDPOINT = f"https://api.runpod.ai/v2/{_endpoint_id}/status" if _endpoint_id else None
+def _pdf_to_images(pdf_bytes: bytes) -> List[bytes]:
+    """
+    Render every page of a PDF to a JPEG image.
+    Pages are rasterised at 2x zoom and downscaled so the longest side is at
+    most 1920 px.
+    Args:
+        pdf_bytes: Raw bytes of the PDF file.
+    Returns:
+        A list of JPEG-encoded image bytes, one entry per page, in page order.
+    Raises:
+        RuntimeError: if the PDF libraries could not be imported.
+    """
+    if not PDF_SUPPORT:
+        raise RuntimeError("PyMuPDF not installed. Cannot convert PDF to images.")
+    max_size = 1920  # Longest side allowed, to avoid GPU memory issues downstream
+    images = []
+    pdf_doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+    try:
+        print(f"[INFO] PDF has {len(pdf_doc)} page(s)")
+        for page_num in range(len(pdf_doc)):
+            page = pdf_doc[page_num]
+            # Render page to image (zoom factor 2 for better quality)
+            pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
+            # Convert to PIL Image
+            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+            w, h = img.size
+            if w > max_size or h > max_size:
+                # Scale the longer side to max_size and keep the aspect ratio
+                if w > h:
+                    new_w = max_size
+                    new_h = int(h * (max_size / w))
+                else:
+                    new_h = max_size
+                    new_w = int(w * (max_size / h))
+                img = img.resize((new_w, new_h), Image.LANCZOS)
+                print(f"[INFO] Resized page {page_num + 1} from {w}x{h} to {new_w}x{new_h}")
+            else:
+                print(f"[INFO] Converted page {page_num + 1} to image ({w}x{h})")
+            # Convert to JPEG bytes (better compression)
+            img_bytes = BytesIO()
+            img.save(img_bytes, format="JPEG", quality=95)
+            images.append(img_bytes.getvalue())
+    finally:
+        # Release the document even when rendering a page fails
+        pdf_doc.close()
+    return images
+def _image_bytes_to_base64(image_bytes: bytes) -> str:
+    """
+    Wrap raw image bytes in a base64 data URL.
+    The URL is always labelled image/jpeg; the bytes themselves are not inspected.
+    """
+    encoded = base64.b64encode(image_bytes).decode("utf-8")
+    data_url = "data:image/jpeg;base64," + encoded
+    print(f"[DEBUG] Base64 encoded image: {len(image_bytes)} bytes -> {len(data_url)} chars")
+    return data_url
+def _parse_markdown_table(text: str) -> Optional[Tuple[List[str], List[List[str]]]]:
+    """
+    Locate the first pipe-delimited table in the text and split it into cells.
+    The header is the first table line with at least three cells, of which at
+    least three are longer than one character. Data rows may be one cell short
+    or long; they are padded with '' or trimmed to the header width. Rows that
+    are entirely empty are dropped.
+    Returns:
+        (headers, rows) when a header and at least one data row are found,
+        otherwise None.
+    """
+    separator_re = r'^[\s\|\-:]+$'  # lines made only of pipes, dashes, colons, spaces
+    def _cells_of(row_line):
+        # Split on pipes and drop the empty cell caused by a leading/trailing pipe
+        parts = [part.strip() for part in row_line.split('|')]
+        if parts and not parts[0]:
+            parts = parts[1:]
+        if parts and not parts[-1]:
+            parts = parts[:-1]
+        return parts
+    stripped_lines = [raw.strip() for raw in text.split('\n')]
+    # Table start: first non-separator line with >= 2 pipes and >= 2 non-empty cells
+    table_start = next(
+        (
+            pos for pos, content in enumerate(stripped_lines)
+            if content.count('|') >= 2
+            and not re.match(separator_re, content)
+            and sum(1 for cell in _cells_of(content) if cell) >= 2
+        ),
+        None,
+    )
+    if table_start is None:
+        return None
+    # Table end: first non-empty line without a pipe (blank lines do not end it)
+    table_end = next(
+        (
+            pos for pos in range(table_start + 1, len(stripped_lines))
+            if stripped_lines[pos] and '|' not in stripped_lines[pos]
+        ),
+        len(stripped_lines),
+    )
+    table_lines = stripped_lines[table_start:table_end]
+    # Header: first candidate line that is wide enough and has real text
+    headers = None
+    header_idx = None
+    for pos, content in enumerate(table_lines):
+        if not content or '|' not in content or re.match(separator_re, content):
+            continue
+        candidate = _cells_of(content)
+        if len(candidate) >= 3 and sum(1 for cell in candidate if len(cell) > 1) >= 3:
+            headers, header_idx = candidate, pos
+            break
+    if not headers or header_idx is None:
+        return None
+    # Data rows: everything after the header, skipping blanks and separator lines
+    num_columns = len(headers)
+    rows = []
+    for content in table_lines[header_idx + 1:]:
+        if not content or re.match(separator_re, content):
+            continue
+        if '|' not in content:
+            break
+        cells = _cells_of(content)
+        if abs(len(cells) - num_columns) > 1:
+            continue  # too far from the header width to be trusted
+        # Pad short rows with '' and cut long rows down to the header width
+        cells = (cells + [''] * (num_columns - len(cells)))[:num_columns]
+        if any(cells):
+            rows.append(cells)
+    return (headers, rows) if rows else None
+def _extract_metadata(text: str) -> Dict[str, str]:
+    """
+    Pull basic header metadata out of document text using fixed heuristics.
+    Returns:
+        Dict with the keys "title", "office", "notice_no" and "description";
+        any field that cannot be found is left as an empty string.
+    """
+    doc_lines = [ln.strip() for ln in text.split('\n') if ln.strip()]
+    info = {
+        "title": "",
+        # The office is taken to be the first non-empty line
+        "office": doc_lines[0] if doc_lines else "",
+        "notice_no": "",
+        "description": ""
+    }
+    # Notice number, e.g. "पत्रक सं- 1239" or "सं- 1239", searched in the first 10 lines
+    notice_pattern = r'(?:पत्रक\s+)?सं[-\s:]*(\d+)'
+    notice_hit = next(
+        (hit for hit in (re.search(notice_pattern, ln) for ln in doc_lines[:10]) if hit),
+        None,
+    )
+    if notice_hit:
+        info["notice_no"] = notice_hit.group(1)
+    # Title: quoted text within the first 1000 characters wins; otherwise a keyword phrase
+    quoted_title = re.search(r'["""]([^"""]+)["""]', text[:1000])
+    if quoted_title:
+        info["title"] = quoted_title.group(1).strip()
+    else:
+        title_keywords = ['सम्पत्ति', 'सूचना', 'विज्ञप्ति', 'नाम परिवर्तन']
+        for ln in doc_lines[:5]:
+            if not any(keyword in ln for keyword in title_keywords):
+                continue
+            # The phrase runs from the keyword up to the next danda
+            phrase = re.search(r'(सम्पत्ति[^।]*|सूचना[^।]*|विज्ञप्ति[^।]*)', ln)
+            if phrase:
+                info["title"] = phrase.group(1).strip()
+                break
+    # Description: first keyword line among the first 15, plus one line of context either side
+    description_keywords = ['नाम परिवर्तन', 'अधिनियम', 'धारा', 'प्रकाशन', 'आवेदन']
+    context = []
+    for pos, ln in enumerate(doc_lines[:15]):
+        if any(keyword in ln for keyword in description_keywords):
+            if pos > 0:
+                context.append(doc_lines[pos - 1])
+            context.append(ln)
+            if pos < len(doc_lines) - 1:
+                context.append(doc_lines[pos + 1])
+            break
+    if context:
+        description = ' '.join(context).strip()
+        if len(description) > 30:  # Ignore anything too short to be meaningful
+            info["description"] = re.sub(r'\s+', ' ', description)[:300]
+    return info
+def _parse_model_response(response_text: str) -> Tuple[str, Dict[str, Any]]:
+    """
+    Split a model response into its text and its metadata.
+    Recognised layouts (markers are matched case-insensitively):
+      - "METADATA:" followed by a JSON object, bare or inside a ``` fence
+      - "METADATA:" followed by "key: value" lines
+      - "TEXT:" followed by the document text
+    Args:
+        response_text: Raw model output.
+    Returns:
+        (extracted_text, metadata_dict). Metadata entries with empty values are
+        dropped. When there is no "TEXT:" section, the text is the response with
+        the parsed metadata section removed.
+    """
+    if not response_text:
+        return "", {}
+    flags = re.DOTALL | re.IGNORECASE
+    metadata = {}
+    # Response with the metadata section cut out; None until a section is parsed
+    without_metadata = None
+    # IGNORECASE already covers "metadata:", so one bare-JSON pattern is enough
+    metadata_patterns = [
+        r'METADATA:\s*\n?\s*({.*?})(?:\n\n|\nTEXT|$)',
+        r'METADATA:\s*\n?\s*```json\s*({.*?})\s*```',
+        r'METADATA:\s*\n?\s*```\s*({.*?})\s*```',
+    ]
+    for pattern in metadata_patterns:
+        match = re.search(pattern, response_text, flags)
+        if not match:
+            continue
+        try:
+            metadata = json.loads(match.group(1).strip())
+        except json.JSONDecodeError:
+            continue
+        without_metadata = response_text[:match.start()] + response_text[match.end():]
+        break
+    # No JSON metadata: try "key: value" lines under the METADATA marker
+    if not metadata:
+        metadata_section = re.search(r'METADATA:\s*\n(.*?)(?:\n\n|\nTEXT|$)', response_text, flags)
+        if metadata_section:
+            for line in metadata_section.group(1).split('\n'):
+                key, sep, value = line.partition(':')
+                value = value.strip()
+                if sep and value:
+                    metadata[key.strip().lower().replace(' ', '_')] = value
+            if metadata:
+                without_metadata = (
+                    response_text[:metadata_section.start()] + response_text[metadata_section.end():]
+                )
+    # An explicit TEXT section always wins
+    text_match = re.search(r'TEXT:\s*\n(.*?)(?:\n\nMETADATA|$)', response_text, flags)
+    if text_match:
+        text = text_match.group(1)
+    elif without_metadata is not None:
+        # Keep whatever surrounds the parsed metadata section. Previously this
+        # value was computed and then discarded, so text that followed the
+        # metadata block without a "TEXT:" label was lost.
+        text = without_metadata
+    else:
+        # Marker present but unparseable (or absent): drop from the marker onwards
+        text = re.sub(r'METADATA:.*', '', response_text, flags=flags)
+    text = text.strip()
+    # Clean up metadata - remove empty values
+    metadata = {k: v for k, v in metadata.items() if v and str(v).strip()}
+    return text, metadata
+def _extract_footer_notes(text: str) -> List[str]:
+    """
+    Collect up to five footer notes that follow the first table in the text.
+    The table is recognised by its pipe characters: the footer begins after the
+    unbroken run of pipe/separator lines that starts at the first line
+    containing '|'. When no line contains '|', no notes are returned.
+    Returns:
+        At most five whitespace-normalised fragments longer than 20 characters.
+    """
+    all_lines = text.split('\n')
+    total = len(all_lines)
+    # Position of the first line that looks like part of a table, if any
+    first_pipe = next((pos for pos, content in enumerate(all_lines) if '|' in content), None)
+    if first_pipe is None:
+        footer_start = total
+    else:
+        # Walk forward while the lines still look like table rows or separators
+        footer_start = first_pipe + 1
+        while footer_start < total:
+            candidate = all_lines[footer_start]
+            if '|' not in candidate and not re.match(r'^[\s\|\-:]+$', candidate):
+                break
+            footer_start += 1
+    footer_text = '\n'.join(all_lines[footer_start:]).strip()
+    # Fragments end at a danda, period or exclamation mark followed by whitespace
+    collected = []
+    for fragment in re.split(r'[।\.!]\s+', footer_text):
+        trimmed = fragment.strip()
+        if len(trimmed) <= 20:
+            continue  # too short to count as a note
+        collected.append(re.sub(r'\s+', ' ', trimmed))
+    return collected[:5]
+def _parse_text_with_tables(text: str, page_metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+    """
+    Parse OCR text of one page into metadata, table rows and footer notes.
+    Args:
+        text: Page text, possibly containing a markdown table.
+        page_metadata: Metadata already extracted by the model. When given and
+            non-empty it is used as-is, whether or not a table is found;
+            otherwise metadata is derived from the text heuristically.
+    Returns:
+        Dict with keys "text" (the input, unchanged), "metadata", "table" and
+        "footer_notes". When a table is found, "table" is a dict mapping
+        "row_1", "row_2", ... to {column_key: cell}; otherwise it stays an
+        empty list.
+    """
+    result = {
+        "text": text,  # Keep original text
+        # Model-extracted metadata wins. The no-table path used to overwrite it
+        # with the heuristic result, contradicting the documented contract.
+        "metadata": page_metadata if page_metadata else _extract_metadata(text),
+        "table": [],
+        "footer_notes": _extract_footer_notes(text)
+    }
+    table_data = _parse_markdown_table(text)
+    if not table_data:
+        # No table found: metadata and footer notes are all we can offer
+        return result
+    headers, rows = table_data
+    print(f"[INFO] Found table with {len(headers)} columns and {len(rows)} rows")
+    # Build one key per column from the original header text (original language kept).
+    # Keys are sanitised FIRST and de-duplicated AFTERWARDS, so headers that only
+    # differ in punctuation (e.g. "Amt." and "Amt") no longer collapse to the same
+    # key and silently overwrite each other's column.
+    header_mapping = {}
+    used_keys = set()
+    for i, header in enumerate(headers):
+        base_key = re.sub(r'[^\w\s\u0900-\u097F]', '', header.strip())  # Keep Unicode Hindi chars
+        base_key = re.sub(r'\s+', '_', base_key)  # Replace spaces with underscores
+        if not base_key:
+            # Nothing usable left in the header: fall back to the column position
+            base_key = f"column_{i+1}"
+        header_key = base_key
+        occurrence = 1
+        while header_key in used_keys:
+            occurrence += 1
+            header_key = f"{base_key}_{occurrence}"
+        used_keys.add(header_key)
+        header_mapping[i] = header_key
+    # Each row becomes its own section: row_1, row_2, ...
+    table_rows_dict = {}
+    for idx, row in enumerate(rows, start=1):
+        row_dict = {key: row[i].strip() for i, key in header_mapping.items() if i < len(row)}
+        if row_dict:
+            table_rows_dict[f"row_{idx}"] = row_dict
+    # Rows are stored as named sections rather than as an array
+    result["table"] = table_rows_dict
+    return result
+async def _poll_runpod_job(job_id: str, client: httpx.AsyncClient, max_wait_time: int = 300) -> Dict[str, Any]:
+    """
+    Poll the RunPod status endpoint until the job reaches a terminal state.
+    Args:
+        job_id: ID returned when the job was submitted.
+        client: Open HTTP client used for the status requests.
+        max_wait_time: Maximum number of seconds to keep polling.
+    Returns:
+        The status payload of the job once its status is COMPLETED.
+    Raises:
+        RuntimeError: if the status endpoint is not configured, the job ends
+            as FAILED, CANCELLED or TIMED_OUT, or max_wait_time elapses.
+        httpx.HTTPStatusError: if a status request returns an error response.
+    """
+    if not RUNPOD_STATUS_ENDPOINT:
+        # Without this guard the request would go to the literal URL "None/<job_id>"
+        raise RuntimeError("RunPod status endpoint not configured. Cannot poll async job.")
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {RUNPOD_API_KEY}"
+    }
+    status_url = f"{RUNPOD_STATUS_ENDPOINT}/{job_id}"
+    poll_interval = 2  # Poll every 2 seconds
+    # Monotonic clock: the deadline is unaffected by system clock adjustments
+    deadline = time.monotonic() + max_wait_time
+    while True:
+        if time.monotonic() > deadline:
+            raise RuntimeError(f"Job {job_id} timed out after {max_wait_time} seconds")
+        response = await client.get(status_url, headers=headers)
+        response.raise_for_status()
+        status_result = response.json()
+        # A missing or null status is treated like an unknown one
+        status = str(status_result.get("status") or "").upper()
+        if status == "COMPLETED":
+            print(f"[INFO] Job {job_id} completed successfully")
+            return status_result
+        if status == "FAILED":
+            error_msg = status_result.get("error", "Unknown error")
+            raise RuntimeError(f"Job {job_id} failed: {error_msg}")
+        if status in ("CANCELLED", "TIMED_OUT"):
+            # These states are final as well; waiting longer cannot change them
+            error_msg = status_result.get("error", "Unknown error")
+            raise RuntimeError(f"Job {job_id} ended with status {status}: {error_msg}")
+        # IN_QUEUE, IN_PROGRESS or an unrecognised status: wait and ask again
+        print(f"[INFO] Job {job_id} status: {status}, waiting...")
+        await asyncio.sleep(poll_interval)
+async def _extract_text_with_ocr(image_bytes: bytes, page_num: int, total_pages: int, custom_prompt: str = None) -> Dict[str, Any]:
+    """
+    Extract text and metadata from a single page/image using the RunPod serverless OCR model.
+    Uses model-driven extraction to identify and extract metadata fields dynamically.
+    Returns text output in full_text field and extracted metadata.
+    Args:
+        image_bytes: Image bytes to process
+        page_num: Page number
+        total_pages: Total number of pages
+        custom_prompt: Optional custom prompt for field extraction
+    """
+    # Convert image bytes to base64
+    image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+    print(f"[INFO] OCR: Processing page {page_num}/{total_pages} with RunPod endpoint")
+    try:
+        # Use custom prompt if provided, otherwise use default
+        if custom_prompt:
+            metadata_prompt = custom_prompt
+        else:
+            # Default prompt for general text extraction
+            metadata_prompt = """Extract all text from this image."""
+        # Prepare request payload for RunPod
+        # RunPod serverless endpoints expect image_base64, image_url, or image_path
+        payload = {
+            "input": {
+                "prompt": metadata_prompt,
+                "image_base64": image_base64  # Base64 encoded image
+            }
+        }
+        # Make HTTP request to RunPod endpoint
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {RUNPOD_API_KEY}"
+        }
+        async with httpx.AsyncClient(timeout=300.0) as client:
+            # Submit job
+            response = await client.post(
+                RUNPOD_ENDPOINT,
+                headers=headers,
+                json=payload
+            )
+            response.raise_for_status()
+            result = response.json()
+            # Check if this is an async job (has job ID and status)
+            job_id = result.get("id")
+            status = result.get("status", "").upper()
+            if job_id and status in ["IN_QUEUE", "IN_PROGRESS"]:
+                # This is an async job, need to poll for completion
+                print(f"[INFO] Job submitted with ID: {job_id}, status: {status}")
+                if not RUNPOD_STATUS_ENDPOINT:
+                    raise RuntimeError("RunPod status endpoint not configured. Cannot poll async job.")
+                # Poll until completion
+                result = await _poll_runpod_job(job_id, client)
+            # Extract text from RunPod response
+            # RunPod serverless typically returns: {"id": "...", "status": "...", "output": "..."}
+            # The output might be a string or a dict depending on the model
+            extracted_text = ""
+            if "output" in result:
+                output = result["output"]
+                if isinstance(output, str):
+                    extracted_text = output
+                elif isinstance(output, dict):
+                    # If output is a dict, try common fields
+                    extracted_text = output.get("text", output.get("result", output.get("content", "")))
+                    if not extracted_text and isinstance(output.get("text"), str):
+                        extracted_text = output["text"]
+                elif isinstance(output, list) and len(output) > 0:
+                    # If output is a list, take the first element
+                    extracted_text = str(output[0])
+            elif "result" in result:
+                extracted_text = str(result["result"])
+            elif "text" in result:
+                extracted_text = str(result["text"])
+            else:
+                # Fallback: convert entire response to string
+                extracted_text = str(result)
+            if not extracted_text:
+                extracted_text = ""
+            print(f"[INFO] OCR: Extracted {len(extracted_text)} characters from page {page_num}")
+            # Parse model response to extract text and metadata
+            parsed_text, parsed_metadata = _parse_model_response(extracted_text)
+            # Calculate confidence based on response quality
+            # Create a mock response object for compatibility with confidence calculation
+            mock_response = type('obj', (object,), {
+                'choices': [type('obj', (object,), {'finish_reason': 'stop'})()],
+                'usage': type('obj', (object,), {'completion_tokens': len(parsed_text.split())})()
+            })()
+            confidence = _calculate_ocr_confidence(mock_response, parsed_text)
+            # Determine document type from metadata if available
+            doc_type = parsed_metadata.get("document_type", "other")
+            if doc_type == "other" and parsed_metadata.get("title"):
+                # Try to infer from title
+                title_lower = parsed_metadata.get("title", "").lower()
+                if any(kw in title_lower for kw in ["tender", "bid", "quotation"]):
+                    doc_type = "tender"
+                elif any(kw in title_lower for kw in ["recruitment", "appointment", "vacancy"]):
+                    doc_type = "recruitment"
+                elif any(kw in title_lower for kw in ["notice", "notification", "circular"]):
+                    doc_type = "notice"
+            # Return text and extracted metadata
+            return {
+                "doc_type": doc_type,
+                "confidence": confidence,
+                "full_text": parsed_text,
+                "fields": parsed_metadata if parsed_metadata else {}  # Model-extracted metadata
+            }
+    except httpx.HTTPStatusError as e:
+        error_msg = f"HTTP {e.response.status_code}: {e.response.text}"
+        print(f"[ERROR] OCR API HTTP error for page {page_num}: {error_msg}")
+        raise RuntimeError(f"OCR API error for page {page_num}: {error_msg}")
+    except Exception as e:
+        error_msg = str(e)
+        print(f"[ERROR] OCR API error for page {page_num}: {error_msg}")
+        raise RuntimeError(f"OCR API error for page {page_num}: {error_msg}")
+def _calculate_ocr_confidence(response, extracted_text: str) -> float:
+    """
+    Calculate confidence score based on OCR response quality.
+    Returns a score from 0-100, with higher scores for better extraction quality.
+    """
+    # Start with a higher base confidence for successful extractions
+    base_confidence = 92.0
+    # Adjust confidence based on text quality heuristics
+    text_length = len(extracted_text.strip())
+    if text_length == 0:
+        return 0.0
+    elif text_length < 10:
+        # Very short text - might be error or empty
+        return max(30.0, base_confidence - 40.0)
+    elif text_length < 50:
+        # Short text - might be incomplete
+        return max(60.0, base_confidence - 20.0)
+    elif text_length > 1000:
+        # Long text - likely good extraction
+        confidence = min(100.0, base_confidence + 5.0)
+    elif text_length > 500:
+        # Medium-long text - good extraction
+        confidence = min(100.0, base_confidence + 3.0)
+    else:
+        confidence = base_confidence
+    # Check for structured content (tables, etc.) - indicates good extraction
+    if '|' in extracted_text and extracted_text.count('|') > 5:
+        # Table detected - boost confidence significantly
+        confidence = min(100.0, confidence + 6.0)
+    # Check for meaningful content (non-whitespace ratio)
+    non_whitespace = len([c for c in extracted_text if not c.isspace()])
+    if text_length > 0:
+        content_ratio = non_whitespace / text_length
+        if content_ratio > 0.85:
+            # Very high content ratio - excellent extraction
+            confidence = min(100.0, confidence + 5.0)
+        elif content_ratio > 0.75:
+            # High content ratio - good extraction
+            confidence = min(100.0, confidence + 3.0)
+        elif content_ratio > 0.6:
+            # Moderate content ratio - decent extraction
+            confidence = min(100.0, confidence + 1.0)
+        elif content_ratio < 0.3:
+            # Low content ratio - mostly whitespace
+            confidence = max(60.0, confidence - 15.0)
+    # Check for common OCR quality indicators
+    # Presence of numbers, dates, and structured patterns indicates good extraction
+    has_numbers = any(c.isdigit() for c in extracted_text)
+    has_letters = any(c.isalpha() for c in extracted_text)
+    has_punctuation = any(c in '.,;:!?()[]{}' for c in extracted_text)
+    if has_numbers and has_letters and has_punctuation:
+        # Well-structured text with mixed content - high confidence
+        confidence = min(100.0, confidence + 2.0)
+    # Cap at 100% and ensure minimum quality threshold
+    return round(min(100.0, max(0.0, confidence)), 1)
+async def extract_fields_from_document(
+    file_bytes: bytes,
+    content_type: str,
+    filename: str,
+    key_fields: str = None,
+) -> Dict[str, Any]:
+    """
+    Extract text from document using OCR model.
+    Processes pages separately for better reliability.
+    Returns text output in full_text, keeps JSON/XML fields empty for now.
+    """
+    # Get raw image bytes for processing
+    if content_type == "application/pdf" or content_type.endswith("/pdf"):
+        if not PDF_SUPPORT:
+            raise RuntimeError("PDF support requires PyMuPDF. Please install it.")
+        # For PDFs, convert to images
+        pdf_images = _pdf_to_images(file_bytes)
+        image_bytes_list = pdf_images
+    else:
+        # For regular images, process the file bytes
+        # Convert to JPEG for consistency
+        try:
+            img = Image.open(BytesIO(file_bytes))
+            if img.mode != "RGB":
+                img = img.convert("RGB")
+            # Resize if too large (max 1920px on longest side)
+            max_size = 1920
+            w, h = img.size
+            if w > max_size or h > max_size:
+                if w > h:
+                    new_w = max_size
+                    new_h = int(h * (max_size / w))
+                else:
+                    new_h = max_size
+                    new_w = int(w * (max_size / h))
+                img = img.resize((new_w, new_h), Image.LANCZOS)
+                print(f"[INFO] Resized image from {w}x{h} to {new_w}x{new_h}")
+            # Convert to JPEG bytes
+            img_bytes = BytesIO()
+            img.save(img_bytes, format="JPEG", quality=95)
+            image_bytes_list = [img_bytes.getvalue()]
+        except Exception as e:
+            # Fallback: use original file bytes
+            print(f"[WARNING] Could not process image with PIL: {e}. Using original bytes.")
+        image_bytes_list = [file_bytes]
+    total_pages = len(image_bytes_list)
+    print(f"[INFO] Processing {total_pages} page(s) with OCR model...")
+    # Process each page separately
+    page_results = []
+    for page_num, img_bytes in enumerate(image_bytes_list):
+        print(f"[INFO] Processing page {page_num + 1}/{total_pages}...")
+        try:
+            page_result = await _extract_text_with_ocr(img_bytes, page_num + 1, total_pages, None)
+            page_results.append({
+                "page_number": page_num + 1,
+                "text": page_result.get("full_text", ""),
+                "fields": page_result.get("fields", {}),
+                "confidence": page_result.get("confidence", 0),
+                "doc_type": page_result.get("doc_type", "other"),
+            })
+            print(f"[INFO] Page {page_num + 1} processed successfully")
+        except Exception as e:
+            print(f"[ERROR] Failed to process page {page_num + 1}: {e}")
+            page_results.append({
+                "page_number": page_num + 1,
+                "text": "",
+                "fields": {},
+                "confidence": 0,
+                "error": str(e)
+            })
+    # Combine results from all pages
+    combined_full_text = "\n\n".join([f"=== PAGE {p['page_number']} ===\n\n{p['text']}" for p in page_results if p.get("text")])
+    # Extract user-specified fields if key_fields provided
+    extracted_fields = {}
+    if key_fields and key_fields.strip():
+        # Parse user input: "Invoice Number, Invoice Date, PO Number" -> ['Invoice Number', 'Invoice Date', 'PO Number']
+        field_list = [f.strip() for f in key_fields.split(',') if f.strip()]
+        if field_list:
+            print(f"[INFO] Extracting user-specified fields: {field_list}")
+            # Format fields as JSON array string for prompt
+            fields_json = json.dumps(field_list)
+            custom_prompt = f"Extract the following fields from this image and return as JSON: {fields_json}. Return only a valid JSON object with the field names as keys and their extracted values."
+            # Run second OCR pass on first page (usually has most metadata) with custom prompt
+            if image_bytes_list and len(image_bytes_list) > 0:
+                try:
+                    print("[INFO] Running second OCR pass for field extraction...")
+                    field_result = await _extract_text_with_ocr(image_bytes_list[0], 1, 1, custom_prompt)
+                    field_text = field_result.get("full_text", "")
+                    # Try to parse JSON from the response
+                    try:
+                        # Look for JSON in the response
+                        json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', field_text, re.DOTALL)
+                        if json_match:
+                            extracted_fields = json.loads(json_match.group(0))
+                            print(f"[INFO] Successfully extracted {len(extracted_fields)} fields from second OCR pass")
+                        else:
+                            # Try parsing the entire response as JSON
+                            extracted_fields = json.loads(field_text)
+                            print(f"[INFO] Successfully extracted {len(extracted_fields)} fields from second OCR pass")
+                    except json.JSONDecodeError:
+                        print(f"[WARNING] Could not parse JSON from field extraction response: {field_text[:200]}")
+                        extracted_fields = {}
+                except Exception as e:
+                    print(f"[WARNING] Field extraction failed: {e}")
+                    extracted_fields = {}
+    # Parse each page for tables and structure the output
+    structured_pages = {}
+    for page_result in page_results:
+        if page_result.get("text"):
+            page_num = page_result.get("page_number", 1)
+            page_text = page_result.get("text", "")
+            # Parse text for tables and structure
+            parsed_data = _parse_text_with_tables(page_text, {})
+            # Build structured page output (without Fields - moved to root level)
+            page_key = f"page_{page_num}"
+            structured_pages[page_key] = {
+                "text": parsed_data["text"],
+                "table": parsed_data["table"],
+                "footer_notes": parsed_data["footer_notes"],
+                "confidence": page_result.get("confidence", 0),
+                "doc_type": page_result.get("doc_type", "other")
+            }
+    # If we have structured pages, use them; otherwise keep fields empty
+    if structured_pages:
+        # Always return pages with page_X keys (even for single page)
+        combined_fields = structured_pages
+    else:
+        combined_fields = {}
+    # Calculate average confidence
+    confidences = [p.get("confidence", 0) for p in page_results if p.get("confidence", 0) > 0]
+    avg_confidence = sum(confidences) / len(confidences) if confidences else 0
+    # Determine doc_type from first successful page
+    doc_type = "other"
+    for page_result in page_results:
+        if page_result.get("doc_type") and page_result["doc_type"] != "other":
+            doc_type = page_result["doc_type"]
+            break
+    # Build return object - add Fields at root level only if extracted_fields is not empty
+    return_obj = {
+        "doc_type": doc_type,
+        "confidence": avg_confidence,
+        "full_text": combined_full_text,
+        "fields": combined_fields,  # Now contains structured data with tables
+        "pages": page_results
+    }
+    # Add Fields at root level only if user provided key_fields and extraction succeeded
+    if extracted_fields:
+        return_obj["Fields"] = extracted_fields
+    return return_obj

backend/app/otp_service.py ADDED Viewed

	@@ -0,0 +1,197 @@

+"""
+OTP (One-Time Password) service for email-based authentication.
+"""
+import random
+import string
+from datetime import datetime, timedelta
+from typing import Dict, Optional
+from sqlalchemy.orm import Session
+from fastapi import HTTPException
+from .models import User
+from .brevo_service import send_otp_email
+# Store OTPs in memory (in production, use Redis or database)
+otp_store: Dict[str, dict] = {}
+def generate_otp(length: int = 6) -> str:
+    """
+    Generate a random OTP code.
+    Args:
+        length: Length of OTP (default: 6)
+    Returns:
+        Random OTP string
+    """
+    return ''.join(random.choices(string.digits, k=length))
+async def request_otp(email: str, db: Session) -> dict:
+    """
+    Generate and send OTP to email using Brevo.
+    Args:
+        email: Email address to send OTP to
+        db: Database session
+    Returns:
+        Dictionary with success message
+    """
+    # Generate OTP
+    otp = generate_otp()
+    expires_at = datetime.utcnow() + timedelta(minutes=10)
+    # Store OTP (in production, use Redis or database with TTL)
+    otp_store[email.lower()] = {
+        'otp': otp,
+        'expires_at': expires_at,
+        'attempts': 0,
+        'max_attempts': 5
+    }
+    # Send OTP via Brevo
+    try:
+        await send_otp_email(email, otp)
+        print(f"[INFO] OTP generated and sent to {email}")
+    except Exception as e:
+        # Remove OTP from store if email sending failed
+        if email.lower() in otp_store:
+            del otp_store[email.lower()]
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to send OTP email: {str(e)}"
+        )
+    return {
+        "message": "OTP sent to your email address",
+        "expires_in_minutes": 10
+    }
+async def verify_otp(email: str, otp: str, db: Session) -> User:
+    """
+    Verify OTP and return/create user.
+    Args:
+        email: Email address
+        otp: OTP code to verify
+        db: Database session
+    Returns:
+        User object
+    Raises:
+        HTTPException: If OTP is invalid, expired, or max attempts exceeded
+    """
+    email_lower = email.lower()
+    stored = otp_store.get(email_lower)
+    if not stored:
+        raise HTTPException(
+            status_code=400,
+            detail="OTP not found. Please request a new OTP."
+        )
+    # Check if expired
+    if datetime.utcnow() > stored['expires_at']:
+        del otp_store[email_lower]
+        raise HTTPException(
+            status_code=400,
+            detail="OTP has expired. Please request a new OTP."
+        )
+    # Check max attempts
+    if stored['attempts'] >= stored['max_attempts']:
+        del otp_store[email_lower]
+        raise HTTPException(
+            status_code=400,
+            detail="Maximum verification attempts exceeded. Please request a new OTP."
+        )
+    # Verify OTP
+    if stored['otp'] != otp:
+        stored['attempts'] += 1
+        remaining_attempts = stored['max_attempts'] - stored['attempts']
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid OTP. {remaining_attempts} attempt(s) remaining."
+        )
+    # OTP verified successfully
+    # Get or create user
+    user = db.query(User).filter(User.email == email_lower).first()
+    if not user:
+        user = User(
+            email=email_lower,
+            auth_method='otp',
+            email_verified=True
+        )
+        db.add(user)
+        db.commit()
+        db.refresh(user)
+        print(f"[INFO] New user created via OTP: {email_lower}")
+        # Enrich contact data from Apollo.io and update Brevo + Monday.com
+        try:
+            from .apollo_service import enrich_contact_by_email
+            from .brevo_service import create_brevo_contact, BREVO_TRIAL_LIST_ID
+            from .monday_service import create_monday_lead
+            # Enrich contact data from Apollo.io
+            enriched_data = await enrich_contact_by_email(email_lower)
+            # Use enriched data if available
+            first_name = enriched_data.get("first_name") if enriched_data else None
+            last_name = enriched_data.get("last_name") if enriched_data else None
+            org_name = enriched_data.get("organization_name") if enriched_data else None
+            # Fallback to email domain if Apollo didn't provide organization
+            if not org_name:
+                org_domain = email_lower.split('@')[1] if '@' in email_lower else None
+                org_name = org_domain.split('.')[0].capitalize() if org_domain else None
+            # Update Brevo contact with enriched data
+            await create_brevo_contact(
+                email=email_lower,
+                first_name=first_name,
+                last_name=last_name,
+                organization_name=org_name or (enriched_data.get("organization_name") if enriched_data else None),
+                phone_number=enriched_data.get("phone_number") if enriched_data else None,
+                linkedin_url=enriched_data.get("linkedin_url") if enriched_data else None,
+                title=enriched_data.get("title") if enriched_data else None,
+                headline=enriched_data.get("headline") if enriched_data else None,
+                organization_website=enriched_data.get("organization_website") if enriched_data else None,
+                organization_address=enriched_data.get("organization_address") if enriched_data else None,
+                list_id=BREVO_TRIAL_LIST_ID
+            )
+            # Create lead in Monday.com
+            await create_monday_lead(
+                email=email_lower,
+                first_name=first_name,
+                last_name=last_name,
+                phone_number=enriched_data.get("phone_number") if enriched_data else None,
+                linkedin_url=enriched_data.get("linkedin_url") if enriched_data else None,
+                title=enriched_data.get("title") if enriched_data else None,
+                headline=enriched_data.get("headline") if enriched_data else None,
+                organization_name=org_name or (enriched_data.get("organization_name") if enriched_data else None),
+                organization_website=enriched_data.get("organization_website") if enriched_data else None,
+                organization_address=enriched_data.get("organization_address") if enriched_data else None,
+            )
+        except Exception as e:
+            # Don't fail user creation if integrations fail
+            print(f"[WARNING] Failed to enrich/update contact for {email_lower}: {str(e)}")
+    else:
+        user.email_verified = True
+        if user.auth_method != 'otp':
+            user.auth_method = 'otp'
+        db.commit()
+        print(f"[INFO] User verified via OTP: {email_lower}")
+    # Remove OTP from store after successful verification
+    del otp_store[email_lower]
+    return user

backend/app/schemas.py CHANGED Viewed

@@ -1,26 +1,26 @@
-from pydantic import BaseModel
-from typing import Dict, Optional
-from datetime import datetime
-class ExtractionStage(BaseModel):
-    time: int
-    status: str
-    variation: str
-class ExtractionRecordBase(BaseModel):
-    id: int
-    fileName: str
-    fileType: str
-    fileSize: str
-    extractedAt: datetime
-    status: str
-    confidence: float
-    fieldsExtracted: int
-    totalTime: int
-    stages: Dict[str, ExtractionStage]
-    errorMessage: Optional[str] = None
-    class Config:
-        orm_mode = True

+from pydantic import BaseModel
+from typing import Dict, Optional
+from datetime import datetime
+# Timing/status entry for one stage of an extraction run; used as the value
+# type of ExtractionRecordBase.stages.
+# NOTE(review): the unit of `time` is not visible here - confirm with the producer.
+class ExtractionStage(BaseModel):
+    time: int
+    status: str
+    variation: str
+# Response schema for one stored extraction record. Field names are camelCase,
+# presumably to match what the frontend consumes - verify against the API client.
+class ExtractionRecordBase(BaseModel):
+    id: int
+    fileName: str
+    fileType: str
+    fileSize: str
+    extractedAt: datetime
+    status: str
+    confidence: float
+    fieldsExtracted: int
+    totalTime: int
+    # Per-stage details keyed by stage name
+    stages: Dict[str, ExtractionStage]
+    errorMessage: Optional[str] = None
+    class Config:
+        # Replaces the former `orm_mode = True` setting in this commit
+        from_attributes = True

backend/requirements.txt CHANGED Viewed

@@ -1,11 +1,15 @@
-fastapi
-uvicorn[standard]
-python-multipart
-pydantic
-sqlalchemy
-httpx
-python-dotenv
-pymupdf
-pillow
-huggingface-hub
-openai

+fastapi
+uvicorn[standard]
+python-multipart
+pydantic[email]
+sqlalchemy
+httpx
+python-dotenv
+pymupdf
+pillow
+huggingface-hub
+openai
+firebase-admin
+pyjwt
+python-jose[cryptography]
+email-validator

frontend/build-env.sh ADDED Viewed

	@@ -0,0 +1,22 @@

+#!/bin/sh
+# Script to create .env file from environment variables for Vite build
+# This is used in Docker build when environment variables are available
+# Debug: Check if variables are set (without exposing values)
+echo "Checking environment variables..."
+[ -z "$VITE_FIREBASE_API_KEY" ] && echo "WARNING: VITE_FIREBASE_API_KEY is not set" || echo "✓ VITE_FIREBASE_API_KEY is set"
+[ -z "$VITE_FIREBASE_AUTH_DOMAIN" ] && echo "WARNING: VITE_FIREBASE_AUTH_DOMAIN is not set" || echo "✓ VITE_FIREBASE_AUTH_DOMAIN is set"
+[ -z "$VITE_FIREBASE_PROJECT_ID" ] && echo "WARNING: VITE_FIREBASE_PROJECT_ID is not set" || echo "✓ VITE_FIREBASE_PROJECT_ID is set"
+cat > .env << EOF
+VITE_FIREBASE_API_KEY=${VITE_FIREBASE_API_KEY:-}
+VITE_FIREBASE_AUTH_DOMAIN=${VITE_FIREBASE_AUTH_DOMAIN:-}
+VITE_FIREBASE_PROJECT_ID=${VITE_FIREBASE_PROJECT_ID:-}
+VITE_FIREBASE_STORAGE_BUCKET=${VITE_FIREBASE_STORAGE_BUCKET:-}
+VITE_FIREBASE_MESSAGING_SENDER_ID=${VITE_FIREBASE_MESSAGING_SENDER_ID:-}
+VITE_FIREBASE_APP_ID=${VITE_FIREBASE_APP_ID:-}
+VITE_API_BASE_URL=${VITE_API_BASE_URL:-}
+EOF
+echo "Created .env file with environment variables"

frontend/index.html CHANGED Viewed

@@ -1,12 +1,13 @@
-<!doctype html>
-<html lang="en">
-  <head>
-    <meta charset="UTF-8" />
-    <title>Document Capture Demo</title>
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-  </head>
-  <body class="bg-[#FAFAFA]">
-    <div id="root"></div>
-    <script type="module" src="/src/main.jsx"></script>
-  </body>
-</html>

+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <link rel="icon" type="image/png" href="/logo.png" />
+    <title>EZOFIS AI - VRP Document Intelligence</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  </head>
+  <body class="bg-[#FAFAFA]">
+    <div id="root"></div>
+    <script type="module" src="/src/main.jsx"></script>
+  </body>
+</html>

frontend/package.json CHANGED Viewed

@@ -1,25 +1,26 @@
-{
-  "name": "document-capture-demo",
-  "version": "1.0.0",
-  "private": true,
-  "scripts": {
-    "dev": "vite",
-    "build": "vite build",
-    "preview": "vite preview"
-  },
-  "dependencies": {
-    "react": "^18.3.1",
-    "react-dom": "^18.3.1",
-    "react-router-dom": "^6.26.2",
-    "framer-motion": "^11.0.0",
-    "lucide-react": "^0.471.0",
-    "pdfjs-dist": "^4.0.379"
-  },
-  "devDependencies": {
-    "@vitejs/plugin-react": "^4.1.0",
-    "autoprefixer": "^10.4.20",
-    "postcss": "^8.4.47",
-    "tailwindcss": "^3.4.14",
-    "vite": "^5.4.0"
-  }
-}

+{
+  "name": "document-capture-demo",
+  "version": "1.0.0",
+  "private": true,
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1",
+    "react-router-dom": "^6.26.2",
+    "framer-motion": "^11.0.0",
+    "lucide-react": "^0.471.0",
+    "pdfjs-dist": "^4.0.379",
+    "firebase": "^10.7.1"
+  },
+  "devDependencies": {
+    "@vitejs/plugin-react": "^4.1.0",
+    "autoprefixer": "^10.4.20",
+    "postcss": "^8.4.47",
+    "tailwindcss": "^3.4.14",
+    "vite": "^5.4.0"
+  }
+}

frontend/postcss.config.cjs CHANGED Viewed

@@ -1,6 +1,6 @@
-module.exports = {
-  plugins: {
-    tailwindcss: {},
-    autoprefixer: {}
-  }
-};

+module.exports = {
+  plugins: {
+    tailwindcss: {},
+    autoprefixer: {}
+  }
+};

frontend/src/App.jsx CHANGED Viewed

@@ -1,30 +1,106 @@
-// frontend/src/App.jsx
-import React from "react";
-import { Routes, Route } from "react-router-dom";
-import Layout from "./Layout";
-import Dashboard from "./pages/Dashboard";
-import History from "./pages/History";
-export default function App() {
-  return (
-    <Routes>
-      <Route
-        path="/"
-        element={
-          <Layout currentPageName="Dashboard">
-            <Dashboard />
-          </Layout>
-        }
-      />
-      <Route
-        path="/history"
-        element={
-          <Layout currentPageName="History">
-            <History />
-          </Layout>
-        }
-      />
-    </Routes>
-  );
-}

+// frontend/src/App.jsx
+import React, { useEffect } from "react";
+import { Routes, Route, useNavigate, useSearchParams } from "react-router-dom";
+import { AuthProvider, useAuth } from "./contexts/AuthContext";
+import Layout from "./Layout";
+import Dashboard from "./pages/Dashboard";
+import History from "./pages/History";
+import ShareHandler from "./pages/ShareHandler";
+import LoginForm from "./components/auth/LoginForm";
+// Auth callback handler component
+function AuthCallback() {
+  const [searchParams] = useSearchParams();
+  const { handleAuthCallback } = useAuth();
+  const navigate = useNavigate();
+  useEffect(() => {
+    const token = searchParams.get("token");
+    if (token) {
+      handleAuthCallback(token);
+      navigate("/");
+    } else {
+      navigate("/");
+    }
+  }, [searchParams, handleAuthCallback, navigate]);
+  return (
+    <div className="min-h-screen flex items-center justify-center">
+      <div className="text-center">
+        <p className="text-slate-600">Completing authentication...</p>
+      </div>
+    </div>
+  );
+}
+// Protected route wrapper
+function ProtectedRoute({ children }) {
+  const { isAuthenticated, loading } = useAuth();
+  if (loading) {
+    return (
+      <div className="min-h-screen flex items-center justify-center">
+        <div className="text-center">
+          <div className="h-16 w-16 mx-auto rounded-2xl bg-indigo-100 flex items-center justify-center mb-4 animate-pulse">
+            <div className="h-8 w-8 rounded-lg bg-indigo-600"></div>
+          </div>
+          <p className="text-slate-600">Loading...</p>
+        </div>
+      </div>
+    );
+  }
+  if (!isAuthenticated) {
+    return <LoginForm />;
+  }
+  return children;
+}
+function AppRoutes() {
+  return (
+    <Routes>
+      <Route
+        path="/auth/callback"
+        element={<AuthCallback />}
+      />
+      <Route
+        path="/share/:token"
+        element={
+          <ProtectedRoute>
+            <ShareHandler />
+          </ProtectedRoute>
+        }
+      />
+      <Route
+        path="/"
+        element={
+          <ProtectedRoute>
+            <Layout currentPageName="Dashboard">
+              <Dashboard />
+            </Layout>
+          </ProtectedRoute>
+        }
+      />
+      <Route
+        path="/history"
+        element={
+          <ProtectedRoute>
+            <Layout currentPageName="History">
+              <History />
+            </Layout>
+          </ProtectedRoute>
+        }
+      />
+    </Routes>
+  );
+}
+export default function App() {
+  return (
+    <AuthProvider>
+      <AppRoutes />
+    </AuthProvider>
+  );
+}

frontend/src/Layout.jsx CHANGED Viewed

@@ -1,143 +1,179 @@
-// frontend/src/Layout.jsx
-import React, { useState } from "react";
-import { Link } from "react-router-dom";
-import { createPageUrl } from "./utils";
-import {
-  LayoutDashboard,
-  History as HistoryIcon,
-  ChevronLeft,
-  Sparkles,
-} from "lucide-react";
-import { cn } from "@/lib/utils";
-// Import logo - Vite will process this and handle the path correctly
-// For production, the logo should be in frontend/public/logo.png
-// Vite will copy it to dist/logo.png during build
-const logoPath = "/logo.png";
-export default function Layout({ children, currentPageName }) {
-  const [collapsed, setCollapsed] = useState(false);
-  const navItems = [
-    { name: "Dashboard", icon: LayoutDashboard, page: "Dashboard" },
-    { name: "History", icon: HistoryIcon, page: "History" },
-  ];
-  return (
-    <div className="min-h-screen bg-[#FAFAFA] flex">
-      {/* Sidebar */}
-      <aside
-        className={cn(
-          "fixed left-0 top-0 h-screen bg-white border-r border-slate-200/80 z-50 transition-all duration-300 ease-out flex flex-col",
-          collapsed ? "w-[72px]" : "w-[260px]"
-        )}
-      >
-        {/* Logo */}
-        <div
-          className={cn(
-            "h-16 flex items-center border-b border-slate-100 px-4",
-            collapsed ? "justify-center" : "justify-between"
-          )}
-        >
-          <Link to={createPageUrl("Dashboard")} className="flex items-center gap-3">
-            <div className="h-9 w-9 flex items-center justify-center flex-shrink-0">
-              <img
-                src={logoPath}
-                alt="EZOFIS AI Logo"
-                className="h-full w-full object-contain"
-                onError={(e) => {
-                  // Fallback: hide image and show placeholder if logo not found
-                  e.target.style.display = 'none';
-                }}
-              />
-            </div>
-            {!collapsed && (
-              <div className="flex flex-col">
-                <span className="font-semibold text-slate-900 tracking-tight">EZOFIS AI</span>
-                <span className="text-[10px] text-slate-400 font-medium tracking-wide uppercase">
-                  Agentic Extract
-                </span>
-              </div>
-            )}
-          </Link>
-          {!collapsed && (
-            <button
-              onClick={() => setCollapsed(true)}
-              className="h-7 w-7 rounded-lg hover:bg-slate-100 flex items-center justify-center text-slate-400 hover:text-slate-600 transition-colors"
-            >
-              <ChevronLeft className="h-4 w-4" />
-            </button>
-          )}
-        </div>
-        {/* Navigation */}
-        <nav className="flex-1 p-3 space-y-1">
-          {navItems.map((item) => {
-            const isActive = currentPageName === item.page;
-            return (
-              <Link
-                key={item.name}
-                to={createPageUrl(item.page)}
-                className={cn(
-                  "flex items-center gap-3 px-3 py-2.5 rounded-xl transition-all duration-200 group",
-                  isActive
-                    ? "bg-gradient-to-r from-indigo-50 to-violet-50 text-indigo-600"
-                    : "text-slate-500 hover:bg-slate-50 hover:text-slate-700"
-                )}
-              >
-                <item.icon
-                  className={cn(
-                    "h-5 w-5 flex-shrink-0",
-                    isActive ? "text-indigo-600" : "text-slate-400 group-hover:text-slate-600"
-                  )}
-                />
-                {!collapsed && (
-                  <span className="font-medium text-sm">{item.name}</span>
-                )}
-              </Link>
-            );
-          })}
-        </nav>
-        {/* Collapse Toggle (when collapsed) */}
-        {collapsed && (
-          <button
-            onClick={() => setCollapsed(false)}
-            className="m-3 h-10 rounded-xl bg-slate-50 hover:bg-slate-100 flex items-center justify-center text-slate-400 hover:text-slate-600 transition-colors"
-          >
-            <ChevronLeft className="h-4 w-4 rotate-180" />
-          </button>
-        )}
-        {/* Pro Badge */}
-        {!collapsed && (
-          <div className="p-3">
-            <div className="p-4 rounded-2xl bg-gradient-to-br from-slate-900 to-slate-800 text-white">
-              <div className="flex items-center gap-2 mb-2">
-                <Sparkles className="h-4 w-4 text-amber-400" />
-                <span className="text-xs font-semibold tracking-wide">DEPLOY THIS AGENT</span>
-              </div>
-              <p className="text-xs text-slate-400 mb-3">
-                Unlock batch extractions &amp; API access
-              </p>
-              <button className="w-full py-2 px-3 rounded-lg bg-white text-slate-900 text-sm font-semibold hover:bg-slate-100 transition-colors">
-                Talk to us
-              </button>
-            </div>
-          </div>
-        )}
-      </aside>
-      {/* Main Content */}
-      <main
-        className={cn(
-          "flex-1 transition-all duration-300",
-          collapsed ? "ml-[72px]" : "ml-[260px]"
-        )}
-      >
-        {children}
-      </main>
-    </div>
-  );
-}

+// frontend/src/Layout.jsx
+import React, { useState } from "react";
+import { Link } from "react-router-dom";
+import { createPageUrl } from "./utils";
+import {
+  LayoutDashboard,
+  History as HistoryIcon,
+  ChevronLeft,
+  Sparkles,
+  LogOut,
+  User,
+} from "lucide-react";
+import { cn } from "@/lib/utils";
+import { useAuth } from "./contexts/AuthContext";
+// Logo URL. The logo is referenced by absolute path rather than imported,
+// so the file must exist at frontend/public/logo.png.
+// Vite copies it to dist/logo.png during build.
+const logoPath = "/logo.png";
+/**
+ * Application shell: a fixed, collapsible left sidebar plus the main content area.
+ *
+ * The sidebar shows the logo, the navigation links, a promotional card and, when
+ * `useAuth()` provides a `user`, a profile card with a sign-out button. The
+ * promotional card and the profile card are hidden while the sidebar is collapsed.
+ *
+ * @param {object} props
+ * @param {React.ReactNode} props.children - Page content rendered inside <main>.
+ * @param {string} props.currentPageName - Compared with each nav item's `page`
+ *   to decide which link receives the active styling.
+ */
+export default function Layout({ children, currentPageName }) {
+  const [isCollapsed, setIsCollapsed] = useState(false);
+  const { user, logout } = useAuth();
+  const navItems = [
+    { name: "Dashboard", icon: LayoutDashboard, page: "Dashboard" },
+    { name: "History", icon: HistoryIcon, page: "History" },
+  ];
+  // The sidebar width and the left margin of <main> change together.
+  const sidebarWidth = isCollapsed ? "w-[72px]" : "w-[260px]";
+  const contentOffset = isCollapsed ? "ml-[72px]" : "ml-[260px]";
+  const headerAlignment = isCollapsed ? "justify-center" : "justify-between";
+  const collapseSidebar = () => setIsCollapsed(true);
+  const expandSidebar = () => setIsCollapsed(false);
+  return (
+    <div className="min-h-screen bg-[#FAFAFA] flex">
+      {/* Sidebar */}
+      <aside
+        className={cn(
+          "fixed left-0 top-0 h-screen bg-white border-r border-slate-200/80 z-50 transition-all duration-300 ease-out flex flex-col",
+          sidebarWidth
+        )}
+      >
+        {/* Logo row with the collapse button */}
+        <div
+          className={cn(
+            "h-16 flex items-center border-b border-slate-100 px-4",
+            headerAlignment
+          )}
+        >
+          <Link to={createPageUrl("Dashboard")} className="flex items-center gap-3">
+            <div className="h-9 w-9 flex items-center justify-center flex-shrink-0">
+              <img
+                src={logoPath}
+                alt="EZOFIS AI Logo"
+                className="h-full w-full object-contain"
+                onError={(e) => {
+                  // Hide the broken image if the logo cannot be loaded (no placeholder is rendered)
+                  e.target.style.display = 'none';
+                }}
+              />
+            </div>
+            {isCollapsed ? null : (
+              <div className="flex flex-col">
+                <span className="font-semibold text-slate-900 tracking-tight">EZOFIS AI</span>
+                <span className="text-[10px] text-slate-400 font-medium tracking-wide uppercase">
+                  VRP Intelligence
+                </span>
+              </div>
+            )}
+          </Link>
+          {isCollapsed ? null : (
+            <button
+              onClick={collapseSidebar}
+              className="h-7 w-7 rounded-lg hover:bg-slate-100 flex items-center justify-center text-slate-400 hover:text-slate-600 transition-colors"
+            >
+              <ChevronLeft className="h-4 w-4" />
+            </button>
+          )}
+        </div>
+        {/* Navigation links */}
+        <nav className="flex-1 p-3 space-y-1">
+          {navItems.map(({ name, icon: Icon, page }) => {
+            const active = currentPageName === page;
+            const linkTone = active
+              ? "bg-gradient-to-r from-indigo-50 to-violet-50 text-indigo-600"
+              : "text-slate-500 hover:bg-slate-50 hover:text-slate-700";
+            const iconTone = active
+              ? "text-indigo-600"
+              : "text-slate-400 group-hover:text-slate-600";
+            return (
+              <Link
+                key={name}
+                to={createPageUrl(page)}
+                className={cn(
+                  "flex items-center gap-3 px-3 py-2.5 rounded-xl transition-all duration-200 group",
+                  linkTone
+                )}
+              >
+                <Icon className={cn("h-5 w-5 flex-shrink-0", iconTone)} />
+                {isCollapsed ? null : (
+                  <span className="font-medium text-sm">{name}</span>
+                )}
+              </Link>
+            );
+          })}
+        </nav>
+        {/* Expand button, shown only while collapsed */}
+        {isCollapsed ? (
+          <button
+            onClick={expandSidebar}
+            className="m-3 h-10 rounded-xl bg-slate-50 hover:bg-slate-100 flex items-center justify-center text-slate-400 hover:text-slate-600 transition-colors"
+          >
+            <ChevronLeft className="h-4 w-4 rotate-180" />
+          </button>
+        ) : null}
+        {/* Promotional card */}
+        {isCollapsed ? null : (
+          <div className="p-3">
+            <div className="p-4 rounded-2xl bg-gradient-to-br from-slate-900 to-slate-800 text-white">
+              <div className="flex items-center gap-2 mb-2">
+                <Sparkles className="h-4 w-4 text-amber-400" />
+                <span className="text-xs font-semibold tracking-wide">DEPLOY CUSTOM AGENT</span>
+              </div>
+              <p className="text-xs text-slate-400 mb-3">
+              Batch extractions, custom model, field mapping, complex lineitems, tables, workflows,  &amp; API access
+              </p>
+              <button className="w-full py-2 px-3 rounded-lg bg-white text-slate-900 text-sm font-semibold hover:bg-slate-100 transition-colors">
+                Book a Custom Demo
+              </button>
+            </div>
+          </div>
+        )}
+        {/* User profile and sign-out, shown only when expanded and a user is present */}
+        {isCollapsed ? null : user && (
+          <div className="p-3 border-t border-slate-200">
+            <div className="flex items-center gap-3 p-3 rounded-xl bg-slate-50 hover:bg-slate-100 transition-colors">
+              {user.picture ? (
+                <img
+                  src={user.picture}
+                  alt={user.name || user.email}
+                  className="h-10 w-10 rounded-lg object-cover"
+                />
+              ) : (
+                <div className="h-10 w-10 rounded-lg bg-indigo-100 flex items-center justify-center">
+                  <User className="h-5 w-5 text-indigo-600" />
+                </div>
+              )}
+              <div className="flex-1 min-w-0">
+                <p className="text-sm font-medium text-slate-900 truncate">
+                  {user.name || "User"}
+                </p>
+                <p className="text-xs text-slate-500 truncate">{user.email}</p>
+              </div>
+            </div>
+            <button
+              onClick={logout}
+              className="mt-2 w-full flex items-center gap-2 px-3 py-2 rounded-xl text-sm text-slate-600 hover:bg-red-50 hover:text-red-600 transition-colors"
+            >
+              <LogOut className="h-4 w-4" />
+              <span>Sign Out</span>
+            </button>
+          </div>
+        )}
+      </aside>
+      {/* Main content, offset by the current sidebar width */}
+      <main className={cn("flex-1 transition-all duration-300", contentOffset)}>
+        {children}
+      </main>
+    </div>
+  );
+}

frontend/src/components/ErrorBoundary.jsx CHANGED Viewed

@@ -1,72 +1,72 @@
-import React from "react";
-class ErrorBoundary extends React.Component {
-  constructor(props) {
-    super(props);
-    this.state = { hasError: false, error: null };
-  }
-  static getDerivedStateFromError(error) {
-    return { hasError: true, error };
-  }
-  componentDidCatch(error, errorInfo) {
-    console.error("Error caught by boundary:", error, errorInfo);
-  }
-  render() {
-    if (this.state.hasError) {
-      return (
-        <div className="min-h-screen bg-[#FAFAFA] flex items-center justify-center p-8">
-          <div className="max-w-md w-full bg-white rounded-2xl border border-red-200 p-8 shadow-lg">
-            <div className="text-center">
-              <div className="h-16 w-16 mx-auto rounded-full bg-red-100 flex items-center justify-center mb-4">
-                <svg
-                  className="h-8 w-8 text-red-600"
-                  fill="none"
-                  viewBox="0 0 24 24"
-                  stroke="currentColor"
-                >
-                  <path
-                    strokeLinecap="round"
-                    strokeLinejoin="round"
-                    strokeWidth={2}
-                    d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"
-                  />
-                </svg>
-              </div>
-              <h2 className="text-xl font-bold text-slate-900 mb-2">
-                Something went wrong
-              </h2>
-              <p className="text-sm text-slate-600 mb-6">
-                The application encountered an error. Please refresh the page or contact support if the problem persists.
-              </p>
-              <button
-                onClick={() => window.location.reload()}
-                className="px-6 py-2 bg-indigo-600 text-white rounded-lg font-semibold hover:bg-indigo-700 transition-colors"
-              >
-                Refresh Page
-              </button>
-              {process.env.NODE_ENV === "development" && this.state.error && (
-                <details className="mt-6 text-left">
-                  <summary className="text-sm text-slate-500 cursor-pointer mb-2">
-                    Error Details (Development Only)
-                  </summary>
-                  <pre className="text-xs bg-slate-100 p-4 rounded-lg overflow-auto max-h-64">
-                    {this.state.error.toString()}
-                    {this.state.error.stack}
-                  </pre>
-                </details>
-              )}
-            </div>
-          </div>
-        </div>
-      );
-    }
-    return this.props.children;
-  }
-}
-export default ErrorBoundary;

+import React from "react";
+/**
+ * React error boundary.
+ *
+ * Renders its children until a descendant throws during rendering; after that
+ * it shows a full-screen error card with a "Refresh Page" button. The caught
+ * error is logged to the console, and its message and stack are shown in a
+ * collapsible section only when `process.env.NODE_ENV` is "development".
+ */
+class ErrorBoundary extends React.Component {
+  constructor(props) {
+    super(props);
+    this.state = { hasError: false, error: null };
+  }
+  // Switches the boundary into its error state and remembers the error for display.
+  static getDerivedStateFromError(error) {
+    return { hasError: true, error };
+  }
+  // Logging only; the state change happens in getDerivedStateFromError.
+  componentDidCatch(error, errorInfo) {
+    console.error("Error caught by boundary:", error, errorInfo);
+  }
+  render() {
+    const { hasError, error } = this.state;
+    if (!hasError) {
+      return this.props.children;
+    }
+    const reloadPage = () => window.location.reload();
+    const devDetails = process.env.NODE_ENV === "development" && error && (
+      <details className="mt-6 text-left">
+        <summary className="text-sm text-slate-500 cursor-pointer mb-2">
+          Error Details (Development Only)
+        </summary>
+        <pre className="text-xs bg-slate-100 p-4 rounded-lg overflow-auto max-h-64">
+          {error.toString()}
+          {error.stack}
+        </pre>
+      </details>
+    );
+    return (
+      <div className="min-h-screen bg-[#FAFAFA] flex items-center justify-center p-8">
+        <div className="max-w-md w-full bg-white rounded-2xl border border-red-200 p-8 shadow-lg">
+          <div className="text-center">
+            <div className="h-16 w-16 mx-auto rounded-full bg-red-100 flex items-center justify-center mb-4">
+              <svg
+                className="h-8 w-8 text-red-600"
+                fill="none"
+                viewBox="0 0 24 24"
+                stroke="currentColor"
+              >
+                <path
+                  strokeLinecap="round"
+                  strokeLinejoin="round"
+                  strokeWidth={2}
+                  d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"
+                />
+              </svg>
+            </div>
+            <h2 className="text-xl font-bold text-slate-900 mb-2">
+              Something went wrong
+            </h2>
+            <p className="text-sm text-slate-600 mb-6">
+              The application encountered an error. Please refresh the page or contact support if the problem persists.
+            </p>
+            <button
+              onClick={reloadPage}
+              className="px-6 py-2 bg-indigo-600 text-white rounded-lg font-semibold hover:bg-indigo-700 transition-colors"
+            >
+              Refresh Page
+            </button>
+            {devDetails}
+          </div>
+        </div>
+      </div>
+    );
+  }
+}
+export default ErrorBoundary;

frontend/src/components/ExportButtons.jsx CHANGED Viewed

@@ -1,320 +1,692 @@
-import React, { useState } from "react";
-import { motion, AnimatePresence } from "framer-motion";
-import {
-  Download,
-  Braces,
-  FileCode2,
-  Check,
-  Share2,
-  FileJson,
-  Copy,
-  Mail,
-  Link2,
-} from "lucide-react";
-import { Button } from "@/components/ui/button";
-import {
-  DropdownMenu,
-  DropdownMenuContent,
-  DropdownMenuItem,
-  DropdownMenuSeparator,
-  DropdownMenuTrigger,
-} from "@/components/ui/dropdown-menu";
-import { cn } from "@/lib/utils";
-// Helper functions from ExtractionOutput
-function prepareFieldsForOutput(fields, format = "json") {
-  if (!fields || typeof fields !== "object") {
-    return fields;
-  }
-  const output = { ...fields };
-  // Remove full_text from top-level if pages array exists (to avoid duplication)
-  if (output.pages && Array.isArray(output.pages) && output.pages.length > 0) {
-    delete output.full_text;
-    // Clean up each page: remove full_text from page.fields (it duplicates page.text)
-    output.pages = output.pages.map(page => {
-      const cleanedPage = { ...page };
-      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
-        const cleanedFields = { ...cleanedPage.fields };
-        // Remove full_text from page fields (duplicates page.text)
-        delete cleanedFields.full_text;
-        cleanedPage.fields = cleanedFields;
-      }
-      return cleanedPage;
-    });
-  }
-  // For JSON and XML: restructure pages into separate top-level fields (page_1, page_2, etc.)
-  if ((format === "json" || format === "xml") && output.pages && Array.isArray(output.pages)) {
-    // Get top-level field keys (these are merged from all pages - avoid duplicating in page fields)
-    const topLevelKeys = new Set(Object.keys(output).filter(k => k !== "pages" && k !== "full_text"));
-    output.pages.forEach((page, idx) => {
-      const pageNum = page.page_number || idx + 1;
-      const pageFields = page.fields || {};
-      // Remove duplicate fields from page.fields:
-      // 1. Remove full_text (duplicates page.text)
-      // 2. Remove fields that match top-level fields (already shown at root)
-      const cleanedPageFields = {};
-      for (const [key, value] of Object.entries(pageFields)) {
-        // Skip full_text and fields that match top-level exactly
-        if (key !== "full_text" && (!topLevelKeys.has(key) || (value !== output[key]))) {
-          cleanedPageFields[key] = value;
-        }
-      }
-      const pageObj = {
-        text: page.text || "",
-        confidence: page.confidence || 0,
-        doc_type: page.doc_type || "other"
-      };
-      // Only add fields if there are unique page-specific fields
-      if (Object.keys(cleanedPageFields).length > 0) {
-        pageObj.fields = cleanedPageFields;
-      }
-      output[`page_${pageNum}`] = pageObj;
-    });
-    // Remove pages array - we now have page_1, page_2, etc. as separate fields
-    delete output.pages;
-  }
-  return output;
-}
-function escapeXML(str) {
-  return str
-    .replace(/&/g, "&amp;")
-    .replace(/</g, "&lt;")
-    .replace(/>/g, "&gt;")
-    .replace(/"/g, "&quot;")
-    .replace(/'/g, "&apos;");
-}
-function objectToXML(obj, rootName = "extraction") {
-  // Prepare fields - remove full_text if pages exist
-  const preparedObj = prepareFieldsForOutput(obj, "xml");
-  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;
-  const convert = (obj, indent = "  ") => {
-    for (const [key, value] of Object.entries(obj)) {
-      if (value === null || value === undefined) continue;
-      // Skip full_text if pages exist (already handled in prepareFieldsForOutput)
-      if (key === "full_text" && obj.pages && Array.isArray(obj.pages) && obj.pages.length > 0) {
-        continue;
-      }
-      if (Array.isArray(value)) {
-        value.forEach((item) => {
-          xml += `${indent}<${key}>\n`;
-          if (typeof item === "object") {
-            convert(item, indent + "  ");
-          } else {
-            xml += `${indent}  ${escapeXML(String(item))}\n`;
-          }
-          xml += `${indent}</${key}>\n`;
-        });
-      } else if (typeof value === "object") {
-        xml += `${indent}<${key}>\n`;
-        convert(value, indent + "  ");
-        xml += `${indent}</${key}>\n`;
-      } else {
-        xml += `${indent}<${key}>${escapeXML(String(value))}</${key}>\n`;
-      }
-    }
-  };
-  convert(preparedObj);
-  xml += `</${rootName}>`;
-  return xml;
-}
-export default function ExportButtons({ isComplete, extractionResult }) {
-  const [downloading, setDownloading] = useState(null);
-  const [copied, setCopied] = useState(false);
-  const handleDownload = (format) => {
-    if (!extractionResult || !extractionResult.fields) {
-      console.error("No extraction data available");
-      return;
-    }
-    setDownloading(format);
-    try {
-      const fields = extractionResult.fields;
-      let content = "";
-      let filename = "";
-      let mimeType = "";
-      if (format === "json") {
-        const preparedFields = prepareFieldsForOutput(fields, "json");
-        content = JSON.stringify(preparedFields, null, 2);
-        filename = `extraction_${new Date().toISOString().split('T')[0]}.json`;
-        mimeType = "application/json";
-      } else if (format === "xml") {
-        content = objectToXML(fields);
-        filename = `extraction_${new Date().toISOString().split('T')[0]}.xml`;
-        mimeType = "application/xml";
-      }
-      // Create blob and download
-      const blob = new Blob([content], { type: mimeType });
-      const url = URL.createObjectURL(blob);
-      const link = document.createElement("a");
-      link.href = url;
-      link.download = filename;
-      document.body.appendChild(link);
-      link.click();
-      document.body.removeChild(link);
-      URL.revokeObjectURL(url);
-      setDownloading(null);
-    } catch (error) {
-      console.error("Download error:", error);
-      setDownloading(null);
-    }
-  };
-  const handleCopyLink = () => {
-    setCopied(true);
-    setTimeout(() => setCopied(false), 2000);
-  };
-  if (!isComplete) return null;
-  return (
-    <motion.div
-      initial={{ opacity: 0, y: 20 }}
-      animate={{ opacity: 1, y: 0 }}
-      className="flex items-center gap-3"
-    >
-      {/* JSON Download */}
-      <Button
-        onClick={() => handleDownload("json")}
-        disabled={downloading === "json"}
-        className={cn(
-          "h-11 px-5 rounded-xl font-semibold transition-all duration-200",
-          "bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700",
-          "shadow-lg shadow-indigo-500/25 hover:shadow-xl hover:shadow-indigo-500/30",
-          "text-white"
-        )}
-      >
-        <AnimatePresence mode="wait">
-          {downloading === "json" ? (
-            <motion.div
-              key="loading"
-              initial={{ opacity: 0, scale: 0.8 }}
-              animate={{ opacity: 1, scale: 1 }}
-              exit={{ opacity: 0, scale: 0.8 }}
-              className="flex items-center gap-2"
-            >
-              <motion.div
-                animate={{ rotate: 360 }}
-                transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
-              >
-                <Download className="h-4 w-4" />
-              </motion.div>
-              Downloading...
-            </motion.div>
-          ) : (
-            <motion.div
-              key="default"
-              initial={{ opacity: 0, scale: 0.8 }}
-              animate={{ opacity: 1, scale: 1 }}
-              exit={{ opacity: 0, scale: 0.8 }}
-              className="flex items-center gap-2"
-            >
-              <Braces className="h-4 w-4" />
-              Download JSON
-            </motion.div>
-          )}
-        </AnimatePresence>
-      </Button>
-      {/* XML Download */}
-      <Button
-        onClick={() => handleDownload("xml")}
-        disabled={downloading === "xml"}
-        variant="outline"
-        className={cn(
-          "h-11 px-5 rounded-xl font-semibold transition-all duration-200",
-          "border-2 border-slate-200 hover:border-slate-300",
-          "hover:bg-slate-50"
-        )}
-      >
-        <AnimatePresence mode="wait">
-          {downloading === "xml" ? (
-            <motion.div
-              key="loading"
-              initial={{ opacity: 0, scale: 0.8 }}
-              animate={{ opacity: 1, scale: 1 }}
-              exit={{ opacity: 0, scale: 0.8 }}
-              className="flex items-center gap-2"
-            >
-              <motion.div
-                animate={{ rotate: 360 }}
-                transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
-              >
-                <Download className="h-4 w-4" />
-              </motion.div>
-              Downloading...
-            </motion.div>
-          ) : (
-            <motion.div
-              key="default"
-              initial={{ opacity: 0, scale: 0.8 }}
-              animate={{ opacity: 1, scale: 1 }}
-              exit={{ opacity: 0, scale: 0.8 }}
-              className="flex items-center gap-2"
-            >
-              <FileCode2 className="h-4 w-4" />
-              Download XML
-            </motion.div>
-          )}
-        </AnimatePresence>
-      </Button>
-      {/* More Options Dropdown */}
-      <DropdownMenu>
-        <DropdownMenuTrigger asChild>
-          <Button variant="ghost" className="h-11 w-11 rounded-xl">
-            <Share2 className="h-4 w-4" />
-          </Button>
-        </DropdownMenuTrigger>
-        <DropdownMenuContent align="end" className="w-48 rounded-xl p-2">
-          <DropdownMenuItem
-            className="rounded-lg cursor-pointer"
-            onClick={handleCopyLink}
-          >
-            {copied ? (
-              <Check className="h-4 w-4 mr-2 text-emerald-500" />
-            ) : (
-              <Link2 className="h-4 w-4 mr-2" />
-            )}
-            {copied ? "Link copied!" : "Copy share link"}
-          </DropdownMenuItem>
-          <DropdownMenuItem className="rounded-lg cursor-pointer">
-            <Copy className="h-4 w-4 mr-2" />
-            Copy to clipboard
-          </DropdownMenuItem>
-          <DropdownMenuSeparator />
-          <DropdownMenuItem className="rounded-lg cursor-pointer">
-            <Mail className="h-4 w-4 mr-2" />
-            Send via email
-          </DropdownMenuItem>
-          <DropdownMenuItem className="rounded-lg cursor-pointer">
-            <FileJson className="h-4 w-4 mr-2" />
-            Export to Google Sheets
-          </DropdownMenuItem>
-        </DropdownMenuContent>
-      </DropdownMenu>
-    </motion.div>
-  );
-}

+import React, { useState } from "react";
+import { motion, AnimatePresence } from "framer-motion";
+import {
+  Download,
+  Braces,
+  FileCode2,
+  Check,
+  Share2,
+  FileText,
+  Link2,
+  Mail,
+} from "lucide-react";
+import { Button } from "@/components/ui/button";
+import {
+  DropdownMenu,
+  DropdownMenuContent,
+  DropdownMenuItem,
+  DropdownMenuSeparator,
+  DropdownMenuTrigger,
+} from "@/components/ui/dropdown-menu";
+import { cn } from "@/lib/utils";
+import ShareModal from "@/components/ShareModal";
+import ShareLinkModal from "@/components/ShareLinkModal";
+import { shareExtraction, createShareLink } from "@/services/api";
+// Helper functions from ExtractionOutput
+/**
+ * Builds an export-ready copy of the extracted fields.
+ *
+ * - A non-empty root `Fields` object is moved to the front of the result.
+ * - When `pages` is a non-empty array, the root `full_text` and each page's
+ *   `fields.full_text` are dropped (they duplicate the page text).
+ * - For the "json" and "xml" formats the `pages` array is replaced by one
+ *   `page_<n>` entry per page, keeping only page fields that differ from the
+ *   root-level value of the same key.
+ * - `Fields` and `metadata` are stripped from every `page_*` entry.
+ *
+ * The input is never modified: page entries are cloned before keys are removed,
+ * so the caller's extraction result keeps its `Fields` and `metadata`.
+ *
+ * @param {object} fields - Extraction fields as returned by the backend.
+ * @param {string} [format="json"] - Target format; only "json" and "xml" trigger the page restructuring.
+ * @returns {object} A new object, or `fields` itself when it is not an object (null, undefined, primitives).
+ */
+function prepareFieldsForOutput(fields, format = "json") {
+  if (!fields || typeof fields !== "object") {
+    return fields;
+  }
+  // Shallow copy without the root Fields entry; it is re-inserted first at the end.
+  const { Fields: rootFields, ...output } = fields;
+  // Remove full_text from top-level if pages array exists (to avoid duplication)
+  if (Array.isArray(output.pages) && output.pages.length > 0) {
+    delete output.full_text;
+    // Clean up each page: remove full_text from page.fields (it duplicates page.text)
+    output.pages = output.pages.map((page) => {
+      const cleanedPage = { ...page };
+      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
+        const cleanedFields = { ...cleanedPage.fields };
+        delete cleanedFields.full_text;
+        cleanedPage.fields = cleanedFields;
+      }
+      return cleanedPage;
+    });
+  }
+  // For JSON and XML: restructure pages into separate top-level fields (page_1, page_2, etc.)
+  if ((format === "json" || format === "xml") && Array.isArray(output.pages)) {
+    // Root-level keys are merged from all pages; identical values are not repeated per page.
+    const topLevelKeys = new Set(
+      Object.keys(output).filter((k) => k !== "pages" && k !== "full_text")
+    );
+    output.pages.forEach((page, idx) => {
+      const pageNum = page.page_number || idx + 1;
+      const pageFields = page.fields || {};
+      const cleanedPageFields = {};
+      for (const [key, value] of Object.entries(pageFields)) {
+        // Skip full_text and fields that match the root-level value exactly
+        if (key !== "full_text" && (!topLevelKeys.has(key) || value !== output[key])) {
+          cleanedPageFields[key] = value;
+        }
+      }
+      const pageObj = {
+        text: page.text || "",
+        confidence: page.confidence || 0,
+        doc_type: page.doc_type || "other",
+      };
+      // Add table and footer_notes only when they are non-empty arrays
+      if (Array.isArray(page.table) && page.table.length > 0) {
+        pageObj.table = page.table;
+      }
+      if (Array.isArray(page.footer_notes) && page.footer_notes.length > 0) {
+        pageObj.footer_notes = page.footer_notes;
+      }
+      // Only add fields if there are unique page-specific fields
+      if (Object.keys(cleanedPageFields).length > 0) {
+        pageObj.fields = cleanedPageFields;
+      }
+      output[`page_${pageNum}`] = pageObj;
+    });
+    // Remove pages array - we now have page_1, page_2, etc. as separate fields
+    delete output.pages;
+  }
+  // Handle page_X structure (from backend): drop Fields/metadata from page objects.
+  // `output` is only a shallow copy, so each page entry still belongs to the caller;
+  // replace it with a stripped clone instead of deleting keys in place.
+  for (const pageKey of Object.keys(output)) {
+    if (!pageKey.startsWith("page_")) {
+      continue;
+    }
+    const pageData = output[pageKey];
+    if (pageData && typeof pageData === "object" && !Array.isArray(pageData)) {
+      const { Fields: _pageFields, metadata: _pageMetadata, ...pageRest } = pageData;
+      output[pageKey] = pageRest;
+    }
+  }
+  // Put Fields first (only if it exists and is not empty), followed by all other keys.
+  const hasRootFields =
+    rootFields && typeof rootFields === "object" && Object.keys(rootFields).length > 0;
+  return hasRootFields ? { Fields: rootFields, ...output } : output;
+}
+function escapeXML(str) {
+  return str
+    .replace(/&/g, "&amp;")
+    .replace(/</g, "&lt;")
+    .replace(/>/g, "&gt;")
+    .replace(/"/g, "&quot;")
+    .replace(/'/g, "&apos;");
+}
+function objectToXML(obj, rootName = "extraction") {
+  // Prepare fields - remove full_text if pages exist
+  const preparedObj = prepareFieldsForOutput(obj, "xml");
+  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;
+  const convert = (obj, indent = "  ") => {
+    for (const [key, value] of Object.entries(obj)) {
+      if (value === null || value === undefined) continue;
+      // Skip full_text if pages exist (already handled in prepareFieldsForOutput)
+      if (key === "full_text" && obj.pages && Array.isArray(obj.pages) && obj.pages.length > 0) {
+        continue;
+      }
+      if (Array.isArray(value)) {
+        value.forEach((item) => {
+          xml += `${indent}<${key}>\n`;
+          if (typeof item === "object") {
+            convert(item, indent + "  ");
+          } else {
+            xml += `${indent}  ${escapeXML(String(item))}\n`;
+          }
+          xml += `${indent}</${key}>\n`;
+        });
+      } else if (typeof value === "object") {
+        xml += `${indent}<${key}>\n`;
+        convert(value, indent + "  ");
+        xml += `${indent}</${key}>\n`;
+      } else {
+        xml += `${indent}<${key}>${escapeXML(String(value))}</${key}>\n`;
+      }
+    }
+  };
+  convert(preparedObj);
+  xml += `</${rootName}>`;
+  return xml;
+}
+/**
+ * Export and share menu shown once an extraction has finished.
+ *
+ * Renders nothing until `isComplete` is truthy. The dropdown offers e-mail
+ * sharing (ShareModal), share-link generation (ShareLinkModal) and three
+ * client-side downloads built from `extractionResult.fields`: a
+ * Word-compatible HTML document, JSON and XML.
+ *
+ * @param {object} props
+ * @param {boolean} props.isComplete - Whether the extraction has finished.
+ * @param {object} [props.extractionResult] - Extraction record; this component
+ *   reads its `id` and `fields` properties.
+ */
+export default function ExportButtons({ isComplete, extractionResult }) {
+  // Format currently being generated ("docx" | "json" | "xml"), or null when idle.
+  const [downloading, setDownloading] = useState(null);
+  const [isShareModalOpen, setIsShareModalOpen] = useState(false);
+  const [isShareLinkModalOpen, setIsShareLinkModalOpen] = useState(false);
+  const [shareLink, setShareLink] = useState("");
+  const [isGeneratingLink, setIsGeneratingLink] = useState(false);
+  // Returns the readable text of an extraction: the `text` of every page_N
+  // entry joined by blank lines, or `full_text` when no page carries text.
+  const extractTextFromFields = (fields) => {
+    if (!fields || typeof fields !== "object") {
+      return "";
+    }
+    const pageTexts = Object.keys(fields)
+      .filter((key) => key.startsWith("page_"))
+      .map((key) => fields[key]?.text || "")
+      .filter((text) => text);
+    if (pageTexts.length > 0) {
+      return pageTexts.join("\n\n");
+    }
+    return fields.full_text || "";
+  };
+  // Escapes text for HTML by letting the DOM serialize a text node.
+  const escapeHtml = (text) => {
+    if (!text) return '';
+    const div = document.createElement('div');
+    div.textContent = text;
+    return div.innerHTML;
+  };
+  // Replaces pipe-separated (markdown style) tables in `text` with HTML tables.
+  // All other lines are passed through unchanged.
+  const convertPipeTablesToHTML = (text) => {
+    if (!text) return text;
+    // A markdown separator row such as "|---|:--:|" consists only of these characters.
+    const separatorRow = /^[\s|\-:]+$/;
+    // Splits "| a | b |" into ["a", "b"], dropping the empty cell that a
+    // leading or trailing pipe produces.
+    const splitCells = (rowLine) => {
+      const cells = rowLine.split('|').map((cell) => cell.trim());
+      if (cells.length > 0 && !cells[0]) cells.shift();
+      if (cells.length > 0 && !cells[cells.length - 1]) cells.pop();
+      return cells;
+    };
+    const lines = text.split('\n');
+    const result = [];
+    let i = 0;
+    while (i < lines.length) {
+      const line = lines[i];
+      // A header candidate needs at least two pipes and must not be a separator row.
+      const looksLikeHeader =
+        line.split('|').length >= 3 && !separatorRow.test(line.trim());
+      const headerCells = looksLikeHeader ? splitCells(line) : [];
+      if (headerCells.length < 2) {
+        // Not the start of a table: keep the line as-is.
+        result.push(line);
+        i++;
+        continue;
+      }
+      const bodyRows = [];
+      let j = i + 1;
+      // Skip the separator row directly under the header, if present.
+      if (j < lines.length && separatorRow.test(lines[j].trim())) {
+        j++;
+      }
+      while (j < lines.length) {
+        const rowLine = lines[j];
+        // A blank line or a line without pipes ends the table.
+        if (!rowLine.trim() || !rowLine.includes('|')) break;
+        // Separator rows inside the table are dropped.
+        if (!separatorRow.test(rowLine.trim())) {
+          bodyRows.push(splitCells(rowLine));
+        }
+        j++;
+      }
+      let htmlTable = '<table class="border-collapse border border-gray-300 w-full my-4">\n<thead>\n<tr>';
+      headerCells.forEach((cell) => {
+        htmlTable += `<th class="border border-gray-300 px-4 py-2 bg-gray-100 font-semibold text-left">${escapeHtml(cell)}</th>`;
+      });
+      htmlTable += '</tr>\n</thead>\n<tbody>\n';
+      bodyRows.forEach((row) => {
+        // Pad short rows up to the header width so columns stay aligned.
+        const columnCount = Math.max(headerCells.length, row.length);
+        htmlTable += '<tr>';
+        for (let col = 0; col < columnCount; col++) {
+          htmlTable += `<td class="border border-gray-300 px-4 py-2">${escapeHtml(row[col] || '')}</td>`;
+        }
+        htmlTable += '</tr>\n';
+      });
+      htmlTable += '</tbody>\n</table>';
+      result.push(htmlTable);
+      i = j;
+    }
+    return result.join('\n');
+  };
+  // Converts the extracted markdown-like text to HTML: pipe tables, LaTeX-style
+  // super/subscripts, headers, bold/italic, links and paragraph/line breaks.
+  const renderMarkdownToHTML = (text) => {
+    if (!text) return "";
+    let html = text;
+    // FIRST: Convert pipe-separated tables to HTML tables
+    html = convertPipeTablesToHTML(html);
+    // Convert LaTeX-style superscripts/subscripts ($^{x}$, $^x$, $_{x}$, $_x$)
+    html = html.replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, '<sup>$1</sup>');
+    html = html.replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, '<sup>$1</sup>');
+    html = html.replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, '<sub>$1</sub>');
+    html = html.replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, '<sub>$1</sub>');
+    // Swap every <table> block for a placeholder so the markdown and
+    // line-break rules below cannot alter table markup.
+    const htmlBlocks = [];
+    let htmlBlockIndex = 0;
+    html = html.replace(/<table[\s\S]*?<\/table>/gi, (match) => {
+      const placeholder = `__HTML_BLOCK_${htmlBlockIndex}__`;
+      htmlBlocks[htmlBlockIndex] = match;
+      htmlBlockIndex++;
+      return placeholder;
+    });
+    // Convert markdown headers
+    html = html.replace(/^### (.*$)/gim, '<h3>$1</h3>');
+    html = html.replace(/^## (.*$)/gim, '<h2>$1</h2>');
+    html = html.replace(/^# (.*$)/gim, '<h1>$1</h1>');
+    // Convert markdown bold/italic (bold first, so "**" is not read as two "*")
+    html = html.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
+    html = html.replace(/\*(.*?)\*/g, '<em>$1</em>');
+    // Convert markdown links
+    html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>');
+    // Process line breaks outside of tables, then restore the table blocks
+    const parts = html.split(/(__HTML_BLOCK_\d+__)/);
+    const processedParts = parts.map((part) => {
+      if (part.match(/^__HTML_BLOCK_\d+__$/)) {
+        const blockIndex = Number.parseInt(part.match(/\d+/)[0], 10);
+        return htmlBlocks[blockIndex];
+      }
+      let processed = part;
+      processed = processed.replace(/\n\n+/g, '</p><p>');
+      processed = processed.replace(/([^\n>])\n([^\n<])/g, '$1<br>$2');
+      if (processed.trim() && !processed.trim().startsWith('<')) {
+        processed = '<p>' + processed + '</p>';
+      }
+      return processed;
+    });
+    html = processedParts.join('');
+    // Drop empty paragraphs left behind by the replacements above
+    html = html.replace(/<p><\/p>/g, '');
+    html = html.replace(/<p>\s*<br>\s*<\/p>/g, '');
+    html = html.replace(/<p>\s*<\/p>/g, '');
+    return html;
+  };
+  // Builds the requested export in memory and triggers a browser download.
+  // `format` is "json", "xml" or "docx"; any other value is logged as an error.
+  const handleDownload = async (format) => {
+    if (!extractionResult || !extractionResult.fields) {
+      console.error("No extraction data available");
+      return;
+    }
+    setDownloading(format);
+    try {
+      const fields = extractionResult.fields;
+      // YYYY-MM-DD (UTC) used in every export filename
+      const dateStamp = new Date().toISOString().split('T')[0];
+      let content = "";
+      let filename = "";
+      let mimeType = "";
+      if (format === "json") {
+        const preparedFields = prepareFieldsForOutput(fields, "json");
+        content = JSON.stringify(preparedFields, null, 2);
+        filename = `extraction_${dateStamp}.json`;
+        mimeType = "application/json";
+      } else if (format === "xml") {
+        content = objectToXML(fields);
+        filename = `extraction_${dateStamp}.xml`;
+        mimeType = "application/xml";
+      } else if (format === "docx") {
+        // The "Docx" export is a Word-compatible HTML document rather than a real
+        // OOXML package, which is why it is saved as .doc with the msword MIME type.
+        const textContent = extractTextFromFields(fields);
+        const htmlContent = renderMarkdownToHTML(textContent);
+        const wordHTML = `<!DOCTYPE html>
+<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns="http://www.w3.org/TR/REC-html40">
+<head>
+  <meta charset="UTF-8">
+  <meta name="ProgId" content="Word.Document">
+  <meta name="Generator" content="Microsoft Word">
+  <meta name="Originator" content="Microsoft Word">
+  <!--[if gte mso 9]><xml>
+   <w:WordDocument>
+    <w:View>Print</w:View>
+    <w:Zoom>100</w:Zoom>
+    <w:DoNotOptimizeForBrowser/>
+   </w:WordDocument>
+  </xml><![endif]-->
+  <title>Document Extraction</title>
+  <style>
+    @page {
+      size: 8.5in 11in;
+      margin: 1in;
+    }
+    body {
+      font-family: 'Calibri', 'Arial', sans-serif;
+      font-size: 11pt;
+      line-height: 1.6;
+      margin: 0;
+      color: #333;
+    }
+    h1 {
+      font-size: 18pt;
+      font-weight: bold;
+      color: #0f172a;
+      margin-top: 24pt;
+      margin-bottom: 12pt;
+      page-break-after: avoid;
+    }
+    h2 {
+      font-size: 16pt;
+      font-weight: 600;
+      color: #0f172a;
+      margin-top: 20pt;
+      margin-bottom: 10pt;
+      page-break-after: avoid;
+    }
+    h3 {
+      font-size: 14pt;
+      font-weight: 600;
+      color: #1e293b;
+      margin-top: 16pt;
+      margin-bottom: 8pt;
+      page-break-after: avoid;
+    }
+    p {
+      margin-top: 6pt;
+      margin-bottom: 6pt;
+    }
+    table {
+      width: 100%;
+      border-collapse: collapse;
+      margin: 12pt 0;
+      font-size: 10pt;
+      page-break-inside: avoid;
+    }
+    table th {
+      background-color: #f8fafc;
+      border: 1pt solid #cbd5e1;
+      padding: 6pt;
+      text-align: left;
+      font-weight: 600;
+      color: #0f172a;
+    }
+    table td {
+      border: 1pt solid #cbd5e1;
+      padding: 6pt;
+      color: #334155;
+    }
+    table tr:nth-child(even) {
+      background-color: #f8fafc;
+    }
+    sup {
+      font-size: 0.75em;
+      vertical-align: super;
+      line-height: 0;
+    }
+    sub {
+      font-size: 0.75em;
+      vertical-align: sub;
+      line-height: 0;
+    }
+    strong {
+      font-weight: 600;
+    }
+    em {
+      font-style: italic;
+    }
+    a {
+      color: #4f46e5;
+      text-decoration: underline;
+    }
+  </style>
+</head>
+<body>
+${htmlContent}
+</body>
+</html>`;
+        content = wordHTML;
+        filename = `extraction_${dateStamp}.doc`;
+        mimeType = "application/msword";
+      } else {
+        // Refuse unknown formats instead of downloading an empty, nameless file.
+        throw new Error(`Unsupported export format: ${format}`);
+      }
+      // Create blob and download through a temporary <a download> element
+      const blob = new Blob([content], { type: mimeType });
+      const url = URL.createObjectURL(blob);
+      const link = document.createElement("a");
+      link.href = url;
+      link.download = filename;
+      document.body.appendChild(link);
+      link.click();
+      document.body.removeChild(link);
+      URL.revokeObjectURL(url);
+    } catch (error) {
+      console.error("Download error:", error);
+    } finally {
+      // Always clear the spinner, whether the export succeeded or failed.
+      setDownloading(null);
+    }
+  };
+  // Opens the share-link modal in its loading state and requests a link for
+  // this extraction. On failure the link stays empty, which the modal shows
+  // as "no link available".
+  const handleCopyLink = async () => {
+    if (!extractionResult?.id) return;
+    setIsGeneratingLink(true);
+    setIsShareLinkModalOpen(true);
+    setShareLink("");
+    try {
+      const result = await createShareLink(extractionResult.id);
+      if (result.success && result.share_link) {
+        setShareLink(result.share_link);
+      } else {
+        throw new Error("Failed to generate share link");
+      }
+    } catch (err) {
+      console.error("Failed to create share link:", err);
+      setShareLink("");
+    } finally {
+      setIsGeneratingLink(false);
+    }
+  };
+  // Passed to ShareModal as `onShare`. The API result is returned so the modal
+  // can display the server's message; rejections propagate to the modal, which
+  // shows them as an error.
+  const handleShare = async (extractionId, recipientEmails) => {
+    return shareExtraction(extractionId, recipientEmails);
+  };
+  // One entry per download menu item.
+  const downloadOptions = [
+    { format: "docx", label: "Download Docx", Icon: FileText, iconClassName: "h-4 w-4 mr-2 text-blue-600" },
+    { format: "json", label: "Download JSON", Icon: Braces, iconClassName: "h-4 w-4 mr-2 text-indigo-600" },
+    { format: "xml", label: "Download XML", Icon: FileCode2, iconClassName: "h-4 w-4 mr-2 text-slate-600" },
+  ];
+  if (!isComplete) return null;
+  return (
+    <motion.div
+      initial={{ opacity: 0, y: 20 }}
+      animate={{ opacity: 1, y: 0 }}
+      className="flex items-center gap-3"
+    >
+      {/* Export Options Dropdown */}
+      <DropdownMenu>
+        <DropdownMenuTrigger asChild>
+          <Button
+            variant="ghost"
+            className="h-11 w-11 rounded-xl hover:bg-slate-100"
+            disabled={downloading !== null}
+          >
+            {downloading ? (
+              <motion.div
+                animate={{ rotate: 360 }}
+                transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
+              >
+                <Download className="h-4 w-4" />
+              </motion.div>
+            ) : (
+              <Share2 className="h-4 w-4" />
+            )}
+          </Button>
+        </DropdownMenuTrigger>
+        <DropdownMenuContent align="end" className="w-56 rounded-xl p-2">
+          <DropdownMenuItem
+            className="rounded-lg cursor-pointer"
+            onClick={() => setIsShareModalOpen(true)}
+          >
+            <Mail className="h-4 w-4 mr-2 text-indigo-600" />
+            Share output
+          </DropdownMenuItem>
+          <DropdownMenuItem
+            className="rounded-lg cursor-pointer"
+            onClick={handleCopyLink}
+          >
+            <Link2 className="h-4 w-4 mr-2 text-indigo-600" />
+            Copy share link
+          </DropdownMenuItem>
+          <DropdownMenuSeparator />
+          {downloadOptions.map(({ format, label, Icon, iconClassName }) => (
+            <DropdownMenuItem
+              key={format}
+              className="rounded-lg cursor-pointer"
+              onClick={() => handleDownload(format)}
+              disabled={downloading === format}
+            >
+              {downloading === format ? (
+                <motion.div
+                  animate={{ rotate: 360 }}
+                  transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
+                  className="h-4 w-4 mr-2"
+                >
+                  <Download className="h-4 w-4" />
+                </motion.div>
+              ) : (
+                <Icon className={iconClassName} />
+              )}
+              {label}
+            </DropdownMenuItem>
+          ))}
+        </DropdownMenuContent>
+      </DropdownMenu>
+      {/* Share Modal */}
+      <ShareModal
+        isOpen={isShareModalOpen}
+        onClose={() => setIsShareModalOpen(false)}
+        onShare={handleShare}
+        extractionId={extractionResult?.id}
+      />
+      {/* Share Link Modal */}
+      <ShareLinkModal
+        isOpen={isShareLinkModalOpen}
+        onClose={() => {
+          setIsShareLinkModalOpen(false);
+          setShareLink("");
+        }}
+        shareLink={shareLink}
+        isLoading={isGeneratingLink}
+      />
+    </motion.div>
+  );
+}

frontend/src/components/ShareLinkModal.jsx ADDED Viewed

	@@ -0,0 +1,141 @@

+import React, { useState, useEffect } from "react";
+import { motion, AnimatePresence } from "framer-motion";
+import { X, Copy, Check, Loader2 } from "lucide-react";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+/**
+ * Modal that displays a generated share link with a copy-to-clipboard button.
+ *
+ * Shows a spinner while `isLoading`, the link with a Copy button when
+ * `shareLink` is non-empty, and a "No share link available" message otherwise.
+ *
+ * @param {object} props
+ * @param {boolean} props.isOpen - Whether the modal is visible.
+ * @param {Function} props.onClose - Called when the backdrop, the X button or Close is clicked.
+ * @param {string} props.shareLink - Link to display; an empty string means none is available.
+ * @param {boolean} props.isLoading - True while the link is being generated.
+ */
+export default function ShareLinkModal({ isOpen, onClose, shareLink, isLoading }) {
+  const [copied, setCopied] = useState(false);
+  // Forget the "Copied!" state whenever the modal is closed.
+  useEffect(() => {
+    if (!isOpen) {
+      setCopied(false);
+    }
+  }, [isOpen]);
+  // Revert the button label two seconds after a successful copy. The cleanup
+  // cancels the pending timer if the modal unmounts or `copied` changes first.
+  useEffect(() => {
+    if (!copied) return undefined;
+    const timerId = setTimeout(() => setCopied(false), 2000);
+    return () => clearTimeout(timerId);
+  }, [copied]);
+  // Copies the link with the async Clipboard API, falling back to a hidden
+  // textarea plus document.execCommand("copy") when that API is unavailable
+  // or rejects.
+  const handleCopy = async () => {
+    if (!shareLink) return;
+    try {
+      await navigator.clipboard.writeText(shareLink);
+      setCopied(true);
+    } catch (err) {
+      // Fallback for older browsers
+      const textArea = document.createElement("textarea");
+      textArea.value = shareLink;
+      textArea.style.position = "fixed";
+      textArea.style.opacity = "0";
+      document.body.appendChild(textArea);
+      textArea.select();
+      try {
+        // execCommand reports failure by returning false rather than throwing,
+        // so only show "Copied!" when it actually succeeded.
+        if (document.execCommand("copy")) {
+          setCopied(true);
+        } else {
+          console.error("Failed to copy:", err);
+        }
+      } catch (fallbackErr) {
+        console.error("Failed to copy:", fallbackErr);
+      } finally {
+        document.body.removeChild(textArea);
+      }
+    }
+  };
+  // NOTE(review): this early return unmounts <AnimatePresence> together with its
+  // children, so the `exit` animations below presumably never play - confirm.
+  if (!isOpen) return null;
+  return (
+    <AnimatePresence>
+      <div className="fixed inset-0 z-50 flex items-center justify-center">
+        {/* Backdrop */}
+        <motion.div
+          initial={{ opacity: 0 }}
+          animate={{ opacity: 1 }}
+          exit={{ opacity: 0 }}
+          className="absolute inset-0 bg-black/50 backdrop-blur-sm"
+          onClick={onClose}
+        />
+        {/* Modal */}
+        <motion.div
+          initial={{ opacity: 0, scale: 0.95, y: 20 }}
+          animate={{ opacity: 1, scale: 1, y: 0 }}
+          exit={{ opacity: 0, scale: 0.95, y: 20 }}
+          className="relative z-10 w-full max-w-md mx-4 bg-white rounded-2xl shadow-2xl overflow-hidden"
+          onClick={(e) => e.stopPropagation()}
+        >
+          {/* Header */}
+          <div className="px-6 py-4 border-b border-slate-200 flex items-center justify-between">
+            <h2 className="text-xl font-semibold text-slate-900">Copy Share Link</h2>
+            <button
+              type="button"
+              aria-label="Close"
+              onClick={onClose}
+              disabled={isLoading}
+              className="p-2 rounded-lg hover:bg-slate-100 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
+            >
+              <X className="h-5 w-5 text-slate-500" />
+            </button>
+          </div>
+          {/* Content */}
+          <div className="px-6 py-6">
+            {isLoading ? (
+              <div className="text-center py-8">
+                <Loader2 className="h-8 w-8 mx-auto mb-4 text-indigo-600 animate-spin" />
+                <p className="text-sm text-slate-600">Generating share link...</p>
+              </div>
+            ) : shareLink ? (
+              <div className="space-y-4">
+                <div>
+                  <label className="block text-sm font-medium text-slate-700 mb-2">
+                    Share Link
+                  </label>
+                  <div className="flex gap-2">
+                    <Input
+                      type="text"
+                      value={shareLink}
+                      readOnly
+                      className="flex-1 h-12 rounded-xl border-slate-200 bg-slate-50 text-sm font-mono"
+                    />
+                    <Button
+                      onClick={handleCopy}
+                      className="h-12 px-4 rounded-xl bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700"
+                    >
+                      {copied ? (
+                        <>
+                          <Check className="h-4 w-4 mr-2" />
+                          Copied!
+                        </>
+                      ) : (
+                        <>
+                          <Copy className="h-4 w-4 mr-2" />
+                          Copy
+                        </>
+                      )}
+                    </Button>
+                  </div>
+                </div>
+                <p className="text-xs text-slate-500">
+                  Share this link with anyone you want to give access to this extraction. They'll need to sign in to view it.
+                </p>
+              </div>
+            ) : (
+              <div className="text-center py-8">
+                <p className="text-sm text-slate-600">No share link available</p>
+              </div>
+            )}
+            <div className="pt-4 mt-6 border-t border-slate-200">
+              <Button
+                type="button"
+                variant="outline"
+                onClick={onClose}
+                disabled={isLoading}
+                className="w-full h-11 rounded-xl"
+              >
+                Close
+              </Button>
+            </div>
+          </div>
+        </motion.div>
+      </div>
+    </AnimatePresence>
+  );
+}

frontend/src/components/ShareModal.jsx ADDED Viewed

	@@ -0,0 +1,197 @@

+import React, { useState } from "react";
+import { motion, AnimatePresence } from "framer-motion";
+import { X, Mail, Send, Loader2 } from "lucide-react";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+/**
+ * Modal for e-mailing an extraction to one or more recipients.
+ *
+ * The input accepts addresses separated by commas or semicolons. After basic
+ * validation the list is handed to `onShare`; on success a confirmation is
+ * shown and the modal closes itself two seconds later.
+ *
+ * @param {object} props
+ * @param {boolean} props.isOpen - Whether the modal is visible.
+ * @param {Function} props.onClose - Called when the modal should close.
+ * @param {Function} props.onShare - Async callback `(extractionId, emails[])`; if it
+ *   resolves to an object with a `message`, that message is shown on success.
+ *   A rejection is displayed as the form error.
+ * @param {*} props.extractionId - Identifier forwarded unchanged to `onShare`.
+ */
+export default function ShareModal({ isOpen, onClose, onShare, extractionId }) {
+  const [email, setEmail] = useState("");
+  const [isLoading, setIsLoading] = useState(false);
+  const [error, setError] = useState("");
+  const [success, setSuccess] = useState(false);
+  const [successMessage, setSuccessMessage] = useState("");
+  // Handle of the pending auto-close timer (null when none is scheduled).
+  const closeTimerRef = React.useRef(null);
+  // Cancels the auto-close so a stale timer cannot close a modal the user has
+  // already closed and reopened.
+  const cancelAutoClose = () => {
+    if (closeTimerRef.current !== null) {
+      clearTimeout(closeTimerRef.current);
+      closeTimerRef.current = null;
+    }
+  };
+  // Never let the timer fire after the component is gone.
+  React.useEffect(() => {
+    return () => {
+      clearTimeout(closeTimerRef.current);
+    };
+  }, []);
+  const handleSubmit = async (e) => {
+    e.preventDefault();
+    setError("");
+    setSuccess(false);
+    // Parse and validate multiple emails (comma or semicolon separated)
+    if (!email.trim()) {
+      setError("Please enter at least one recipient email address");
+      return;
+    }
+    // Split by comma or semicolon, trim each email, and filter out empty strings
+    const emailList = email
+      .split(/[,;]/)
+      .map((address) => address.trim())
+      .filter((address) => address.length > 0);
+    if (emailList.length === 0) {
+      setError("Please enter at least one recipient email address");
+      return;
+    }
+    // Shape check only: something@something.tld without whitespace
+    const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
+    const invalidEmails = emailList.filter((address) => !emailRegex.test(address));
+    if (invalidEmails.length > 0) {
+      setError(`Invalid email address(es): ${invalidEmails.join(", ")}`);
+      return;
+    }
+    setIsLoading(true);
+    try {
+      const result = await onShare(extractionId, emailList);
+      setSuccessMessage(result?.message || `Successfully shared with ${emailList.length} recipient(s)`);
+      setSuccess(true);
+      setEmail("");
+      // Close modal after 2 seconds, replacing any timer that is still pending
+      cancelAutoClose();
+      closeTimerRef.current = setTimeout(() => {
+        closeTimerRef.current = null;
+        setSuccess(false);
+        setSuccessMessage("");
+        onClose();
+      }, 2000);
+    } catch (err) {
+      setError(err.message || "Failed to share extraction. Please try again.");
+    } finally {
+      setIsLoading(false);
+    }
+  };
+  // Manual close (backdrop, X, Cancel): ignored while a request is in flight,
+  // otherwise resets the form and cancels the pending auto-close.
+  const handleClose = () => {
+    if (isLoading) return;
+    cancelAutoClose();
+    setEmail("");
+    setError("");
+    setSuccess(false);
+    setSuccessMessage("");
+    onClose();
+  };
+  // NOTE(review): this early return unmounts <AnimatePresence> together with its
+  // children, so the `exit` animations below presumably never play - confirm.
+  if (!isOpen) return null;
+  return (
+    <AnimatePresence>
+      <div className="fixed inset-0 z-50 flex items-center justify-center">
+        {/* Backdrop */}
+        <motion.div
+          initial={{ opacity: 0 }}
+          animate={{ opacity: 1 }}
+          exit={{ opacity: 0 }}
+          className="absolute inset-0 bg-black/50 backdrop-blur-sm"
+          onClick={handleClose}
+        />
+        {/* Modal */}
+        <motion.div
+          initial={{ opacity: 0, scale: 0.95, y: 20 }}
+          animate={{ opacity: 1, scale: 1, y: 0 }}
+          exit={{ opacity: 0, scale: 0.95, y: 20 }}
+          className="relative z-10 w-full max-w-md mx-4 bg-white rounded-2xl shadow-2xl overflow-hidden"
+          onClick={(e) => e.stopPropagation()}
+        >
+          {/* Header */}
+          <div className="px-6 py-4 border-b border-slate-200 flex items-center justify-between">
+            <h2 className="text-xl font-semibold text-slate-900">Share Output</h2>
+            <button
+              type="button"
+              aria-label="Close"
+              onClick={handleClose}
+              disabled={isLoading}
+              className="p-2 rounded-lg hover:bg-slate-100 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
+            >
+              <X className="h-5 w-5 text-slate-500" />
+            </button>
+          </div>
+          {/* Content */}
+          <div className="px-6 py-6">
+            {success ? (
+              <motion.div
+                initial={{ opacity: 0, scale: 0.9 }}
+                animate={{ opacity: 1, scale: 1 }}
+                className="text-center py-8"
+              >
+                <div className="w-16 h-16 mx-auto mb-4 rounded-full bg-emerald-100 flex items-center justify-center">
+                  <Send className="h-8 w-8 text-emerald-600" />
+                </div>
+                <h3 className="text-lg font-semibold text-slate-900 mb-2">
+                  Share Sent Successfully!
+                </h3>
+                <p className="text-sm text-slate-600">
+                  {successMessage || "The recipient(s) will receive an email with a link to view the extraction."}
+                </p>
+              </motion.div>
+            ) : (
+              <form onSubmit={handleSubmit} className="space-y-4">
+                <div>
+                  <label
+                    htmlFor="recipient-email"
+                    className="block text-sm font-medium text-slate-700 mb-2"
+                  >
+                    Recipient Email(s)
+                  </label>
+                  <p className="text-xs text-slate-500 mb-2">
+                    Separate multiple emails with commas or semicolons
+                  </p>
+                  <div className="relative">
+                    <Mail className="absolute left-3 top-1/2 -translate-y-1/2 h-5 w-5 text-slate-400" />
+                    <Input
+                      id="recipient-email"
+                      type="text"
+                      value={email}
+                      onChange={(e) => setEmail(e.target.value)}
+                      placeholder="Enter email addresses (comma or semicolon separated)"
+                      className="pl-10 h-12 rounded-xl border-slate-200 focus:border-indigo-500 focus:ring-indigo-500"
+                      disabled={isLoading}
+                      autoFocus
+                    />
+                  </div>
+                  {error && (
+                    <motion.p
+                      initial={{ opacity: 0, y: -10 }}
+                      animate={{ opacity: 1, y: 0 }}
+                      className="mt-2 text-sm text-red-600"
+                    >
+                      {error}
+                    </motion.p>
+                  )}
+                </div>
+                <div className="pt-4 flex gap-3">
+                  <Button
+                    type="button"
+                    variant="outline"
+                    onClick={handleClose}
+                    disabled={isLoading}
+                    className="flex-1 h-11 rounded-xl"
+                  >
+                    Cancel
+                  </Button>
+                  <Button
+                    type="submit"
+                    disabled={isLoading || !email.trim()}
+                    className="flex-1 h-11 rounded-xl bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700"
+                  >
+                    {isLoading ? (
+                      <>
+                        <Loader2 className="h-4 w-4 mr-2 animate-spin" />
+                        Sending...
+                      </>
+                    ) : (
+                      <>
+                        <Send className="h-4 w-4 mr-2" />
+                        Send
+                      </>
+                    )}
+                  </Button>
+                </div>
+              </form>
+            )}
+          </div>
+        </motion.div>
+      </div>
+    </AnimatePresence>
+  );
+}

frontend/src/components/auth/LoginForm.jsx ADDED Viewed

	@@ -0,0 +1,512 @@

+import React, { useState } from "react";
+import { motion } from "framer-motion";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Separator } from "@/components/ui/separator";
+import {
+  Zap,
+  Target,
+  Upload,
+  CheckCircle2,
+  ArrowRight,
+  Mail,
+  Sparkles,
+  Shield,
+  Globe,
+  AlertCircle,
+  Loader2,
+} from "lucide-react";
+import { useAuth } from "@/contexts/AuthContext";
+export default function LoginForm() {
+  const { firebaseLogin, requestOTP, verifyOTP } = useAuth();
+  const [email, setEmail] = useState("");
+  const [showOtp, setShowOtp] = useState(false);
+  const [otp, setOtp] = useState(["", "", "", "", "", ""]);
+  const [loading, setLoading] = useState(false);
+  const [error, setError] = useState("");
+  // Business email validation: consumer mailbox domains rejected by isBusinessEmail (exact match on the lowercased domain; regional variants such as yahoo.co.uk are not listed)
+  const PERSONAL_EMAIL_DOMAINS = [
+    "gmail.com",
+    "yahoo.com",
+    "hotmail.com",
+    "outlook.com",
+    "aol.com",
+    "icloud.com",
+    "mail.com",
+    "protonmail.com",
+    "yandex.com",
+    "zoho.com",
+    "gmx.com",
+    "live.com",
+    "msn.com",
+  ];
+  /**
+   * Client-side check that an address does not belong to one of the listed
+   * personal mailbox providers.
+   *
+   * @param {string} email - Address as typed by the user.
+   * @returns {boolean} true when the address has a non-empty local part and a
+   *   non-empty domain that is not in PERSONAL_EMAIL_DOMAINS; false otherwise.
+   */
+  const isBusinessEmail = (email) => {
+    if (typeof email !== "string") return false;
+    const candidate = email.trim();
+    // The domain is whatever follows the LAST "@"; splitting on the first one
+    // misreads input that contains more than one "@".
+    const atIndex = candidate.lastIndexOf("@");
+    // Covers: no "@" at all, empty local part, and empty domain ("user@").
+    if (atIndex <= 0 || atIndex === candidate.length - 1) return false;
+    // Lowercase and drop a trailing root dot so "Gmail.com." still matches.
+    const domain = candidate.slice(atIndex + 1).toLowerCase().replace(/\.$/, "");
+    return domain !== "" && !PERSONAL_EMAIL_DOMAINS.includes(domain);
+  };
+  /**
+   * Runs the Google sign-in call exposed by the auth context.
+   * Clears any previous error, keeps the form flagged as busy while the call
+   * is pending, and shows a message if the call rejects.
+   */
+  const handleGoogleLogin = async () => {
+    setError("");
+    setLoading(true);
+    try {
+      await firebaseLogin();
+    } catch (loginError) {
+      const message = loginError.message || "Failed to sign in with Google";
+      setError(message);
+    } finally {
+      // Always release the busy flag, whether sign-in succeeded or failed.
+      setLoading(false);
+    }
+  };
+  /**
+   * Submit handler for the email form: validates the address locally and, if
+   * it passes, requests a one-time code and switches the view to OTP entry.
+   *
+   * @param {Event} e - Form submit event; its default action is suppressed.
+   */
+  const handleEmailSubmit = async (e) => {
+    e.preventDefault();
+    setError("");
+    // Work out the first validation failure, if any, before calling out.
+    let validationMessage = "";
+    if (!email) {
+      validationMessage = "Please enter your email address";
+    } else if (!isBusinessEmail(email)) {
+      validationMessage = "Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, etc.) are not permitted.";
+    }
+    if (validationMessage) {
+      setError(validationMessage);
+      // Rejected submissions leave the form in the non-busy state.
+      setLoading(false);
+      return;
+    }
+    setLoading(true);
+    try {
+      await requestOTP(email);
+      setShowOtp(true);
+    } catch (requestError) {
+      const message = requestError.message || "Failed to send OTP";
+      setError(message);
+    } finally {
+      setLoading(false);
+    }
+  };
+  /**
+   * Change handler for a single OTP field.
+   *
+   * @param {number} index - Position of the edited field within the code.
+   * @param {string} value - New field content; ignored unless it is empty or
+   *   exactly one digit.
+   */
+  const handleOtpChange = (index, value) => {
+    const isAcceptable = value.length <= 1 && /^\d*$/.test(value);
+    if (!isAcceptable) return;
+    const updated = otp.slice();
+    updated[index] = value;
+    setOtp(updated);
+    setError("");
+    // Auto-advance once a digit lands in any field except the last one.
+    const shouldAdvance = Boolean(value) && index < 5;
+    if (shouldAdvance) {
+      document.getElementById(`otp-${index + 1}`)?.focus();
+    }
+  };
+  /**
+   * Paste handler for the OTP fields: spreads the pasted digits across the
+   * fields, beginning at the field that received the paste.
+   *
+   * @param {Event} e - Paste event; the default insertion is always suppressed.
+   * @param {number} [startIndex=0] - Index of the field where filling begins.
+   */
+  const handleOtpPaste = (e, startIndex = 0) => {
+    e.preventDefault();
+    // Keep digits only, capped at the six a code can hold.
+    const digits = e.clipboardData.getData("text").replace(/\D/g, "").slice(0, 6);
+    if (digits.length === 0) return;
+    const filled = [...otp];
+    // Never write past the sixth field.
+    const writable = Math.max(0, 6 - startIndex);
+    digits.slice(0, writable).split("").forEach((digit, offset) => {
+      filled[startIndex + offset] = digit;
+    });
+    setOtp(filled);
+    setError("");
+    // Focus the field after the last pasted digit, clamped to the final field.
+    const focusIndex = Math.min(startIndex + digits.length, 5);
+    document.getElementById(`otp-${focusIndex}`)?.focus();
+  };
+  /**
+   * Key handler for an OTP field: Backspace pressed in an already-empty field
+   * (other than the first) moves focus to the previous field.
+   *
+   * @param {number} index - Position of the field that received the key press.
+   * @param {KeyboardEvent} e - Key event; only its `key` is inspected.
+   */
+  const handleOtpKeyDown = (index, e) => {
+    const shouldStepBack = index > 0 && e.key === "Backspace" && !otp[index];
+    if (shouldStepBack) {
+      document.getElementById(`otp-${index - 1}`)?.focus();
+    }
+  };
+  /**
+   * Submit handler for the OTP form: joins the six fields into one code and
+   * passes it, with the email, to verifyOTP from the auth context.
+   *
+   * On failure the error is shown, the fields are cleared and focus returns
+   * to the first field so the user can retype immediately.
+   *
+   * @param {Event} e - Form submit event; its default action is suppressed.
+   */
+  const handleOtpVerify = async (e) => {
+    e.preventDefault();
+    setLoading(true);
+    setError("");
+    const otpString = otp.join("");
+    if (otpString.length !== 6) {
+      setError("Please enter a valid 6-digit OTP");
+      setLoading(false);
+      return;
+    }
+    try {
+      await verifyOTP(email, otpString);
+      // Nothing more to do here on success; what happens next is presumably
+      // driven by AuthContext (not visible in this component).
+    } catch (err) {
+      setError(err.message || "Invalid OTP. Please try again.");
+      setOtp(["", "", "", "", "", ""]);
+      // The fields were just cleared and the submit button is disabled while
+      // the code is incomplete, so put focus back on the first field.
+      document.getElementById("otp-0")?.focus();
+    } finally {
+      setLoading(false);
+    }
+  };
+  const features = [ // Marketing highlights; mapped over twice in the JSX below (desktop list and mobile list)
+    {
+      icon: Zap,
+      title: "Lightning Fast",
+      description: "Process documents in seconds and get outputs for ERP ingestion",
+      color: "text-amber-500",
+      bg: "bg-amber-50",
+    },
+    {
+      icon: Target,
+      title: "100% Accuracy", // NOTE(review): the preview card further down shows "99.8% Accuracy" - confirm which figure is intended
+      description: "Industry-leading extraction with Visual Reasoning Processor",
+      color: "text-emerald-500",
+      bg: "bg-emerald-50",
+    },
+    {
+      icon: Globe,
+      title: "Any Format, Any Language",
+      description: "PDF, images, scanned docs — multi-lingual support included",
+      color: "text-blue-500",
+      bg: "bg-blue-50",
+    },
+  ];
+  const supportedFormats = [ // File-type badges rendered inside the product preview card
+    { ext: "PDF", color: "bg-red-500" },
+    { ext: "PNG", color: "bg-blue-500" },
+    { ext: "JPG", color: "bg-green-500" },
+    { ext: "TIFF", color: "bg-purple-500" },
+  ];
+  return (
+    <div className="min-h-screen bg-gradient-to-br from-slate-50 via-white to-blue-50 flex">
+      {/* Left Side - Product Showcase */}
+      <div className="hidden lg:flex lg:w-[56%] flex-col justify-between p-8 relative overflow-hidden">
+        {/* Background Elements */}
+        <div className="absolute top-0 right-0 w-96 h-96 bg-blue-100/40 rounded-full blur-3xl -translate-y-1/2 translate-x-1/2" />
+        <div className="absolute bottom-0 left-0 w-80 h-80 bg-emerald-100/40 rounded-full blur-3xl translate-y-1/2 -translate-x-1/2" />
+        {/* Logo & Brand */}
+        <motion.div
+          initial={{ opacity: 0, y: -20 }}
+          animate={{ opacity: 1, y: 0 }}
+          className="relative z-10 mb-6"
+        >
+          <div className="flex items-center gap-3">
+            <div className="h-12 w-12 flex items-center justify-center flex-shrink-0">
+              <img
+                src="/logo.png"
+                alt="EZOFIS AI Logo"
+                className="h-full w-full object-contain"
+                onError={(e) => {
+                  // Fallback: hide image if logo not found
+                  e.target.style.display = 'none';
+                }}
+              />
+            </div>
+            <div>
+              <h1 className="text-2xl font-bold text-slate-900 tracking-tight">EZOFISOCR</h1>
+              <p className="text-sm text-slate-500 font-medium">VRP Intelligence</p>
+            </div>
+          </div>
+        </motion.div>
+        {/* Main Content */}
+        <motion.div
+          initial={{ opacity: 0, y: 20 }}
+          animate={{ opacity: 1, y: 0 }}
+          transition={{ delay: 0.1 }}
+          className="relative z-10 space-y-5 flex-1 flex flex-col justify-center ml-24 xl:ml-36"
+        >
+          <div className="space-y-3">
+            <h2 className="text-3xl xl:text-4xl font-bold text-slate-900 leading-tight">
+              Pure Agentic
+              <span className="block text-transparent bg-clip-text bg-gradient-to-r from-blue-600 to-indigo-600">
+                Document Intelligence
+              </span>
+            </h2>
+            <p className="text-base text-slate-600 max-w-lg leading-relaxed">
+              Deterministic, layout-aware extraction (without LLM) using our proprietary{" "}
+              <span className="font-semibold text-slate-800">Visual Reasoning Processor (VRP)</span>
+            </p>
+          </div>
+          {/* Product Preview Card */}
+          <motion.div
+            initial={{ opacity: 0, scale: 0.95 }}
+            animate={{ opacity: 1, scale: 1 }}
+            transition={{ delay: 0.3 }}
+            className="bg-white rounded-2xl border border-slate-200/80 shadow-xl shadow-slate-200/50 p-4 max-w-lg"
+          >
+            <div className="border-2 border-dashed border-slate-200 rounded-xl p-5 text-center bg-slate-50/50">
+              <div className="w-12 h-12 rounded-full bg-slate-100 flex items-center justify-center mx-auto mb-3">
+                <Upload className="w-5 h-5 text-slate-400" />
+              </div>
+              <p className="text-slate-700 font-medium mb-1 text-sm">Drop a document to extract data</p>
+              <p className="text-xs text-slate-400">Invoices, purchase orders, delivery notes, receipts, and operational documents</p>
+              <div className="flex items-center justify-center gap-2 mt-3">
+                {supportedFormats.map((format, i) => (
+                  <span key={i} className={`${format.color} text-white text-xs font-bold px-2 py-1 rounded`}>
+                    {format.ext}
+                  </span>
+                ))}
+              </div>
+            </div>
+            <div className="flex items-center justify-between mt-3 pt-3 border-t border-slate-100">
+              <div className="flex items-center gap-2">
+                <div className="w-2 h-2 rounded-full bg-emerald-500 animate-pulse" />
+                <span className="text-xs text-slate-600">Ready to extract</span>
+              </div>
+              <div className="flex items-center gap-1 text-emerald-600">
+                <CheckCircle2 className="w-3.5 h-3.5" />
+                <span className="text-xs font-semibold">99.8% Accuracy</span>
+              </div>
+            </div>
+          </motion.div>
+          {/* Features */}
+          <div className="grid gap-3">
+            {features.map((feature, index) => (
+              <motion.div
+                key={feature.title}
+                initial={{ opacity: 0, x: -20 }}
+                animate={{ opacity: 1, x: 0 }}
+                transition={{ delay: 0.4 + index * 0.1 }}
+                className="flex items-start gap-3 group"
+              >
+                <div
+                  className={`w-9 h-9 rounded-xl ${feature.bg} flex items-center justify-center flex-shrink-0 group-hover:scale-110 transition-transform`}
+                >
+                  <feature.icon className={`w-4 h-4 ${feature.color}`} />
+                </div>
+                <div>
+                  <h3 className="font-semibold text-slate-900 text-sm">{feature.title}</h3>
+                  <p className="text-xs text-slate-500">{feature.description}</p>
+                </div>
+              </motion.div>
+            ))}
+          </div>
+        </motion.div>
+        {/* Trust Badge */}
+        <motion.div
+          initial={{ opacity: 0 }}
+          animate={{ opacity: 1 }}
+          transition={{ delay: 0.6 }}
+          className="relative z-10 flex items-center gap-3 text-xs text-slate-500 mt-6"
+        >
+          <Shield className="w-4 h-4" />
+          <span>Enterprise-grade security • SOC 2 Compliant • GDPR Ready</span>
+        </motion.div>
+      </div>
+      {/* Right Side - Sign In Form */}
+      <div className="w-full lg:w-[44%] flex items-center justify-center p-6 sm:p-10">
+        <motion.div
+          initial={{ opacity: 0, y: 20 }}
+          animate={{ opacity: 1, y: 0 }}
+          transition={{ delay: 0.2 }}
+          className="w-full max-w-md"
+        >
+          {/* Mobile Logo */}
+          <div className="lg:hidden flex items-center justify-center gap-3 mb-8">
+            <div className="h-12 w-12 flex items-center justify-center flex-shrink-0">
+              <img
+                src="/logo.png"
+                alt="EZOFIS AI Logo"
+                className="h-full w-full object-contain"
+                onError={(e) => {
+                  // Fallback: hide image if logo not found
+                  e.target.style.display = 'none';
+                }}
+              />
+            </div>
+            <div>
+              <h1 className="text-2xl font-bold text-slate-900 tracking-tight">EZOFISOCR</h1>
+              <p className="text-sm text-slate-500 font-medium">VRP Intelligence</p>
+            </div>
+          </div>
+          <div className="bg-white rounded-3xl border border-slate-200/80 shadow-2xl shadow-slate-200/50 p-8 sm:p-10">
+            <div className="text-center mb-8">
+              <h2 className="text-2xl font-bold text-slate-900 mb-2">
+                {showOtp ? "Enter verification code" : "Secure Access"}
+              </h2>
+              <p className="text-slate-500">
+                {showOtp ? `We sent a code to ${email}` : "Access your document intelligence workspace"}
+              </p>
+            </div>
+            {/* Error Message */}
+            {error && (
+              <motion.div
+                initial={{ opacity: 0, y: -10 }}
+                animate={{ opacity: 1, y: 0 }}
+                className="mb-6 p-3 bg-red-50 border border-red-200 rounded-xl flex items-start gap-2 text-sm text-red-700"
+              >
+                <AlertCircle className="h-4 w-4 flex-shrink-0 mt-0.5" />
+                <p>{error}</p>
+              </motion.div>
+            )}
+            {!showOtp ? (
+              <>
+                {/* Google Sign In */}
+                <Button
+                  onClick={handleGoogleLogin}
+                  disabled={loading}
+                  variant="outline"
+                  className="w-full h-12 text-base font-medium border-slate-200 hover:bg-slate-50 hover:border-slate-300 transition-all group"
+                >
+                  {loading ? (
+                    <Loader2 className="w-5 h-5 mr-3 animate-spin" />
+                  ) : (
+                    <svg className="w-5 h-5 mr-3" viewBox="0 0 24 24">
+                      <path fill="#4285F4" d="M22.56 12.25c0-.78-.07-1.53-.2-2.25H12v4.26h5.92c-.26 1.37-1.04 2.53-2.21 3.31v2.77h3.57c2.08-1.92 3.28-4.74 3.28-8.09z" />
+                      <path fill="#34A853" d="M12 23c2.97 0 5.46-.98 7.28-2.66l-3.57-2.77c-.98.66-2.23 1.06-3.71 1.06-2.86 0-5.29-1.93-6.16-4.53H2.18v2.84C3.99 20.53 7.7 23 12 23z" />
+                      <path fill="#FBBC05" d="M5.84 14.09c-.22-.66-.35-1.36-.35-2.09s.13-1.43.35-2.09V7.07H2.18C1.43 8.55 1 10.22 1 12s.43 3.45 1.18 4.93l2.85-2.22.81-.62z" />
+                      <path fill="#EA4335" d="M12 5.38c1.62 0 3.06.56 4.21 1.64l3.15-3.15C17.45 2.09 14.97 1 12 1 7.7 1 3.99 3.47 2.18 7.07l3.66 2.84c.87-2.6 3.3-4.53 6.16-4.53z" />
+                    </svg>
+                  )}
+                  Continue with Google
+                  <ArrowRight className="w-4 h-4 ml-auto opacity-0 -translate-x-2 group-hover:opacity-100 group-hover:translate-x-0 transition-all" />
+                </Button>
+                <div className="relative my-8">
+                  <Separator />
+                  <span className="absolute left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2 bg-white px-4 text-sm text-slate-400">
+                    or continue with email
+                  </span>
+                </div>
+                {/* Email Input */}
+                <form onSubmit={handleEmailSubmit} className="space-y-4">
+                  <div className="relative">
+                    <Mail className="absolute left-4 top-1/2 -translate-y-1/2 w-5 h-5 text-slate-400" />
+                    <Input
+                      type="email"
+                      placeholder="name@company.com"
+                      value={email}
+                      onChange={(e) => {
+                        setEmail(e.target.value);
+                        setError("");
+                      }}
+                      className="h-12 pl-12 text-base border-slate-200 focus:border-blue-500 focus:ring-blue-500"
+                    />
+                  </div>
+                  <Button
+                    type="submit"
+                    disabled={loading}
+                    className="w-full h-12 text-base font-medium bg-gradient-to-r from-blue-600 to-indigo-600 hover:from-blue-700 hover:to-indigo-700 shadow-lg shadow-blue-500/25 transition-all"
+                  >
+                    {loading ? (
+                      <>
+                        <Loader2 className="w-4 h-4 mr-2 animate-spin" />
+                        Sending...
+                      </>
+                    ) : (
+                      <>
+                        Continue with Email
+                        <ArrowRight className="w-4 h-4 ml-2" />
+                      </>
+                    )}
+                  </Button>
+                </form>
+              </>
+            ) : (
+              /* OTP Input */
+              <form onSubmit={handleOtpVerify} className="space-y-6">
+                <div className="flex justify-center gap-2">
+                  {otp.map((digit, index) => (
+                    <Input
+                      key={index}
+                      id={`otp-${index}`}
+                      type="text"
+                      inputMode="numeric"
+                      maxLength={1}
+                      value={digit}
+                      onChange={(e) => handleOtpChange(index, e.target.value)}
+                      onKeyDown={(e) => handleOtpKeyDown(index, e)}
+                      onPaste={(e) => handleOtpPaste(e, index)}
+                      className="w-12 h-14 text-center text-xl font-semibold border-slate-200 focus:border-blue-500 focus:ring-blue-500"
+                    />
+                  ))}
+                </div>
+                <Button
+                  type="submit"
+                  disabled={loading || otp.join("").length !== 6}
+                  className="w-full h-12 text-base font-medium bg-gradient-to-r from-blue-600 to-indigo-600 hover:from-blue-700 hover:to-indigo-700 shadow-lg shadow-blue-500/25"
+                >
+                  {loading ? (
+                    <>
+                      <Loader2 className="w-4 h-4 mr-2 animate-spin" />
+                      Verifying...
+                    </>
+                  ) : (
+                    <>
+                      Verify & Sign In
+                      <ArrowRight className="w-4 h-4 ml-2" />
+                    </>
+                  )}
+                </Button>
+                <button
+                  type="button"
+                  onClick={() => {
+                    setShowOtp(false);
+                    setOtp(["", "", "", "", "", ""]);
+                    setError("");
+                  }}
+                  className="w-full text-sm text-slate-500 hover:text-slate-700 transition-colors"
+                >
+                  ← Back to sign in options
+                </button>
+              </form>
+            )}
+            {/* Notice */}
+            <div className="mt-8 pt-6 border-t border-slate-100">
+              <div className="flex items-start gap-2 text-xs text-slate-400 mb-4">
+                <Shield className="w-4 h-4 flex-shrink-0 mt-0.5" />
+                <span>Only business email addresses are allowed</span>
+              </div>
+              <p className="text-xs text-slate-400 text-center leading-relaxed">
+                By signing in, you agree to our{" "}
+                <a href="#" className="text-blue-600 hover:underline">
+                  Terms of Service
+                </a>{" "}
+                and{" "}
+                <a href="#" className="text-blue-600 hover:underline">
+                  Privacy Policy
+                </a>
+              </p>
+            </div>
+          </div>
+          {/* Mobile Features */}
+          <div className="lg:hidden mt-8 space-y-4">
+            {features.map((feature) => (
+              <div key={feature.title} className="flex items-center gap-3 text-sm">
+                <div className={`w-8 h-8 rounded-lg ${feature.bg} flex items-center justify-center`}>
+                  <feature.icon className={`w-4 h-4 ${feature.color}`} />
+                </div>
+                <span className="text-slate-600">{feature.title}</span>
+              </div>
+            ))}
+          </div>
+        </motion.div>
+      </div>
+    </div>
+  );
+}

frontend/src/components/ocr/DocumentPreview.jsx CHANGED Viewed

@@ -1,236 +1,229 @@
-import React, { useState, useEffect, useRef } from "react";
-import { motion } from "framer-motion";
-import { FileText, ZoomIn, ZoomOut, RotateCw, Maximize2 } from "lucide-react";
-import { Button } from "@/components/ui/button";
-export default function DocumentPreview({ file, isProcessing }) {
-  const [previewUrls, setPreviewUrls] = useState([]);
-  const [zoom, setZoom] = useState(100);
-  const [rotation, setRotation] = useState(0);
-  const objectUrlsRef = useRef([]);
-  useEffect(() => {
-    if (!file) {
-      // Cleanup previous URLs
-      objectUrlsRef.current.forEach((url) => {
-        if (url && url.startsWith("blob:")) {
-          URL.revokeObjectURL(url);
-        }
-      });
-      objectUrlsRef.current = [];
-      setPreviewUrls([]);
-      return;
-    }
-    const loadPreview = async () => {
-      const urls = [];
-      const newObjectUrls = [];
-      // Check if it's a PDF
-      if (file.type === "application/pdf" || file.name?.toLowerCase().endsWith(".pdf")) {
-        try {
-          // Use pdf.js to render PDF pages
-          const pdfjsLib = await import("pdfjs-dist");
-          // Configure worker - use jsdelivr CDN which is more reliable
-          // This will use the same version as the installed package
-          const version = pdfjsLib.version || "4.0.379";
-          pdfjsLib.GlobalWorkerOptions.workerSrc = `https://cdn.jsdelivr.net/npm/pdfjs-dist@${version}/build/pdf.worker.min.mjs`;
-          const arrayBuffer = await file.arrayBuffer();
-          const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;
-          const numPages = pdf.numPages;
-          for (let pageNum = 1; pageNum <= numPages; pageNum++) {
-            const page = await pdf.getPage(pageNum);
-            const viewport = page.getViewport({ scale: 2.0 });
-            const canvas = document.createElement("canvas");
-            const context = canvas.getContext("2d");
-            canvas.height = viewport.height;
-            canvas.width = viewport.width;
-            await page.render({
-              canvasContext: context,
-              viewport: viewport,
-            }).promise;
-            urls.push(canvas.toDataURL("image/jpeg", 0.95));
-          }
-        } catch (error) {
-          console.error("Error loading PDF:", error);
-          // Fallback: show error message
-          urls.push(null);
-        }
-      } else {
-        // For images, create object URL
-        const url = URL.createObjectURL(file);
-        urls.push(url);
-        newObjectUrls.push(url);
-      }
-      // Cleanup old object URLs
-      objectUrlsRef.current.forEach((url) => {
-        if (url && url.startsWith("blob:")) {
-          URL.revokeObjectURL(url);
-        }
-      });
-      objectUrlsRef.current = newObjectUrls;
-      setPreviewUrls(urls);
-    };
-    loadPreview();
-    // Cleanup function - revoke object URLs when component unmounts or file changes
-    return () => {
-      objectUrlsRef.current.forEach((url) => {
-        if (url && url.startsWith("blob:")) {
-          URL.revokeObjectURL(url);
-        }
-      });
-      objectUrlsRef.current = [];
-    };
-  }, [file]);
-  return (
-    <div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden">
-      {/* Header */}
-      <div className="flex items-center justify-between px-5 py-4 border-b border-slate-100">
-        <div className="flex items-center gap-3">
-          <div className="h-8 w-8 rounded-lg bg-indigo-50 flex items-center justify-center">
-            <FileText className="h-4 w-4 text-indigo-600" />
-          </div>
-          <div>
-            <h3 className="font-semibold text-slate-800 text-sm">Document Preview</h3>
-            <p className="text-xs text-slate-400">{file?.name || "No file selected"}</p>
-          </div>
-        </div>
-        {file && (
-          <div className="flex items-center gap-1">
-            <Button
-              variant="ghost"
-              size="icon"
-              className="h-8 w-8 text-slate-400 hover:text-slate-600"
-              onClick={() => setZoom(Math.max(50, zoom - 25))}
-            >
-              <ZoomOut className="h-4 w-4" />
-            </Button>
-            <span className="text-xs text-slate-500 w-12 text-center">{zoom}%</span>
-            <Button
-              variant="ghost"
-              size="icon"
-              className="h-8 w-8 text-slate-400 hover:text-slate-600"
-              onClick={() => setZoom(Math.min(200, zoom + 25))}
-            >
-              <ZoomIn className="h-4 w-4" />
-            </Button>
-            <div className="w-px h-4 bg-slate-200 mx-2" />
-            <Button
-              variant="ghost"
-              size="icon"
-              className="h-8 w-8 text-slate-400 hover:text-slate-600"
-              onClick={() => setRotation((rotation + 90) % 360)}
-            >
-              <RotateCw className="h-4 w-4" />
-            </Button>
-            <Button
-              variant="ghost"
-              size="icon"
-              className="h-8 w-8 text-slate-400 hover:text-slate-600"
-              onClick={() => {
-                setZoom(100);
-                setRotation(0);
-              }}
-            >
-              <Maximize2 className="h-4 w-4" />
-            </Button>
-          </div>
-        )}
-      </div>
-      {/* Preview Area */}
-      <div className="flex-1 p-6 bg-slate-50/50 overflow-auto">
-        {!file ? (
-          <div className="h-full flex items-center justify-center">
-            <div className="text-center">
-              <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
-                <FileText className="h-10 w-10 text-slate-300" />
-              </div>
-              <p className="text-slate-400 text-sm">Upload a document to preview</p>
-            </div>
-          </div>
-        ) : previewUrls.length === 0 ? (
-          <div className="h-full flex items-center justify-center">
-            <div className="text-center">
-              <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
-                <FileText className="h-10 w-10 text-slate-300" />
-              </div>
-              <p className="text-slate-400 text-sm">Loading preview...</p>
-            </div>
-          </div>
-        ) : (
-          <div className="space-y-4">
-            {previewUrls.map((url, index) => (
-              <motion.div
-                key={index}
-                initial={{ opacity: 0, y: 20 }}
-                animate={{ opacity: 1, y: 0 }}
-                transition={{ delay: index * 0.1 }}
-                className="relative bg-white rounded-xl shadow-sm border border-slate-200 overflow-hidden flex items-center justify-center"
-                style={{
-                  minHeight: "400px",
-                }}
-              >
-                {url ? (
-                  <img
-                    src={url}
-                    alt={`Page ${index + 1}`}
-                    className="w-full h-auto"
-                    style={{
-                      transform: `scale(${zoom / 100}) rotate(${rotation}deg)`,
-                      maxWidth: "100%",
-                      objectFit: "contain",
-                      transition: "transform 0.2s ease",
-                    }}
-                  />
-                ) : (
-                  <div className="p-8 text-center">
-                    <p className="text-slate-400 text-sm">Unable to load preview</p>
-                  </div>
-                )}
-                {/* Processing overlay */}
-                {isProcessing && (
-                  <motion.div
-                    initial={{ opacity: 0 }}
-                    animate={{ opacity: 1 }}
-                    className="absolute inset-0 bg-indigo-600/5 backdrop-blur-[1px] pointer-events-none"
-                  >
-                    <motion.div
-                      initial={{ top: 0 }}
-                      animate={{ top: "100%" }}
-                      transition={{
-                        duration: 2,
-                        repeat: Infinity,
-                        ease: "linear",
-                      }}
-                      className="absolute left-0 right-0 h-1 bg-gradient-to-r from-transparent via-indigo-500 to-transparent"
-                    />
-                  </motion.div>
-                )}
-                {/* Page number */}
-                {previewUrls.length > 1 && (
-                  <div className="absolute bottom-3 right-3 text-xs text-slate-400 bg-white/90 px-2 py-1 rounded">
-                    Page {index + 1}
-                  </div>
-                )}
-              </motion.div>
-            ))}
-          </div>
-        )}
-      </div>
-    </div>
-  );
-}

+import React, { useState, useEffect, useRef } from "react";
+import { motion } from "framer-motion";
+import { FileText, ZoomIn, ZoomOut, RotateCw } from "lucide-react";
+import { Button } from "@/components/ui/button";
+export default function DocumentPreview({ file, isProcessing, isFromHistory = false }) {
+  const [previewUrls, setPreviewUrls] = useState([]);
+  const [zoom, setZoom] = useState(100);
+  const [rotation, setRotation] = useState(0);
+  const objectUrlsRef = useRef([]);
+  useEffect(() => {
+    if (!file) {
+      // Cleanup previous URLs
+      objectUrlsRef.current.forEach((url) => {
+        if (url && url.startsWith("blob:")) {
+          URL.revokeObjectURL(url);
+        }
+      });
+      objectUrlsRef.current = [];
+      setPreviewUrls([]);
+      return;
+    }
+    const loadPreview = async () => {
+      const urls = [];
+      const newObjectUrls = [];
+      // Check if it's a PDF
+      if (file.type === "application/pdf" || file.name?.toLowerCase().endsWith(".pdf")) {
+        try {
+          // Use pdf.js to render PDF pages
+          const pdfjsLib = await import("pdfjs-dist");
+          // Configure worker - use jsdelivr CDN which is more reliable
+          // This will use the same version as the installed package
+          const version = pdfjsLib.version || "4.0.379";
+          pdfjsLib.GlobalWorkerOptions.workerSrc = `https://cdn.jsdelivr.net/npm/pdfjs-dist@${version}/build/pdf.worker.min.mjs`;
+          const arrayBuffer = await file.arrayBuffer();
+          const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;
+          const numPages = pdf.numPages;
+          for (let pageNum = 1; pageNum <= numPages; pageNum++) {
+            const page = await pdf.getPage(pageNum);
+            const viewport = page.getViewport({ scale: 2.0 });
+            const canvas = document.createElement("canvas");
+            const context = canvas.getContext("2d");
+            canvas.height = viewport.height;
+            canvas.width = viewport.width;
+            await page.render({
+              canvasContext: context,
+              viewport: viewport,
+            }).promise;
+            urls.push(canvas.toDataURL("image/jpeg", 0.95));
+          }
+        } catch (error) {
+          console.error("Error loading PDF:", error);
+          // Fallback: show error message
+          urls.push(null);
+        }
+      } else {
+        // For images, create object URL
+        const url = URL.createObjectURL(file);
+        urls.push(url);
+        newObjectUrls.push(url);
+      }
+      // Cleanup old object URLs
+      objectUrlsRef.current.forEach((url) => {
+        if (url && url.startsWith("blob:")) {
+          URL.revokeObjectURL(url);
+        }
+      });
+      objectUrlsRef.current = newObjectUrls;
+      setPreviewUrls(urls);
+    };
+    loadPreview();
+    // Cleanup function - revoke object URLs when component unmounts or file changes
+    return () => {
+      objectUrlsRef.current.forEach((url) => {
+        if (url && url.startsWith("blob:")) {
+          URL.revokeObjectURL(url);
+        }
+      });
+      objectUrlsRef.current = [];
+    };
+  }, [file]);
+  return (
+    <div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden">
+      {/* Header */}
+      <div className="flex items-center justify-between px-5 py-4 border-b border-slate-100">
+        <div className="flex items-center gap-3">
+          <div className="h-8 w-8 rounded-lg bg-indigo-50 flex items-center justify-center">
+            <FileText className="h-4 w-4 text-indigo-600" />
+          </div>
+          <div>
+            <h3 className="font-semibold text-slate-800 text-sm">Document Preview</h3>
+            <p className="text-xs text-slate-400">{file?.name || "No file selected"}</p>
+          </div>
+        </div>
+        {file && (
+          <div className="flex items-center gap-1">
+            <Button
+              variant="ghost"
+              size="icon"
+              className="h-8 w-8 text-slate-400 hover:text-slate-600"
+              onClick={() => setZoom(Math.max(50, zoom - 25))}
+            >
+              <ZoomOut className="h-4 w-4" />
+            </Button>
+            <span className="text-xs text-slate-500 w-12 text-center">{zoom}%</span>
+            <Button
+              variant="ghost"
+              size="icon"
+              className="h-8 w-8 text-slate-400 hover:text-slate-600"
+              onClick={() => setZoom(Math.min(200, zoom + 25))}
+            >
+              <ZoomIn className="h-4 w-4" />
+            </Button>
+            <div className="w-px h-4 bg-slate-200 mx-2" />
+            <Button
+              variant="ghost"
+              size="icon"
+              className="h-8 w-8 text-slate-400 hover:text-slate-600"
+              onClick={() => setRotation((rotation + 90) % 360)}
+            >
+              <RotateCw className="h-4 w-4" />
+            </Button>
+          </div>
+        )}
+      </div>
+      {/* Preview Area */}
+      <div className="flex-1 p-6 bg-slate-50/50 overflow-auto">
+        {!file ? (
+          <div className="h-full flex items-center justify-center">
+            <div className="text-center">
+              <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
+                <FileText className="h-10 w-10 text-slate-300" />
+              </div>
+              <p className="text-slate-400 text-sm">Upload a document to preview</p>
+            </div>
+          </div>
+        ) : previewUrls.length === 0 ? (
+          <div className="h-full flex items-center justify-center">
+            <div className="text-center">
+              <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
+                <FileText className="h-10 w-10 text-slate-300" />
+              </div>
+              <p className="text-slate-400 text-sm">Loading preview...</p>
+            </div>
+          </div>
+        ) : (
+          <div className="space-y-4">
+            {previewUrls.map((url, index) => (
+              <motion.div
+                key={index}
+                initial={{ opacity: 0, y: 20 }}
+                animate={{ opacity: 1, y: 0 }}
+                transition={{ delay: index * 0.1 }}
+                className="relative bg-white rounded-xl shadow-sm border border-slate-200 overflow-hidden flex items-center justify-center"
+                style={{
+                  minHeight: "400px",
+                }}
+              >
+                {url ? (
+                  <img
+                    src={url}
+                    alt={`Page ${index + 1}`}
+                    className="w-full h-auto"
+                    style={{
+                      transform: `scale(${zoom / 100}) rotate(${rotation}deg)`,
+                      maxWidth: "100%",
+                      objectFit: "contain",
+                      transition: "transform 0.2s ease",
+                    }}
+                  />
+                ) : (
+                  <div className="p-8 text-center">
+                    <p className="text-slate-400 text-sm">
+                      {isFromHistory
+                        ? "Original document not available for historical extractions"
+                        : "Unable to load preview"}
+                    </p>
+                  </div>
+                )}
+                {/* Processing overlay */}
+                {isProcessing && (
+                  <motion.div
+                    initial={{ opacity: 0 }}
+                    animate={{ opacity: 1 }}
+                    className="absolute inset-0 bg-indigo-600/5 backdrop-blur-[1px] pointer-events-none"
+                  >
+                    <motion.div
+                      initial={{ top: 0 }}
+                      animate={{ top: "100%" }}
+                      transition={{
+                        duration: 2,
+                        repeat: Infinity,
+                        ease: "linear",
+                      }}
+                      className="absolute left-0 right-0 h-1 bg-gradient-to-r from-transparent via-indigo-500 to-transparent"
+                    />
+                  </motion.div>
+                )}
+                {/* Page number */}
+                {previewUrls.length > 1 && (
+                  <div className="absolute bottom-3 right-3 text-xs text-slate-400 bg-white/90 px-2 py-1 rounded">
+                    Page {index + 1}
+                  </div>
+                )}
+              </motion.div>
+            ))}
+          </div>
+        )}
+      </div>
+    </div>
+  );
+}

frontend/src/components/ocr/ExtractionOutput.jsx CHANGED Viewed

@@ -1,639 +1,1201 @@
-import React, { useState, useEffect } from "react";
-import { motion, AnimatePresence } from "framer-motion";
-import {
-  Code2,
-  Copy,
-  Check,
-  Braces,
-  FileCode2,
-  FileText,
-  Sparkles,
-  ChevronDown,
-} from "lucide-react";
-import { Button } from "@/components/ui/button";
-import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
-import { cn } from "@/lib/utils";
-// Mock extracted data
-const mockData = {
-  document: {
-    type: "Invoice",
-    confidence: 0.98,
-  },
-  vendor: {
-    name: "Acme Corporation",
-    address: "123 Business Ave, Suite 400",
-    city: "San Francisco",
-    state: "CA",
-    zip: "94102",
-    phone: "+1 (555) 123-4567",
-  },
-  invoice: {
-    number: "INV-2024-0847",
-    date: "2024-01-15",
-    due_date: "2024-02-14",
-    po_number: "PO-9823",
-  },
-  items: [
-    { description: "Professional Services", quantity: 40, unit_price: 150.0, total: 6000.0 },
-    { description: "Software License", quantity: 5, unit_price: 299.99, total: 1499.95 },
-    { description: "Support Package", quantity: 1, unit_price: 500.0, total: 500.0 },
-  ],
-  totals: {
-    subtotal: 7999.95,
-    tax_rate: 0.0875,
-    tax_amount: 699.99,
-    total: 8699.94,
-  },
-};
-const mockXML = `<?xml version="1.0" encoding="UTF-8"?>
-<extraction>
-  <document type="Invoice" confidence="0.98"/>
-  <vendor>
-    <name>Acme Corporation</name>
-    <address>123 Business Ave, Suite 400</address>
-    <city>San Francisco</city>
-    <state>CA</state>
-    <zip>94102</zip>
-  </vendor>
-  <invoice>
-    <number>INV-2024-0847</number>
-    <date>2024-01-15</date>
-    <due_date>2024-02-14</due_date>
-  </invoice>
-  <items>
-    <item>
-      <description>Professional Services</description>
-      <quantity>40</quantity>
-      <total>6000.00</total>
-    </item>
-  </items>
-  <totals>
-    <subtotal>7999.95</subtotal>
-    <tax>699.99</tax>
-    <total>8699.94</total>
-  </totals>
-</extraction>`;
-const mockText = `INVOICE
-ACME CORPORATION
-123 Business Ave, Suite 400
-San Francisco, CA 94102
-Phone: +1 (555) 123-4567
-Invoice Number: INV-2024-0847
-Invoice Date: January 15, 2024
-Due Date: February 14, 2024
-PO Number: PO-9823
-BILL TO:
-Customer Name
-456 Client Street
-New York, NY 10001
-ITEMS:
-─────────────────────────────────────────────────────────
-Description                  Qty    Unit Price    Total
-─────────────────────────────────────────────────────────
-Professional Services         40      $150.00    $6,000.00
-Software License               5      $299.99    $1,499.95
-Support Package                1      $500.00      $500.00
-─────────────────────────────────────────────────────────
-                              Subtotal:    $7,999.95
-                              Tax (8.75%):   $699.99
-                              ─────────────────────────
-                              TOTAL:       $8,699.94
-Payment Terms: Net 30
-Thank you for your business!`;
-// Helper function to convert object to XML
-// Prepare fields for JSON/XML output - remove duplicates and restructure
-function prepareFieldsForOutput(fields, format = "json") {
-  if (!fields || typeof fields !== "object") {
-    return fields;
-  }
-  const output = { ...fields };
-  // Remove full_text from top-level if pages array exists (to avoid duplication)
-  if (output.pages && Array.isArray(output.pages) && output.pages.length > 0) {
-    delete output.full_text;
-    // Clean up each page: remove full_text from page.fields (it duplicates page.text)
-    output.pages = output.pages.map(page => {
-      const cleanedPage = { ...page };
-      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
-        const cleanedFields = { ...cleanedPage.fields };
-        // Remove full_text from page fields (duplicates page.text)
-        delete cleanedFields.full_text;
-        cleanedPage.fields = cleanedFields;
-      }
-      return cleanedPage;
-    });
-  }
-  // For JSON and XML: restructure pages into separate top-level fields (page_1, page_2, etc.)
-  if ((format === "json" || format === "xml") && output.pages && Array.isArray(output.pages)) {
-    // Get top-level field keys (these are merged from all pages - avoid duplicating in page fields)
-    const topLevelKeys = new Set(Object.keys(output).filter(k => k !== "pages" && k !== "full_text"));
-    output.pages.forEach((page, idx) => {
-      const pageNum = page.page_number || idx + 1;
-      const pageFields = page.fields || {};
-      // Remove duplicate fields from page.fields:
-      // 1. Remove full_text (duplicates page.text)
-      // 2. Remove fields that match top-level fields (already shown at root)
-      const cleanedPageFields = {};
-      for (const [key, value] of Object.entries(pageFields)) {
-        // Skip full_text and fields that match top-level exactly
-        if (key !== "full_text" && (!topLevelKeys.has(key) || (value !== output[key]))) {
-          cleanedPageFields[key] = value;
-        }
-      }
-      const pageObj = {
-        text: page.text || "",
-        confidence: page.confidence || 0,
-        doc_type: page.doc_type || "other"
-      };
-      // Only add fields if there are unique page-specific fields
-      if (Object.keys(cleanedPageFields).length > 0) {
-        pageObj.fields = cleanedPageFields;
-      }
-      output[`page_${pageNum}`] = pageObj;
-    });
-    // Remove pages array - we now have page_1, page_2, etc. as separate fields
-    delete output.pages;
-  }
-  return output;
-}
-function objectToXML(obj, rootName = "extraction") {
-  // Prepare fields - remove full_text if pages exist
-  const preparedObj = prepareFieldsForOutput(obj, "xml");
-  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;
-  const convert = (obj, indent = "  ") => {
-    for (const [key, value] of Object.entries(obj)) {
-      if (value === null || value === undefined) continue;
-      // Skip full_text if pages exist (already handled in prepareFieldsForOutput)
-      if (key === "full_text" && obj.pages && Array.isArray(obj.pages) && obj.pages.length > 0) {
-        continue;
-      }
-      if (Array.isArray(value)) {
-        value.forEach((item) => {
-          xml += `${indent}<${key}>\n`;
-          if (typeof item === "object") {
-            convert(item, indent + "  ");
-          } else {
-            xml += `${indent}  ${escapeXML(String(item))}\n`;
-          }
-          xml += `${indent}</${key}>\n`;
-        });
-      } else if (typeof value === "object") {
-        xml += `${indent}<${key}>\n`;
-        convert(value, indent + "  ");
-        xml += `${indent}</${key}>\n`;
-      } else {
-        xml += `${indent}<${key}>${escapeXML(String(value))}</${key}>\n`;
-      }
-    }
-  };
-  convert(preparedObj);
-  xml += `</${rootName}>`;
-  return xml;
-}
-function escapeXML(str) {
-  return str
-    .replace(/&/g, "&amp;")
-    .replace(/</g, "&lt;")
-    .replace(/>/g, "&gt;")
-    .replace(/"/g, "&quot;")
-    .replace(/'/g, "&apos;");
-}
-// Helper function to format fields as readable text
-function fieldsToText(fields) {
-  if (!fields || typeof fields !== "object") {
-    return "No data extracted.";
-  }
-  // If full_text exists, show it prominently first
-  if (fields.full_text) {
-    let text = "=== FULL EXTRACTED TEXT ===\n\n";
-    text += fields.full_text;
-    // Don't show pages array separately if full_text already contains page markers
-    // (full_text from backend already includes "=== PAGE 1 ===" etc.)
-    const hasPageMarkers = fields.full_text.includes("=== PAGE") || fields.full_text.includes("--- Page");
-    // Only show pages array if full_text doesn't already have page breakdown
-    if (!hasPageMarkers && fields.pages && Array.isArray(fields.pages)) {
-      text += "\n\n=== TEXT BY PAGE ===\n\n";
-      fields.pages.forEach((page, idx) => {
-        text += `--- Page ${page.page_number || idx + 1} ---\n`;
-        text += page.text || "";
-        text += "\n\n";
-      });
-    }
-    // Then show other structured fields
-    const otherFields = { ...fields };
-    delete otherFields.full_text;
-    delete otherFields.pages;
-    if (Object.keys(otherFields).length > 0) {
-      text += "\n\n=== STRUCTURED FIELDS ===\n\n";
-      const formatValue = (key, value, indent = "") => {
-        if (Array.isArray(value)) {
-          text += `${indent}${key}:\n`;
-          value.forEach((item, idx) => {
-            if (typeof item === "object") {
-              text += `${indent}  Item ${idx + 1}:\n`;
-              Object.entries(item).forEach(([k, v]) => formatValue(k, v, indent + "    "));
-            } else {
-              text += `${indent}  - ${item}\n`;
-            }
-          });
-        } else if (typeof value === "object" && value !== null) {
-          text += `${indent}${key}:\n`;
-          Object.entries(value).forEach(([k, v]) => formatValue(k, v, indent + "  "));
-        } else {
-          text += `${indent}${key}: ${value}\n`;
-        }
-      };
-      Object.entries(otherFields).forEach(([key, value]) => {
-        formatValue(key, value);
-        text += "\n";
-      });
-    }
-    return text.trim();
-  }
-  // Fallback: format all fields normally
-  let text = "";
-  const formatValue = (key, value, indent = "") => {
-    if (Array.isArray(value)) {
-      text += `${indent}${key}:\n`;
-      value.forEach((item, idx) => {
-        if (typeof item === "object") {
-          text += `${indent}  Item ${idx + 1}:\n`;
-          Object.entries(item).forEach(([k, v]) => formatValue(k, v, indent + "    "));
-        } else {
-          text += `${indent}  - ${item}\n`;
-        }
-      });
-    } else if (typeof value === "object" && value !== null) {
-      text += `${indent}${key}:\n`;
-      Object.entries(value).forEach(([k, v]) => formatValue(k, v, indent + "  "));
-    } else {
-      text += `${indent}${key}: ${value}\n`;
-    }
-  };
-  Object.entries(fields).forEach(([key, value]) => {
-    formatValue(key, value);
-    text += "\n";
-  });
-  return text.trim() || "No data extracted.";
-}
-export default function ExtractionOutput({ hasFile, isProcessing, isComplete, extractionResult }) {
-  const [activeTab, setActiveTab] = useState("json");
-  const [copied, setCopied] = useState(false);
-  // Get fields from extraction result, default to empty object
-  const fields = extractionResult?.fields || {};
-  const confidence = extractionResult?.confidence || 0;
-  const fieldsExtracted = extractionResult?.fieldsExtracted || 0;
-  const totalTime = extractionResult?.totalTime || 0;
-  // Initialize expanded sections based on available fields
-  const [expandedSections, setExpandedSections] = useState(() =>
-    Object.keys(fields).slice(0, 5) // Expand first 5 sections by default
-  );
-  const handleCopy = () => {
-    let content = "";
-    if (activeTab === "json") {
-      const preparedFields = prepareFieldsForOutput(fields, "json");
-      content = JSON.stringify(preparedFields, null, 2);
-    } else if (activeTab === "xml") {
-      content = objectToXML(fields);
-    } else {
-      content = fieldsToText(fields);
-    }
-    navigator.clipboard.writeText(content);
-    setCopied(true);
-    setTimeout(() => setCopied(false), 2000);
-  };
-  // Get prepared fields for display
-  const preparedFields = React.useMemo(() => {
-    return prepareFieldsForOutput(fields, "json");
-  }, [fields]);
-  // Update expanded sections when fields change
-  React.useEffect(() => {
-    if (extractionResult?.fields) {
-      setExpandedSections(Object.keys(extractionResult.fields).slice(0, 5));
-    }
-  }, [extractionResult]);
-  const toggleSection = (section) => {
-    setExpandedSections((prev) =>
-      prev.includes(section) ? prev.filter((s) => s !== section) : [...prev, section]
-    );
-  };
-  const renderValue = (value) => {
-    if (typeof value === "number") {
-      return <span className="text-amber-600">{value}</span>;
-    }
-    if (typeof value === "string") {
-      return <span className="text-emerald-600">"{value}"</span>;
-    }
-    return String(value);
-  };
-  const renderSection = (key, value, level = 0) => {
-    const isExpanded = expandedSections.includes(key);
-    const isObject = typeof value === "object" && value !== null;
-    const isArray = Array.isArray(value);
-    if (!isObject) {
-      return (
-        <div
-          key={key}
-          className="flex items-start gap-2 py-1"
-          style={{ paddingLeft: level * 16 }}
-        >
-          <span className="text-violet-500">"{key}"</span>
-          <span className="text-slate-400">:</span>
-          {renderValue(value)}
-        </div>
-      );
-    }
-    return (
-      <div key={key}>
-        <button
-          onClick={() => toggleSection(key)}
-          className="flex items-center gap-2 py-1 hover:bg-slate-50 w-full text-left rounded"
-          style={{ paddingLeft: level * 16 }}
-        >
-          <ChevronDown
-            className={cn(
-              "h-3 w-3 text-slate-400 transition-transform",
-              !isExpanded && "-rotate-90"
-            )}
-          />
-          <span className="text-violet-500">"{key}"</span>
-          <span className="text-slate-400">:</span>
-          <span className="text-slate-400">{isArray ? "[" : "{"}</span>
-          {!isExpanded && (
-            <span className="text-slate-300 text-xs">
-              {isArray ? `${value.length} items` : `${Object.keys(value).length} fields`}
-            </span>
-          )}
-        </button>
-        <AnimatePresence>
-          {isExpanded && (
-            <motion.div
-              initial={{ height: 0, opacity: 0 }}
-              animate={{ height: "auto", opacity: 1 }}
-              exit={{ height: 0, opacity: 0 }}
-              transition={{ duration: 0.2 }}
-              className="overflow-hidden"
-            >
-              {isArray ? (
-                value.map((item, idx) => (
-                  <div key={idx} className="border-l border-slate-100 ml-4">
-                    {Object.entries(item).map(([k, v]) => renderSection(k, v, level + 2))}
-                    {idx < value.length - 1 && <div className="h-2" />}
-                  </div>
-                ))
-              ) : (
-                Object.entries(value).map(([k, v]) => renderSection(k, v, level + 1))
-              )}
-              <div style={{ paddingLeft: level * 16 }} className="text-slate-400">
-                {isArray ? "]" : "}"}
-              </div>
-            </motion.div>
-          )}
-        </AnimatePresence>
-      </div>
-    );
-  };
-  return (
-    <div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden">
-      {/* Header */}
-      <div className="flex items-center justify-between px-5 py-4 border-b border-slate-100">
-        <div className="flex items-center gap-3">
-          <div className="h-8 w-8 rounded-lg bg-emerald-50 flex items-center justify-center">
-            <Code2 className="h-4 w-4 text-emerald-600" />
-          </div>
-          <div>
-            <h3 className="font-semibold text-slate-800 text-sm">Extracted Data</h3>
-            <p className="text-xs text-slate-400">
-              {isComplete
-                ? `${fieldsExtracted} field${fieldsExtracted !== 1 ? 's' : ''} extracted`
-                : "Waiting for extraction"}
-            </p>
-          </div>
-        </div>
-        {isComplete && (
-          <div className="flex items-center gap-2">
-            <Tabs value={activeTab} onValueChange={setActiveTab}>
-              <TabsList className="h-8 bg-slate-100 p-0.5">
-                <TabsTrigger value="text" className="h-7 text-xs gap-1.5">
-                  <FileText className="h-3 w-3" />
-                  Text
-                </TabsTrigger>
-                <TabsTrigger value="json" className="h-7 text-xs gap-1.5">
-                  <Braces className="h-3 w-3" />
-                  JSON
-                </TabsTrigger>
-                <TabsTrigger value="xml" className="h-7 text-xs gap-1.5">
-                  <FileCode2 className="h-3 w-3" />
-                  XML
-                </TabsTrigger>
-              </TabsList>
-            </Tabs>
-            <Button
-              variant="ghost"
-              size="sm"
-              onClick={handleCopy}
-              className="h-8 text-xs gap-1.5"
-            >
-              {copied ? (
-                <>
-                  <Check className="h-3 w-3 text-emerald-500" />
-                  Copied
-                </>
-              ) : (
-                <>
-                  <Copy className="h-3 w-3" />
-                  Copy
-                </>
-              )}
-            </Button>
-          </div>
-        )}
-      </div>
-      {/* Output Area */}
-      <div className="flex-1 overflow-auto">
-        {!hasFile ? (
-          <div className="h-full flex items-center justify-center p-6">
-            <div className="text-center">
-              <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
-                <Code2 className="h-10 w-10 text-slate-300" />
-              </div>
-              <p className="text-slate-400 text-sm">Extracted data will appear here</p>
-            </div>
-          </div>
-        ) : isProcessing ? (
-          <div className="h-full flex items-center justify-center p-6">
-            <div className="text-center">
-              <motion.div
-                animate={{ rotate: 360 }}
-                transition={{ duration: 2, repeat: Infinity, ease: "linear" }}
-                className="h-16 w-16 mx-auto rounded-2xl bg-gradient-to-br from-indigo-100 to-violet-100 flex items-center justify-center mb-4"
-              >
-                <Sparkles className="h-8 w-8 text-indigo-500" />
-              </motion.div>
-              <p className="text-slate-700 font-medium mb-1">Extracting data...</p>
-              <p className="text-slate-400 text-sm">Analyzing document structure</p>
-              <div className="mt-6 flex items-center justify-center gap-1">
-                {[0, 1, 2].map((i) => (
-                  <motion.div
-                    key={i}
-                    animate={{ scale: [1, 1.2, 1] }}
-                    transition={{
-                      duration: 0.6,
-                      repeat: Infinity,
-                      delay: i * 0.2,
-                    }}
-                    className="h-2 w-2 rounded-full bg-indigo-400"
-                  />
-                ))}
-              </div>
-            </div>
-          </div>
-        ) : isComplete && Object.keys(fields).length === 0 ? (
-          <div className="h-full flex items-center justify-center p-6">
-            <div className="text-center">
-              <div className="h-20 w-20 mx-auto rounded-2xl bg-amber-100 flex items-center justify-center mb-4">
-                <Code2 className="h-10 w-10 text-amber-600" />
-              </div>
-              <p className="text-slate-600 font-medium mb-1">No data extracted</p>
-              <p className="text-slate-400 text-sm">The document may not contain extractable fields</p>
-            </div>
-          </div>
-        ) : (
-          <div className="p-4 font-mono text-sm">
-            {activeTab === "text" ? (
-              <pre className="text-sm text-slate-700 whitespace-pre-wrap leading-relaxed">
-                {fieldsToText(fields)}
-              </pre>
-            ) : activeTab === "json" ? (
-              <div className="space-y-1">
-                <span className="text-slate-400">{"{"}</span>
-                {Object.keys(preparedFields).length > 0 ? (
-                  Object.entries(preparedFields).map(([key, value]) =>
-                    renderSection(key, value, 1)
-                  )
-                ) : (
-                  <div className="pl-4 text-slate-400 italic">No fields extracted</div>
-                )}
-                <span className="text-slate-400">{"}"}</span>
-              </div>
-            ) : (
-              <pre className="text-sm text-slate-600 whitespace-pre-wrap">
-                {objectToXML(fields).split("\n").map((line, i) => (
-                  <div key={i} className="hover:bg-slate-50 px-2 -mx-2 rounded">
-                    {line.includes("<") ? (
-                      <>
-                        {line.split(/(<\/?[\w\s=".-]+>)/g).map((part, j) => {
-                          if (part.startsWith("</")) {
-                            return (
-                              <span key={j} className="text-rose-500">
-                                {part}
-                              </span>
-                            );
-                          }
-                          if (part.startsWith("<")) {
-                            return (
-                              <span key={j} className="text-indigo-500">
-                                {part}
-                              </span>
-                            );
-                          }
-                          return (
-                            <span key={j} className="text-slate-700">
-                              {part}
-                            </span>
-                          );
-                        })}
-                      </>
-                    ) : (
-                      line
-                    )}
-                  </div>
-                ))}
-              </pre>
-            )}
-          </div>
-        )}
-      </div>
-      {/* Confidence Footer */}
-      {isComplete && extractionResult && (
-        <div className="px-5 py-3 border-t border-slate-100 bg-slate-50/50">
-          <div className="flex items-center justify-between text-xs">
-            <div className="flex items-center gap-4">
-              <div className="flex items-center gap-1.5">
-                <div className={cn(
-                  "h-2 w-2 rounded-full",
-                  confidence >= 90 ? "bg-emerald-500" : confidence >= 70 ? "bg-amber-500" : "bg-red-500"
-                )} />
-                <span className="text-slate-500">Confidence:</span>
-                <span className="font-semibold text-slate-700">
-                  {confidence > 0 ? `${confidence.toFixed(1)}%` : "N/A"}
-                </span>
-              </div>
-              <div className="flex items-center gap-1.5">
-                <span className="text-slate-500">Fields:</span>
-                <span className="font-semibold text-slate-700">{fieldsExtracted}</span>
-              </div>
-            </div>
-            <span className="text-slate-400">
-              Processed in {totalTime >= 1000 ? `${(totalTime / 1000).toFixed(1)}s` : `${totalTime}ms`}
-            </span>
-          </div>
-        </div>
-      )}
-    </div>
-  );
-}

+import React, { useState, useEffect, useRef } from "react";
+import { motion, AnimatePresence } from "framer-motion";
+import {
+  Code2,
+  Copy,
+  Check,
+  Braces,
+  FileCode2,
+  FileText,
+  Sparkles,
+  ChevronDown,
+  Upload,
+} from "lucide-react";
+import { Button } from "@/components/ui/button";
+import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
+import { cn } from "@/lib/utils";
+/**
+ * Convert pipe-separated (markdown-style) tables embedded in `text` into
+ * HTML `<table>` markup, leaving every other line untouched.
+ *
+ * A table starts at a line that contains at least two pipes, is not a
+ * separator row, and yields at least two header cells. An optional separator
+ * row (only pipes, dashes, colons and whitespace) directly after the header
+ * is skipped. Following lines are consumed as data rows until a blank line
+ * or a line without a pipe is reached; separator rows in between are ignored.
+ *
+ * Data rows with fewer cells than the header are padded with empty cells so
+ * that every row spans the full table width. Rows with extra cells keep them,
+ * so no extracted content is dropped.
+ *
+ * @param {string} text - Raw text that may contain pipe tables.
+ * @returns {string} The text with tables replaced by HTML; falsy input is
+ *   returned unchanged.
+ */
+function convertPipeTablesToHTML(text) {
+  if (!text) return text;
+  // Non-global regex, so .test() carries no lastIndex state between calls.
+  const separatorPattern = /^[\s|\-:]+$/;
+  const isSeparatorRow = (line) => separatorPattern.test(line.trim());
+  // Split a row into trimmed cells, dropping the single empty cell produced
+  // by a leading and/or trailing border pipe.
+  const splitRow = (line) => {
+    const cells = line.split('|').map((cell) => cell.trim());
+    if (cells.length > 0 && !cells[0]) cells.shift();
+    if (cells.length > 0 && !cells[cells.length - 1]) cells.pop();
+    return cells;
+  };
+  const lines = text.split('\n');
+  const result = [];
+  let i = 0;
+  while (i < lines.length) {
+    const line = lines[i];
+    // At least two pipes are required for a header candidate.
+    const isHeaderCandidate = line.split('|').length >= 3 && !isSeparatorRow(line);
+    const headerCells = isHeaderCandidate ? splitRow(line) : [];
+    if (headerCells.length < 2) {
+      // Not a table row, add as-is
+      result.push(line);
+      i++;
+      continue;
+    }
+    let j = i + 1;
+    // Skip separator line if present
+    if (j < lines.length && isSeparatorRow(lines[j])) {
+      j++;
+    }
+    // Collect data rows
+    const bodyRows = [];
+    while (j < lines.length) {
+      const rowLine = lines[j];
+      // An empty line or a line without any pipe ends the table.
+      if (!rowLine.trim() || !rowLine.includes('|')) break;
+      if (!isSeparatorRow(rowLine)) {
+        const rowCells = splitRow(rowLine);
+        // Pad short rows so each row has one cell per header column.
+        while (rowCells.length < headerCells.length) rowCells.push('');
+        bodyRows.push(rowCells);
+      }
+      j++;
+    }
+    // Convert to HTML table
+    const headHtml = headerCells
+      .map((cell) => `<th class="border border-gray-300 px-4 py-2 bg-gray-100 font-semibold text-left">${escapeHtml(cell)}</th>`)
+      .join('');
+    const bodyHtml = bodyRows
+      .map((cells) => {
+        const cellHtml = cells
+          .map((cell) => `<td class="border border-gray-300 px-4 py-2">${escapeHtml(cell)}</td>`)
+          .join('');
+        return `<tr>${cellHtml}</tr>\n`;
+      })
+      .join('');
+    result.push(
+      `<table class="border-collapse border border-gray-300 w-full my-4">\n<thead>\n<tr>${headHtml}</tr>\n</thead>\n<tbody>\n${bodyHtml}</tbody>\n</table>`
+    );
+    // Resume scanning at the first line after the table.
+    i = j;
+  }
+  return result.join('\n');
+}
+/**
+ * Escape a value for safe inclusion in HTML markup.
+ *
+ * Uses plain string replacement rather than a throwaway DOM element, so it
+ * does not need a global `document` (it also works during server-side
+ * rendering and in unit tests) and allocates no element per call. Quotes are
+ * escaped too, which keeps the result safe inside attribute values; browsers
+ * render the escaped output as the same visible text.
+ *
+ * @param {*} text - Value to escape; coerced to a string.
+ * @returns {string} The escaped text, or '' when `text` is falsy.
+ */
+function escapeHtml(text) {
+  if (!text) return '';
+  return String(text)
+    .replace(/&/g, '&amp;') // first, so the entities added below are not re-escaped
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&#39;');
+}
+// Helper function to convert markdown/HTML text to safe HTML
+function renderMarkdownToHTML(text) {
+  if (!text) return "";
+  let html = text;
+  // FIRST: Convert pipe-separated tables to HTML tables
+  html = convertPipeTablesToHTML(html);
+  // Convert LaTeX-style superscripts/subscripts FIRST (before protecting tables)
+  // This ensures they're converted everywhere, including inside tables
+  // Convert LaTeX-style superscripts: $^{text}$ or $^text$ to <sup>text</sup>
+  html = html.replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, '<sup>$1</sup>');
+  html = html.replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, '<sup>$1</sup>');
+  // Convert LaTeX-style subscripts: $_{text}$ or $_text$ to <sub>text</sub>
+  html = html.replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, '<sub>$1</sub>');
+  html = html.replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, '<sub>$1</sub>');
+  // Split by HTML tags to preserve existing HTML (like tables)
+  // Process markdown only in non-HTML sections
+  // First, protect existing HTML blocks (tables, etc.)
+  const htmlBlocks = [];
+  let htmlBlockIndex = 0;
+  // Extract and protect HTML table blocks
+  html = html.replace(/<table[\s\S]*?<\/table>/gi, (match) => {
+    const placeholder = `__HTML_BLOCK_${htmlBlockIndex}__`;
+    htmlBlocks[htmlBlockIndex] = match;
+    htmlBlockIndex++;
+    return placeholder;
+  });
+  // Convert markdown headers (only if not inside HTML)
+  html = html.replace(/^### (.*$)/gim, '<h3>$1</h3>');
+  html = html.replace(/^## (.*$)/gim, '<h2>$1</h2>');
+  html = html.replace(/^# (.*$)/gim, '<h1>$1</h1>');
+  // Convert markdown bold/italic (but not inside HTML tags)
+  html = html.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
+  html = html.replace(/\*(.*?)\*/g, '<em>$1</em>');
+  // Convert markdown links
+  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2" target="_blank" rel="noopener noreferrer">$1</a>');
+  // Convert line breaks to paragraphs (but preserve structure around HTML blocks)
+  const parts = html.split(/(__HTML_BLOCK_\d+__)/);
+  const processedParts = parts.map((part, index) => {
+    if (part.match(/^__HTML_BLOCK_\d+__$/)) {
+      // Restore HTML block
+      const blockIndex = parseInt(part.match(/\d+/)[0]);
+      return htmlBlocks[blockIndex];
+    } else {
+      // Process markdown in this part
+      let processed = part;
+      // Convert double line breaks to paragraph breaks
+      processed = processed.replace(/\n\n+/g, '</p><p>');
+      // Convert single line breaks to <br> (but not if already in a tag)
+      processed = processed.replace(/([^\n>])\n([^\n<])/g, '$1<br>$2');
+      // Wrap in paragraph if there's content
+      if (processed.trim() && !processed.trim().startsWith('<')) {
+        processed = '<p>' + processed + '</p>';
+      }
+      return processed;
+    }
+  });
+  html = processedParts.join('');
+  // Process LaTeX notation in restored HTML blocks (tables) as well
+  // This handles any LaTeX that might be in table cells
+  html = html.replace(/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*\^\s*\{([^}]+)\}\s*\$([^<]*)(<\/td>|<\/th>)/gi,
+    (match, openTag, before, supText, after, closeTag) => {
+      return openTag + before + '<sup>' + supText + '</sup>' + after + closeTag;
+    });
+  html = html.replace(/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*\^\s*([^\s$<>]+)\s*\$([^<]*)(<\/td>|<\/th>)/gi,
+    (match, openTag, before, supText, after, closeTag) => {
+      return openTag + before + '<sup>' + supText + '</sup>' + after + closeTag;
+    });
+  html = html.replace(/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*_\s*\{([^}]+)\}\s*\$([^<]*)(<\/td>|<\/th>)/gi,
+    (match, openTag, before, subText, after, closeTag) => {
+      return openTag + before + '<sub>' + subText + '</sub>' + after + closeTag;
+    });
+  html = html.replace(/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*_\s*([^\s$<>]+)\s*\$([^<]*)(<\/td>|<\/th>)/gi,
+    (match, openTag, before, subText, after, closeTag) => {
+      return openTag + before + '<sub>' + subText + '</sub>' + after + closeTag;
+    });
+  // Clean up empty paragraphs and fix paragraph structure
+  html = html.replace(/<p><\/p>/g, '');
+  html = html.replace(/<p>\s*<br>\s*<\/p>/g, '');
+  html = html.replace(/<p>\s*<\/p>/g, '');
+  // Ensure proper spacing around HTML blocks
+  html = html.replace(/(<\/table>)\s*(<h[1-3])/g, '$1</p><p>$2');
+  html = html.replace(/(<\/h[1-3]>)\s*(<table)/g, '$1<p>$2');
+  html = html.replace(/(<\/table>)\s*(<p>)/g, '$1$2');
+  return html;
+}
+// Mock extracted data: a sample invoice in the structured (JSON-like) shape,
+// with document metadata, vendor, invoice header, line items and totals.
+// NOTE(review): no use of this constant is visible in this part of the file --
+// confirm it is still referenced before relying on it.
+const mockData = {
+  document: {
+    type: "Invoice",
+    confidence: 0.98,
+  },
+  vendor: {
+    name: "Acme Corporation",
+    address: "123 Business Ave, Suite 400",
+    city: "San Francisco",
+    state: "CA",
+    zip: "94102",
+    phone: "+1 (555) 123-4567",
+  },
+  invoice: {
+    number: "INV-2024-0847",
+    date: "2024-01-15",
+    due_date: "2024-02-14",
+    po_number: "PO-9823",
+  },
+  items: [
+    { description: "Professional Services", quantity: 40, unit_price: 150.0, total: 6000.0 },
+    { description: "Software License", quantity: 5, unit_price: 299.99, total: 1499.95 },
+    { description: "Support Package", quantity: 1, unit_price: 500.0, total: 500.0 },
+  ],
+  totals: {
+    subtotal: 7999.95,
+    tax_rate: 0.0875,
+    tax_amount: 699.99,
+    total: 8699.94,
+  },
+};
+// Mock XML rendering of the sample invoice. It is abbreviated compared with
+// mockData: only the first line item is listed and the vendor phone and PO
+// number are omitted.
+const mockXML = `<?xml version="1.0" encoding="UTF-8"?>
+<extraction>
+  <document type="Invoice" confidence="0.98"/>
+  <vendor>
+    <name>Acme Corporation</name>
+    <address>123 Business Ave, Suite 400</address>
+    <city>San Francisco</city>
+    <state>CA</state>
+    <zip>94102</zip>
+  </vendor>
+  <invoice>
+    <number>INV-2024-0847</number>
+    <date>2024-01-15</date>
+    <due_date>2024-02-14</due_date>
+  </invoice>
+  <items>
+    <item>
+      <description>Professional Services</description>
+      <quantity>40</quantity>
+      <total>6000.00</total>
+    </item>
+  </items>
+  <totals>
+    <subtotal>7999.95</subtotal>
+    <tax>699.99</tax>
+    <total>8699.94</total>
+  </totals>
+</extraction>`;
+// Mock plain-text rendering of the sample invoice, laid out as a printed
+// invoice with a fixed-width items table.
+const mockText = `INVOICE
+ACME CORPORATION
+123 Business Ave, Suite 400
+San Francisco, CA 94102
+Phone: +1 (555) 123-4567
+Invoice Number: INV-2024-0847
+Invoice Date: January 15, 2024
+Due Date: February 14, 2024
+PO Number: PO-9823
+BILL TO:
+Customer Name
+456 Client Street
+New York, NY 10001
+ITEMS:
+─────────────────────────────────────────────────────────
+Description                  Qty    Unit Price    Total
+─────────────────────────────────────────────────────────
+Professional Services         40      $150.00    $6,000.00
+Software License               5      $299.99    $1,499.95
+Support Package                1      $500.00      $500.00
+─────────────────────────────────────────────────────────
+                              Subtotal:    $7,999.95
+                              Tax (8.75%):   $699.99
+                              ─────────────────────────
+                              TOTAL:       $8,699.94
+Payment Terms: Net 30
+Thank you for your business!`;
+// Helper function to convert object to XML
+// Prepare fields for JSON/XML output - remove duplicates and restructure
+function prepareFieldsForOutput(fields, format = "json") {
+  if (!fields || typeof fields !== "object") {
+    return fields;
+  }
+  const output = { ...fields };
+  // Extract Fields from root level if it exists
+  const rootFields = output.Fields;
+  // Remove Fields from output temporarily (will be added back at top)
+  delete output.Fields;
+  // Remove full_text from top-level if pages array exists (to avoid duplication)
+  if (output.pages && Array.isArray(output.pages) && output.pages.length > 0) {
+    delete output.full_text;
+    // Clean up each page: remove full_text from page.fields (it duplicates page.text)
+    output.pages = output.pages.map(page => {
+      const cleanedPage = { ...page };
+      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
+        const cleanedFields = { ...cleanedPage.fields };
+        // Remove full_text from page fields (duplicates page.text)
+        delete cleanedFields.full_text;
+        cleanedPage.fields = cleanedFields;
+      }
+      return cleanedPage;
+    });
+  }
+  // For JSON and XML: restructure pages into separate top-level fields (page_1, page_2, etc.)
+  if ((format === "json" || format === "xml") && output.pages && Array.isArray(output.pages)) {
+    // Get top-level field keys (these are merged from all pages - avoid duplicating in page fields)
+    const topLevelKeys = new Set(Object.keys(output).filter(k => k !== "pages" && k !== "full_text" && k !== "Fields"));
+    output.pages.forEach((page, idx) => {
+      const pageNum = page.page_number || idx + 1;
+      const pageFields = page.fields || {};
+      // Remove duplicate fields from page.fields:
+      // 1. Remove full_text (duplicates page.text)
+      // 2. Remove fields that match top-level fields (already shown at root)
+      const cleanedPageFields = {};
+      for (const [key, value] of Object.entries(pageFields)) {
+        // Skip full_text and fields that match top-level exactly
+        if (key !== "full_text" && (!topLevelKeys.has(key) || (value !== output[key]))) {
+          cleanedPageFields[key] = value;
+        }
+      }
+      const pageObj = {
+        text: page.text || "",
+        confidence: page.confidence || 0,
+        doc_type: page.doc_type || "other"
+      };
+      // Add table and footer_notes if they exist
+      if (page.table && Array.isArray(page.table) && page.table.length > 0) {
+        pageObj.table = page.table;
+      }
+      if (page.footer_notes && Array.isArray(page.footer_notes) && page.footer_notes.length > 0) {
+        pageObj.footer_notes = page.footer_notes;
+      }
+      // Only add fields if there are unique page-specific fields
+      if (Object.keys(cleanedPageFields).length > 0) {
+        pageObj.fields = cleanedPageFields;
+      }
+      output[`page_${pageNum}`] = pageObj;
+    });
+    // Remove pages array - we now have page_1, page_2, etc. as separate fields
+    delete output.pages;
+  }
+  // Handle page_X structure (from backend) - remove Fields from page objects if they exist
+  if (output && typeof output === "object") {
+    const pageKeys = Object.keys(output).filter(k => k.startsWith("page_"));
+    for (const pageKey of pageKeys) {
+      const pageData = output[pageKey];
+      if (pageData && typeof pageData === "object") {
+        // Remove Fields from page objects (it's now at root level)
+        delete pageData.Fields;
+        delete pageData.metadata;
+      }
+    }
+  }
+  // Rebuild output with Fields at the top (only if it exists and is not empty)
+  const finalOutput = {};
+  if (rootFields && typeof rootFields === "object" && Object.keys(rootFields).length > 0) {
+    finalOutput.Fields = rootFields;
+  }
+  // Add all other keys
+  Object.keys(output).forEach(key => {
+    finalOutput[key] = output[key];
+  });
+  return finalOutput;
+}
+function objectToXML(obj, rootName = "extraction") {
+  // Prepare fields - remove full_text if pages exist
+  const preparedObj = prepareFieldsForOutput(obj, "xml");
+  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;
+  const convert = (obj, indent = "  ") => {
+    for (const [key, value] of Object.entries(obj)) {
+      if (value === null || value === undefined) continue;
+      // Skip full_text if pages exist (already handled in prepareFieldsForOutput)
+      if (key === "full_text" && obj.pages && Array.isArray(obj.pages) && obj.pages.length > 0) {
+        continue;
+      }
+      if (Array.isArray(value)) {
+        value.forEach((item) => {
+          xml += `${indent}<${key}>\n`;
+          if (typeof item === "object") {
+            convert(item, indent + "  ");
+          } else {
+            xml += `${indent}  ${escapeXML(String(item))}\n`;
+          }
+          xml += `${indent}</${key}>\n`;
+        });
+      } else if (typeof value === "object") {
+        xml += `${indent}<${key}>\n`;
+        convert(value, indent + "  ");
+        xml += `${indent}</${key}>\n`;
+      } else {
+        xml += `${indent}<${key}>${escapeXML(String(value))}</${key}>\n`;
+      }
+    }
+  };
+  convert(preparedObj);
+  xml += `</${rootName}>`;
+  return xml;
+}
+function escapeXML(str) {
+  return str
+    .replace(/&/g, "&amp;")
+    .replace(/</g, "&lt;")
+    .replace(/>/g, "&gt;")
+    .replace(/"/g, "&quot;")
+    .replace(/'/g, "&apos;");
+}
+// Helper function to extract text from page structure
+function extractTextFromFields(fields) {
+  if (!fields || typeof fields !== "object") {
+    return "";
+  }
+  // Check for page_X structure first (preferred format)
+  const pageKeys = Object.keys(fields).filter(key => key.startsWith("page_"));
+  if (pageKeys.length > 0) {
+    // Get text from first page (or combine all pages)
+    const pageTexts = pageKeys.map(key => {
+      const page = fields[key];
+      if (page && page.text) {
+        return page.text;
+      }
+      return "";
+    }).filter(text => text);
+    if (pageTexts.length > 0) {
+      return pageTexts.join("\n\n");
+    }
+  }
+  // Fallback to full_text
+  if (fields.full_text) {
+    return fields.full_text;
+  }
+  return "";
+}
+// Helper function to format fields as readable text
+function fieldsToText(fields) {
+  if (!fields || typeof fields !== "object") {
+    return "No data extracted.";
+  }
+  // Extract text from page structure or full_text
+  const extractedText = extractTextFromFields(fields);
+  if (extractedText) {
+    return extractedText;
+    // Don't show pages array separately if full_text already contains page markers
+    // (full_text from backend already includes "=== PAGE 1 ===" etc.)
+    const hasPageMarkers = fields.full_text.includes("=== PAGE") || fields.full_text.includes("--- Page");
+    // Only show pages array if full_text doesn't already have page breakdown
+    if (!hasPageMarkers && fields.pages && Array.isArray(fields.pages)) {
+      text += "\n\n=== TEXT BY PAGE ===\n\n";
+      fields.pages.forEach((page, idx) => {
+        text += `--- Page ${page.page_number || idx + 1} ---\n`;
+        text += page.text || "";
+        text += "\n\n";
+      });
+    }
+    // Then show other structured fields
+    const otherFields = { ...fields };
+    delete otherFields.full_text;
+    delete otherFields.pages;
+    if (Object.keys(otherFields).length > 0) {
+      text += "\n\n=== STRUCTURED FIELDS ===\n\n";
+      const formatValue = (key, value, indent = "") => {
+        if (Array.isArray(value)) {
+          text += `${indent}${key}:\n`;
+          value.forEach((item, idx) => {
+            if (typeof item === "object") {
+              text += `${indent}  Item ${idx + 1}:\n`;
+              Object.entries(item).forEach(([k, v]) => formatValue(k, v, indent + "    "));
+            } else {
+              text += `${indent}  - ${item}\n`;
+            }
+          });
+        } else if (typeof value === "object" && value !== null) {
+          text += `${indent}${key}:\n`;
+          Object.entries(value).forEach(([k, v]) => formatValue(k, v, indent + "  "));
+        } else {
+          text += `${indent}${key}: ${value}\n`;
+        }
+      };
+      Object.entries(otherFields).forEach(([key, value]) => {
+        formatValue(key, value);
+        text += "\n";
+      });
+    }
+    return text.trim();
+  }
+  // Fallback: format all fields normally
+  let text = "";
+  const formatValue = (key, value, indent = "") => {
+    if (Array.isArray(value)) {
+      text += `${indent}${key}:\n`;
+      value.forEach((item, idx) => {
+        if (typeof item === "object") {
+          text += `${indent}  Item ${idx + 1}:\n`;
+          Object.entries(item).forEach(([k, v]) => formatValue(k, v, indent + "    "));
+        } else {
+          text += `${indent}  - ${item}\n`;
+        }
+      });
+    } else if (typeof value === "object" && value !== null) {
+      text += `${indent}${key}:\n`;
+      Object.entries(value).forEach(([k, v]) => formatValue(k, v, indent + "  "));
+    } else {
+      text += `${indent}${key}: ${value}\n`;
+    }
+  };
+  Object.entries(fields).forEach(([key, value]) => {
+    formatValue(key, value);
+    text += "\n";
+  });
+  return text.trim() || "No data extracted.";
+}
+export default function ExtractionOutput({ hasFile, isProcessing, isComplete, extractionResult, onNewUpload }) {
+  const [activeTab, setActiveTab] = useState("json");
+  const [copied, setCopied] = useState(false);
+  const [statusMessage, setStatusMessage] = useState("Preparing document...");
+  // Get fields from extraction result, default to empty object
+  const fields = extractionResult?.fields || {};
+  const confidence = extractionResult?.confidence || 0;
+  const fieldsExtracted = extractionResult?.fieldsExtracted || 0;
+  const totalTime = extractionResult?.totalTime || 0;
+  // Dynamic status messages that rotate during processing
+  const statusMessages = [
+    "Preparing document...",
+    "Converting pages to images...",
+    "Visual Reasoning...",
+    "Reading text from document...",
+    "Identifying document structure...",
+    "Extracting tables and data...",
+    "Analyzing content...",
+    "Processing pages...",
+    "Organizing extracted information...",
+    "Finalizing results...",
+  ];
+  // Rotate status messages during processing
+  const messageIndexRef = useRef(0);
+  useEffect(() => {
+    if (!isProcessing) {
+      setStatusMessage("Analyzing document structure");
+      messageIndexRef.current = 0;
+      return;
+    }
+    setStatusMessage(statusMessages[0]);
+    messageIndexRef.current = 0;
+    const interval = setInterval(() => {
+      messageIndexRef.current = (messageIndexRef.current + 1) % statusMessages.length;
+      setStatusMessage(statusMessages[messageIndexRef.current]);
+    }, 2500); // Change message every 2.5 seconds
+    return () => clearInterval(interval);
+  }, [isProcessing]);
+  // Initialize expanded sections based on available fields
+  const [expandedSections, setExpandedSections] = useState(() =>
+    Object.keys(fields).slice(0, 5) // Expand first 5 sections by default
+  );
+  // Helper function to convert HTML to formatted plain text with layout preserved
+  const htmlToFormattedText = (html) => {
+    if (!html) return "";
+    // Create a temporary div to parse HTML
+    const tempDiv = document.createElement("div");
+    tempDiv.innerHTML = html;
+    let text = "";
+    // Process each element
+    const processNode = (node) => {
+      if (node.nodeType === Node.TEXT_NODE) {
+        return node.textContent;
+      }
+      if (node.nodeType !== Node.ELEMENT_NODE) {
+        return "";
+      }
+      const tagName = node.tagName?.toLowerCase();
+      const children = Array.from(node.childNodes);
+      switch (tagName) {
+        case "h1":
+          return "\n\n" + processChildren(children).trim() + "\n\n";
+        case "h2":
+          return "\n\n" + processChildren(children).trim() + "\n\n";
+        case "h3":
+          return "\n" + processChildren(children).trim() + "\n";
+        case "p":
+          return processChildren(children) + "\n\n";
+        case "br":
+          return "\n";
+        case "strong":
+        case "b":
+          return processChildren(children);
+        case "em":
+        case "i":
+          return processChildren(children);
+        case "sup":
+          return processChildren(children);
+        case "sub":
+          return processChildren(children);
+        case "table":
+          return "\n" + processTable(node) + "\n\n";
+        case "ul":
+        case "ol":
+          return "\n" + processList(node) + "\n\n";
+        case "li":
+          return "  • " + processChildren(children).trim() + "\n";
+        default:
+          return processChildren(children);
+      }
+    };
+    const processChildren = (children) => {
+      return children.map(processNode).join("");
+    };
+    const processTable = (table) => {
+      let tableText = "";
+      const rows = table.querySelectorAll("tr");
+      if (rows.length === 0) return "";
+      // First pass: calculate column widths
+      const allRows = Array.from(rows);
+      const columnCount = Math.max(...allRows.map(row => row.querySelectorAll("td, th").length));
+      const columnWidths = new Array(columnCount).fill(0);
+      allRows.forEach(row => {
+        const cells = row.querySelectorAll("td, th");
+        cells.forEach((cell, colIndex) => {
+          const cellText = processChildren(Array.from(cell.childNodes)).trim().replace(/\s+/g, " ");
+          columnWidths[colIndex] = Math.max(columnWidths[colIndex] || 0, cellText.length, 10);
+        });
+      });
+      // Second pass: format rows
+      allRows.forEach((row, rowIndex) => {
+        const cells = row.querySelectorAll("td, th");
+        const cellTexts = Array.from(cells).map(cell => {
+          let cellContent = processChildren(Array.from(cell.childNodes)).trim();
+          cellContent = cellContent.replace(/\s+/g, " ");
+          return cellContent;
+        });
+        // Pad cells to column widths
+        const paddedCells = cellTexts.map((text, i) => {
+          const width = columnWidths[i] || 10;
+          return text.padEnd(width);
+        });
+        tableText += paddedCells.join(" | ") + "\n";
+        // Add separator after header row
+        if (rowIndex === 0 && row.querySelector("th")) {
+          tableText += columnWidths.map(w => "-".repeat(w)).join("-|-") + "\n";
+        }
+      });
+      return tableText;
+    };
+    const processList = (list) => {
+      const items = list.querySelectorAll("li");
+      return Array.from(items).map(item => {
+        return "  • " + processChildren(Array.from(item.childNodes)).trim();
+      }).join("\n");
+    };
+    text = processChildren(Array.from(tempDiv.childNodes));
+    // Clean up extra newlines
+    text = text.replace(/\n{3,}/g, "\n\n");
+    text = text.trim();
+    return text;
+  };
+  const handleCopy = () => {
+    let content = "";
+    if (activeTab === "json") {
+      const preparedFields = prepareFieldsForOutput(fields, "json");
+      content = JSON.stringify(preparedFields, null, 2);
+    } else if (activeTab === "xml") {
+      content = objectToXML(fields);
+    } else {
+      // For text tab, get the formatted HTML and convert to plain text with layout
+      const textContent = extractTextFromFields(fields);
+      const htmlContent = renderMarkdownToHTML(textContent);
+      content = htmlToFormattedText(htmlContent);
+    }
+    navigator.clipboard.writeText(content);
+    setCopied(true);
+    setTimeout(() => setCopied(false), 2000);
+  };
+  // Get prepared fields for display
+  const preparedFields = React.useMemo(() => {
+    return prepareFieldsForOutput(fields, "json");
+  }, [fields]);
+  // Update expanded sections when fields change
+  React.useEffect(() => {
+    if (extractionResult?.fields) {
+      setExpandedSections(Object.keys(extractionResult.fields).slice(0, 5));
+    }
+  }, [extractionResult]);
+  const toggleSection = (section) => {
+    setExpandedSections((prev) =>
+      prev.includes(section) ? prev.filter((s) => s !== section) : [...prev, section]
+    );
+  };
+  const renderValue = (value) => {
+    if (typeof value === "number") {
+      return <span className="text-amber-600">{value}</span>;
+    }
+    if (typeof value === "string") {
+      return <span className="text-emerald-600">"{value}"</span>;
+    }
+    return String(value);
+  };
+  const renderSection = (key, value, level = 0) => {
+    const isExpanded = expandedSections.includes(key);
+    const isObject = typeof value === "object" && value !== null;
+    const isArray = Array.isArray(value);
+    if (!isObject) {
+      return (
+        <div
+          key={key}
+          className="flex items-start gap-2 py-1"
+          style={{ paddingLeft: level * 16 }}
+        >
+          <span className="text-violet-500">"{key}"</span>
+          <span className="text-slate-400">:</span>
+          {renderValue(value)}
+        </div>
+      );
+    }
+    return (
+      <div key={key}>
+        <button
+          onClick={() => toggleSection(key)}
+          className="flex items-center gap-2 py-1 hover:bg-slate-50 w-full text-left rounded"
+          style={{ paddingLeft: level * 16 }}
+        >
+          <ChevronDown
+            className={cn(
+              "h-3 w-3 text-slate-400 transition-transform",
+              !isExpanded && "-rotate-90"
+            )}
+          />
+          <span className="text-violet-500">"{key}"</span>
+          <span className="text-slate-400">:</span>
+          <span className="text-slate-400">{isArray ? "[" : "{"}</span>
+          {!isExpanded && (
+            <span className="text-slate-300 text-xs">
+              {isArray ? `${value.length} items` : `${Object.keys(value).length} fields`}
+            </span>
+          )}
+        </button>
+        <AnimatePresence>
+          {isExpanded && (
+            <motion.div
+              initial={{ height: 0, opacity: 0 }}
+              animate={{ height: "auto", opacity: 1 }}
+              exit={{ height: 0, opacity: 0 }}
+              transition={{ duration: 0.2 }}
+              className="overflow-hidden"
+            >
+              {isArray ? (
+                value.map((item, idx) => (
+                  <div key={idx} className="border-l border-slate-100 ml-4">
+                    {Object.entries(item).map(([k, v]) => renderSection(k, v, level + 2))}
+                    {idx < value.length - 1 && <div className="h-2" />}
+                  </div>
+                ))
+              ) : (
+                Object.entries(value).map(([k, v]) => renderSection(k, v, level + 1))
+              )}
+              <div style={{ paddingLeft: level * 16 }} className="text-slate-400">
+                {isArray ? "]" : "}"}
+              </div>
+            </motion.div>
+          )}
+        </AnimatePresence>
+      </div>
+    );
+  };
+  return (
+    <div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden">
+      {/* Header */}
+      <div className="flex items-center justify-between px-5 py-4 border-b border-slate-100">
+        <div className="flex items-center gap-3">
+          <div className="h-8 w-8 rounded-lg bg-emerald-50 flex items-center justify-center">
+            <Code2 className="h-4 w-4 text-emerald-600" />
+          </div>
+          <div>
+            <h3 className="font-semibold text-slate-800 text-sm">Extracted Data</h3>
+            <p className="text-xs text-slate-400">
+              {isComplete
+                ? `${fieldsExtracted} field${fieldsExtracted !== 1 ? 's' : ''} extracted`
+                : "Waiting for extraction"}
+            </p>
+          </div>
+          {isComplete && onNewUpload && (
+            <Button
+              variant="ghost"
+              size="sm"
+              onClick={onNewUpload}
+              className="h-8 ml-auto text-xs gap-1.5 text-indigo-600 hover:text-indigo-700 hover:bg-indigo-50"
+              title="Upload new document"
+            >
+              <Upload className="h-3.5 w-3.5" />
+              New
+            </Button>
+          )}
+        </div>
+        {isComplete && (
+          <div className="flex items-center gap-2">
+            <Tabs value={activeTab} onValueChange={setActiveTab}>
+              <TabsList className="h-8 bg-slate-100 p-0.5">
+                <TabsTrigger value="text" className="h-7 text-xs gap-1.5">
+                  <FileText className="h-3 w-3" />
+                  Text
+                </TabsTrigger>
+                <TabsTrigger value="json" className="h-7 text-xs gap-1.5">
+                  <Braces className="h-3 w-3" />
+                  JSON
+                </TabsTrigger>
+                <TabsTrigger value="xml" className="h-7 text-xs gap-1.5">
+                  <FileCode2 className="h-3 w-3" />
+                  XML
+                </TabsTrigger>
+              </TabsList>
+            </Tabs>
+            <Button
+              variant="ghost"
+              size="sm"
+              onClick={handleCopy}
+              className="h-8 text-xs gap-1.5"
+            >
+              {copied ? (
+                <>
+                  <Check className="h-3 w-3 text-emerald-500" />
+                  Copied
+                </>
+              ) : (
+                <>
+                  <Copy className="h-3 w-3" />
+                  Copy
+                </>
+              )}
+            </Button>
+          </div>
+        )}
+      </div>
+      {/* Output Area */}
+      <div className="flex-1 overflow-auto">
+        {!hasFile ? (
+          <div className="h-full flex items-center justify-center p-6">
+            <div className="text-center">
+              <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
+                <Code2 className="h-10 w-10 text-slate-300" />
+              </div>
+              <p className="text-slate-400 text-sm">Extracted data will appear here</p>
+            </div>
+          </div>
+        ) : isProcessing ? (
+          <div className="h-full flex items-center justify-center p-6">
+            <div className="text-center">
+              <motion.div
+                animate={{ rotate: 360 }}
+                transition={{ duration: 2, repeat: Infinity, ease: "linear" }}
+                className="h-16 w-16 mx-auto rounded-2xl bg-gradient-to-br from-indigo-100 to-violet-100 flex items-center justify-center mb-4"
+              >
+                <Sparkles className="h-8 w-8 text-indigo-500" />
+              </motion.div>
+              <p className="text-slate-700 font-medium mb-1">Extracting data...</p>
+              <p className="text-slate-400 text-sm">{statusMessage}</p>
+              <div className="mt-6 flex items-center justify-center gap-1">
+                {[0, 1, 2].map((i) => (
+                  <motion.div
+                    key={i}
+                    animate={{ scale: [1, 1.2, 1] }}
+                    transition={{
+                      duration: 0.6,
+                      repeat: Infinity,
+                      delay: i * 0.2,
+                    }}
+                    className="h-2 w-2 rounded-full bg-indigo-400"
+                  />
+                ))}
+              </div>
+            </div>
+          </div>
+        ) : isComplete && Object.keys(fields).length === 0 ? (
+          <div className="h-full flex items-center justify-center p-6">
+            <div className="text-center">
+              <div className="h-20 w-20 mx-auto rounded-2xl bg-amber-100 flex items-center justify-center mb-4">
+                <Code2 className="h-10 w-10 text-amber-600" />
+              </div>
+              <p className="text-slate-600 font-medium mb-1">No data extracted</p>
+              <p className="text-slate-400 text-sm">The document may not contain extractable fields</p>
+            </div>
+          </div>
+        ) : (
+          <div className="p-4 font-mono text-sm">
+            {activeTab === "text" ? (
+              <div
+                className="text-sm text-slate-700 leading-relaxed"
+                style={{
+                  fontFamily: 'system-ui, -apple-system, sans-serif'
+                }}
+              >
+                <div
+                  className="markdown-content"
+                  dangerouslySetInnerHTML={{ __html: renderMarkdownToHTML(fieldsToText(fields)) }}
+                  style={{
+                    lineHeight: '1.6'
+                  }}
+                />
+                <style>{`
+                  .markdown-content h1 {
+                    font-size: 1.5rem;
+                    font-weight: 700;
+                    color: #0f172a;
+                    margin-top: 1.5rem;
+                    margin-bottom: 1rem;
+                    line-height: 1.3;
+                  }
+                  .markdown-content h2 {
+                    font-size: 1.25rem;
+                    font-weight: 600;
+                    color: #0f172a;
+                    margin-top: 1.25rem;
+                    margin-bottom: 0.75rem;
+                    line-height: 1.3;
+                  }
+                  .markdown-content h3 {
+                    font-size: 1.125rem;
+                    font-weight: 600;
+                    color: #1e293b;
+                    margin-top: 1rem;
+                    margin-bottom: 0.5rem;
+                    line-height: 1.3;
+                  }
+                  .markdown-content p {
+                    margin-top: 0.75rem;
+                    margin-bottom: 0.75rem;
+                    color: #334155;
+                  }
+                  .markdown-content table {
+                    width: 100%;
+                    border-collapse: collapse;
+                    margin: 1.5rem 0;
+                    font-size: 0.875rem;
+                    box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1);
+                  }
+                  .markdown-content table caption {
+                    font-weight: 600;
+                    margin-bottom: 0.5rem;
+                    text-align: left;
+                  }
+                  .markdown-content table th {
+                    background-color: #f8fafc;
+                    border: 1px solid #cbd5e1;
+                    padding: 0.75rem;
+                    text-align: left;
+                    font-weight: 600;
+                    color: #0f172a;
+                  }
+                  .markdown-content table td {
+                    border: 1px solid #cbd5e1;
+                    padding: 0.75rem;
+                    color: #334155;
+                  }
+                  .markdown-content table tr:nth-child(even) {
+                    background-color: #f8fafc;
+                  }
+                  .markdown-content table tr:hover {
+                    background-color: #f1f5f9;
+                  }
+                  .markdown-content strong {
+                    font-weight: 600;
+                    color: #0f172a;
+                  }
+                  .markdown-content em {
+                    font-style: italic;
+                  }
+                  .markdown-content a {
+                    color: #4f46e5;
+                    text-decoration: underline;
+                  }
+                  .markdown-content a:hover {
+                    color: #4338ca;
+                  }
+                  .markdown-content sup {
+                    font-size: 0.75em;
+                    vertical-align: super;
+                    line-height: 0;
+                    position: relative;
+                    top: -0.5em;
+                  }
+                  .markdown-content sub {
+                    font-size: 0.75em;
+                    vertical-align: sub;
+                    line-height: 0;
+                    position: relative;
+                    bottom: -0.25em;
+                  }
+                  .markdown-content ul, .markdown-content ol {
+                    margin: 0.75rem 0;
+                    padding-left: 1.5rem;
+                  }
+                  .markdown-content li {
+                    margin: 0.25rem 0;
+                  }
+                `}</style>
+              </div>
+            ) : activeTab === "json" ? (
+              <div className="space-y-1">
+                <span className="text-slate-400">{"{"}</span>
+                {Object.keys(preparedFields).length > 0 ? (
+                  Object.entries(preparedFields).map(([key, value]) =>
+                    renderSection(key, value, 1)
+                  )
+                ) : (
+                  <div className="pl-4 text-slate-400 italic">No fields extracted</div>
+                )}
+                <span className="text-slate-400">{"}"}</span>
+              </div>
+            ) : (
+              <pre className="text-sm text-slate-600 whitespace-pre-wrap">
+                {objectToXML(fields).split("\n").map((line, i) => (
+                  <div key={i} className="hover:bg-slate-50 px-2 -mx-2 rounded">
+                    {line.includes("<") ? (
+                      <>
+                        {line.split(/(<\/?[\w\s=".-]+>)/g).map((part, j) => {
+                          if (part.startsWith("</")) {
+                            return (
+                              <span key={j} className="text-rose-500">
+                                {part}
+                              </span>
+                            );
+                          }
+                          if (part.startsWith("<")) {
+                            return (
+                              <span key={j} className="text-indigo-500">
+                                {part}
+                              </span>
+                            );
+                          }
+                          return (
+                            <span key={j} className="text-slate-700">
+                              {part}
+                            </span>
+                          );
+                        })}
+                      </>
+                    ) : (
+                      line
+                    )}
+                  </div>
+                ))}
+              </pre>
+            )}
+          </div>
+        )}
+      </div>
+      {/* Confidence Footer */}
+      {isComplete && extractionResult && (
+        <div className="px-5 py-3 border-t border-slate-100 bg-slate-50/50">
+          <div className="flex items-center justify-between text-xs">
+            <div className="flex items-center gap-4">
+              <div className="flex items-center gap-1.5">
+                <div className={cn(
+                  "h-2 w-2 rounded-full",
+                  confidence >= 90 ? "bg-emerald-500" : confidence >= 70 ? "bg-amber-500" : "bg-red-500"
+                )} />
+                <span className="text-slate-500">Confidence:</span>
+                <span className="font-semibold text-slate-700">
+                  {confidence > 0 ? `${confidence.toFixed(1)}%` : "N/A"}
+                </span>
+              </div>
+              <div className="flex items-center gap-1.5">
+                <span className="text-slate-500">Fields:</span>
+                <span className="font-semibold text-slate-700">{fieldsExtracted}</span>
+              </div>
+            </div>
+            <span className="text-slate-400">
+              Processed in {totalTime >= 1000 ? `${(totalTime / 1000).toFixed(1)}s` : `${totalTime}ms`}
+            </span>
+          </div>
+        </div>
+      )}
+    </div>
+  );
+}

frontend/src/components/ocr/ProcessingStatus.jsx CHANGED Viewed

@@ -1,111 +1,118 @@
-import React from "react";
-import { motion } from "framer-motion";
-import {
-  FileSearch,
-  Cpu,
-  TableProperties,
-  CheckCircle2,
-  Loader2,
-} from "lucide-react";
-import { cn } from "@/lib/utils";
-const steps = [
-  { id: "upload", label: "Received", icon: FileSearch },
-  { id: "analyze", label: "Analysis", icon: Cpu },
-  { id: "extract", label: "Extraction", icon: TableProperties },
-  { id: "complete", label: "Done", icon: CheckCircle2 },
-];
-export default function ProcessingStatus({ isProcessing, isComplete }) {
-  const getCurrentStep = () => {
-    if (isComplete) return 4;
-    if (isProcessing) return 2;
-    return 0;
-  };
-  const currentStep = getCurrentStep();
-  if (!isProcessing && !isComplete) return null;
-  return (
-    <motion.div
-      initial={{ opacity: 0, y: -10 }}
-      animate={{ opacity: 1, y: 0 }}
-      className="bg-white rounded-xl border border-slate-200 px-4 py-3"
-    >
-      <div className="flex items-center justify-between gap-2">
-        {steps.map((step, index) => {
-          const isActive = index + 1 === currentStep;
-          const isCompleted = index + 1 < currentStep || isComplete;
-          const Icon = step.icon;
-          return (
-            <React.Fragment key={step.id}>
-              <div className="flex items-center gap-2">
-                <motion.div
-                  initial={false}
-                  animate={{
-                    scale: (isActive && !isComplete) ? 1.05 : 1,
-                    backgroundColor: isCompleted
-                      ? "rgb(16 185 129)"
-                      : (isActive && !isComplete)
-                      ? "rgb(99 102 241)"
-                      : "rgb(241 245 249)",
-                  }}
-                  className={cn(
-                    "h-8 w-8 rounded-lg flex items-center justify-center transition-colors",
-                    (isCompleted || isActive) && "shadow-md"
-                  )}
-                  style={{
-                    boxShadow: (isActive && !isComplete)
-                      ? "0 4px 8px -2px rgba(99, 102, 241, 0.3)"
-                      : isCompleted
-                      ? "0 4px 8px -2px rgba(16, 185, 129, 0.3)"
-                      : "none",
-                  }}
-                >
-                  {(isActive && !isComplete) ? (
-                    <motion.div
-                      animate={{ rotate: 360 }}
-                      transition={{ duration: 1.5, repeat: Infinity, ease: "linear" }}
-                    >
-                      <Loader2 className="h-4 w-4 text-white" />
-                    </motion.div>
-                  ) : isCompleted ? (
-                    <CheckCircle2 className="h-4 w-4 text-white" />
-                  ) : (
-                    <Icon className={cn("h-4 w-4 text-slate-400")} />
-                  )}
-                </motion.div>
-                <span
-                  className={cn(
-                    "text-xs font-medium hidden sm:inline",
-                    isActive ? "text-indigo-600" : isCompleted ? "text-emerald-600" : "text-slate-400"
-                  )}
-                >
-                  {step.label}
-                </span>
-              </div>
-              {index < steps.length - 1 && (
-                <div className="flex-1 h-0.5 mx-1 relative overflow-hidden rounded-full bg-slate-100">
-                  <motion.div
-                    initial={{ width: 0 }}
-                    animate={{
-                      width: isCompleted ? "100%" : isActive ? "50%" : "0%",
-                    }}
-                    transition={{ duration: 0.5 }}
-                    className={cn(
-                      "absolute inset-y-0 left-0",
-                      isCompleted ? "bg-emerald-500" : "bg-indigo-500"
-                    )}
-                  />
-                </div>
-              )}
-            </React.Fragment>
-          );
-        })}
-      </div>
-    </motion.div>
-  );
-}

+import React from "react";
+import { motion } from "framer-motion";
+import {
+  FileSearch,
+  Cpu,
+  TableProperties,
+  CheckCircle2,
+  Loader2,
+} from "lucide-react";
+import { cn } from "@/lib/utils";
+const steps = [
+  { id: "upload", label: "Received", icon: FileSearch },
+  { id: "analyze", label: "Analysis", icon: Cpu },
+  { id: "extract", label: "Extraction", icon: TableProperties },
+  { id: "complete", label: "Done", icon: CheckCircle2 },
+];
+export default function ProcessingStatus({ isProcessing, isComplete, currentStage }) {
+  const getCurrentStep = () => {
+    if (isComplete) return 4; // Done
+    if (!isProcessing) return 0; // Not started
+    // Use provided currentStage or default based on isProcessing
+    if (currentStage === "extraction") return 3; // Extraction
+    if (currentStage === "analysis") return 2; // Analysis
+    if (currentStage === "received") return 1; // Received
+    // Default: if processing, start at Analysis
+    return 2; // Analysis
+  };
+  const currentStep = getCurrentStep();
+  if (!isProcessing && !isComplete) return null;
+  return (
+    <motion.div
+      initial={{ opacity: 0, y: -10 }}
+      animate={{ opacity: 1, y: 0 }}
+      className="bg-white rounded-xl border border-slate-200 px-4 py-3"
+    >
+      <div className="flex items-center justify-between gap-2">
+        {steps.map((step, index) => {
+          const isActive = index + 1 === currentStep;
+          const isCompleted = index + 1 < currentStep || isComplete;
+          const Icon = step.icon;
+          return (
+            <React.Fragment key={step.id}>
+              <div className="flex items-center gap-2">
+                <motion.div
+                  initial={false}
+                  animate={{
+                    scale: (isActive && !isComplete) ? 1.05 : 1,
+                    backgroundColor: isCompleted
+                      ? "rgb(16 185 129)"
+                      : (isActive && !isComplete)
+                      ? "rgb(99 102 241)"
+                      : "rgb(241 245 249)",
+                  }}
+                  className={cn(
+                    "h-8 w-8 rounded-lg flex items-center justify-center transition-colors",
+                    (isCompleted || isActive) && "shadow-md"
+                  )}
+                  style={{
+                    boxShadow: (isActive && !isComplete)
+                      ? "0 4px 8px -2px rgba(99, 102, 241, 0.3)"
+                      : isCompleted
+                      ? "0 4px 8px -2px rgba(16, 185, 129, 0.3)"
+                      : "none",
+                  }}
+                >
+                  {(isActive && !isComplete) ? (
+                    <motion.div
+                      animate={{ rotate: 360 }}
+                      transition={{ duration: 1.5, repeat: Infinity, ease: "linear" }}
+                    >
+                      <Loader2 className="h-4 w-4 text-white" />
+                    </motion.div>
+                  ) : isCompleted ? (
+                    <CheckCircle2 className="h-4 w-4 text-white" />
+                  ) : (
+                    <Icon className={cn("h-4 w-4 text-slate-400")} />
+                  )}
+                </motion.div>
+                <span
+                  className={cn(
+                    "text-xs font-medium hidden sm:inline",
+                    isActive ? "text-indigo-600" : isCompleted ? "text-emerald-600" : "text-slate-400"
+                  )}
+                >
+                  {step.label}
+                </span>
+              </div>
+              {index < steps.length - 1 && (
+                <div className="flex-1 h-0.5 mx-1 relative overflow-hidden rounded-full bg-slate-100">
+                  <motion.div
+                    initial={{ width: 0 }}
+                    animate={{
+                      width: isCompleted ? "100%" : isActive ? "50%" : "0%",
+                    }}
+                    transition={{ duration: 0.5 }}
+                    className={cn(
+                      "absolute inset-y-0 left-0",
+                      isCompleted ? "bg-emerald-500" : "bg-indigo-500"
+                    )}
+                  />
+                </div>
+              )}
+            </React.Fragment>
+          );
+        })}
+      </div>
+    </motion.div>
+  );
+}

frontend/src/components/ocr/UpgradeModal.jsx ADDED Viewed

	@@ -0,0 +1,213 @@

+import React from "react";
+import { motion } from "framer-motion";
+import { cn } from "@/lib/utils";
+import {
+    X,
+    Sparkles,
+    Zap,
+    Shield,
+    Cloud,
+    BarChart3,
+    Bot,
+    Globe,
+    Lock,
+    Rocket,
+    Users,
+    CheckCircle2,
+    ArrowRight
+} from "lucide-react";
+import { Button } from "@/components/ui/button";
+const features = [
+    {
+        icon: Zap,
+        title: "Production-Scale Processing",
+        description: "Remove trial limits and run live AP and operations workflows",
+        color: "amber",
+        cta: "Explore with a demo",
+        gradient: "from-amber-500 to-orange-500"
+    },
+    {
+        icon: Bot,
+        title: "Advanced Agentic Processing",
+        description: "You can customize your own agentic pipeline with your own data",
+        color: "indigo",
+        cta: "Talk to Sales",
+        gradient: "from-indigo-500 to-violet-500"
+    },
+    {
+        icon: Cloud,
+        title: "API Access",
+        description: "Integrate EZOFIS into your workflow with our REST API",
+        color: "blue",
+        cta: "Talk to a Techie!",
+        gradient: "from-blue-500 to-cyan-500"
+    }
+];
+export default function UpgradeModal({ open, onClose }) {
+    if (!open) return null;
+    return (
+        <div className="fixed inset-0 z-50 flex items-center justify-center">
+            {/* Backdrop */}
+            <motion.div
+                initial={{ opacity: 0 }}
+                animate={{ opacity: 1 }}
+                exit={{ opacity: 0 }}
+                className="absolute inset-0 bg-black/50 backdrop-blur-sm"
+                onClick={onClose}
+            />
+            {/* Modal */}
+            <motion.div
+                initial={{ opacity: 0, scale: 0.95, y: 20 }}
+                animate={{ opacity: 1, scale: 1, y: 0 }}
+                exit={{ opacity: 0, scale: 0.95, y: 20 }}
+                className="relative z-10 w-full max-w-6xl max-h-[90vh] mx-4 bg-white rounded-2xl shadow-2xl overflow-hidden flex flex-col"
+                onClick={(e) => e.stopPropagation()}
+            >
+                {/* Header */}
+                <div className="sticky top-0 bg-gradient-to-r from-indigo-600 via-violet-600 to-purple-600 text-white px-8 py-6 z-10">
+                    <button
+                        onClick={onClose}
+                        className="absolute right-6 top-6 h-8 w-8 rounded-lg bg-white/10 hover:bg-white/20 flex items-center justify-center transition-colors"
+                    >
+                        <X className="h-4 w-4" />
+                    </button>
+                    <motion.div
+                        initial={{ opacity: 0, y: 20 }}
+                        animate={{ opacity: 1, y: 0 }}
+                        className="text-center"
+                    >
+                        <div className="inline-flex items-center gap-2 px-4 py-1.5 rounded-full bg-white/10 backdrop-blur-sm mb-4">
+                            <Sparkles className="h-4 w-4" />
+                            <span className="text-sm font-medium">Trial Limit Reached</span>
+                        </div>
+                        <h2 className="text-3xl font-bold mb-2">You've processed 2 documents</h2>
+                        <p className="text-white/80 text-lg">Continue with production-ready document intelligence</p>
+                    </motion.div>
+                </div>
+                {/* Stats Bar */}
+                <div className="grid grid-cols-3 gap-6 px-8 py-6 bg-slate-50 border-b border-slate-200">
+                    {[
+                        { label: "Accuracy Rate", value: "99.8%", icon: CheckCircle2 },
+                        { label: "Processing Speed", value: "< 10s", icon: Zap },
+                        { label: "Operational Users", value: "10,000+", icon: Users }
+                    ].map((stat, i) => (
+                        <motion.div
+                            key={stat.label}
+                            initial={{ opacity: 0, y: 20 }}
+                            animate={{ opacity: 1, y: 0 }}
+                            transition={{ delay: i * 0.1 }}
+                            className="text-center"
+                        >
+                            <div className="flex items-center justify-center gap-2 mb-1">
+                                <stat.icon className="h-4 w-4 text-indigo-600" />
+                                <span className="text-2xl font-bold text-slate-900">{stat.value}</span>
+                            </div>
+                            <p className="text-sm text-slate-500">{stat.label}</p>
+                        </motion.div>
+                    ))}
+                </div>
+                {/* Features Grid - Scrollable */}
+                <div className="flex-1 overflow-auto px-8 py-8">
+                    <div className="text-center mb-8">
+                        <h3 className="text-2xl font-bold text-slate-900 mb-2">
+                        Continue to Production Use
+                        </h3>
+                    </div>
+                    <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
+                        {features.map((feature, index) => (
+                            <motion.div
+                                key={feature.title}
+                                initial={{ opacity: 0, y: 20 }}
+                                animate={{ opacity: 1, y: 0 }}
+                                transition={{ delay: 0.2 + index * 0.1 }}
+                                className="group relative bg-white rounded-2xl border border-slate-200 p-6 hover:shadow-xl hover:shadow-slate-200/50 transition-all duration-300 hover:-translate-y-1 overflow-hidden"
+                            >
+                                {/* Gradient Background on Hover */}
+                                <div className={`absolute inset-0 bg-gradient-to-br ${feature.gradient} opacity-0 group-hover:opacity-5 transition-opacity duration-300`} />
+                                <div className="relative">
+                                    <div className={cn(
+                                        "h-12 w-12 rounded-xl flex items-center justify-center mb-4 group-hover:scale-110 transition-transform duration-300",
+                                        feature.color === "amber" && "bg-amber-50",
+                                        feature.color === "indigo" && "bg-indigo-50",
+                                        feature.color === "blue" && "bg-blue-50",
+                                        feature.color === "emerald" && "bg-emerald-50",
+                                        feature.color === "slate" && "bg-slate-50",
+                                        feature.color === "purple" && "bg-purple-50"
+                                    )}>
+                                        <feature.icon className={cn(
+                                            "h-6 w-6",
+                                            feature.color === "amber" && "text-amber-600",
+                                            feature.color === "indigo" && "text-indigo-600",
+                                            feature.color === "blue" && "text-blue-600",
+                                            feature.color === "emerald" && "text-emerald-600",
+                                            feature.color === "slate" && "text-slate-600",
+                                            feature.color === "purple" && "text-purple-600"
+                                        )} />
+                                    </div>
+                                    <h4 className="font-semibold text-slate-900 mb-2">{feature.title}</h4>
+                                    <p className="text-sm text-slate-600 mb-4 leading-relaxed">{feature.description}</p>
+                                    <Button
+                                        variant="ghost"
+                                        size="sm"
+                                        className={cn(
+                                            "w-full h-9 border transition-all group-hover:shadow-md",
+                                            feature.color === "amber" && "text-amber-600 hover:bg-amber-50 border-amber-200 hover:border-amber-300",
+                                            feature.color === "indigo" && "text-indigo-600 hover:bg-indigo-50 border-indigo-200 hover:border-indigo-300",
+                                            feature.color === "blue" && "text-blue-600 hover:bg-blue-50 border-blue-200 hover:border-blue-300",
+                                            feature.color === "emerald" && "text-emerald-600 hover:bg-emerald-50 border-emerald-200 hover:border-emerald-300",
+                                            feature.color === "slate" && "text-slate-600 hover:bg-slate-50 border-slate-200 hover:border-slate-300",
+                                            feature.color === "purple" && "text-purple-600 hover:bg-purple-50 border-purple-200 hover:border-purple-300"
+                                        )}
+                                    >
+                                        {feature.cta}
+                                        <ArrowRight className="h-3.5 w-3.5 ml-2 group-hover:translate-x-1 transition-transform" />
+                                    </Button>
+                                </div>
+                            </motion.div>
+                        ))}
+                    </div>
+                </div>
+                {/* CTA Footer */}
+                <div className="sticky bottom-0 bg-white border-t border-slate-200 px-8 py-6">
+                    <div className="flex items-center justify-between gap-6">
+                        <div className="flex-1">
+                            <h4 className="font-semibold text-slate-900 mb-1">Ready to scale?</h4>
+                            <p className="text-sm text-slate-600">No commitment. We’ll tailor the demo to your documents and workflows.</p>
+                        </div>
+                        <div className="flex items-center gap-3">
+                            <Button
+                                variant="outline"
+                                size="lg"
+                                className="h-11 border-slate-300"
+                            >
+                                <Users className="h-4 w-4 mr-2" />
+                                Talk to Sales
+                            </Button>
+                            <Button
+                                size="lg"
+                                className="h-11 bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700 shadow-lg shadow-indigo-500/25 hover:shadow-xl hover:shadow-indigo-500/30"
+                            >
+                                <Rocket className="h-4 w-4 mr-2" />
+                                Start a production evaluation
+                                <Sparkles className="h-4 w-4 ml-2" />
+                            </Button>
+                        </div>
+                    </div>
+                </div>
+            </motion.div>
+        </div>
+    );
+}

frontend/src/components/ocr/UploadZone.jsx CHANGED Viewed

@@ -1,147 +1,251 @@
-import React, { useState } from "react";
-import { motion, AnimatePresence } from "framer-motion";
-import { Upload, FileText, Image, FileSpreadsheet, X, Sparkles } from "lucide-react";
-import { cn } from "@/lib/utils";
-export default function UploadZone({ onFileSelect, selectedFile, onClear }) {
-  const [isDragging, setIsDragging] = useState(false);
-  const handleDragOver = (e) => {
-    e.preventDefault();
-    setIsDragging(true);
-  };
-  const handleDragLeave = () => {
-    setIsDragging(false);
-  };
-  const handleDrop = (e) => {
-    e.preventDefault();
-    setIsDragging(false);
-    const file = e.dataTransfer.files[0];
-    if (file) onFileSelect(file);
-  };
-  const getFileIcon = (type) => {
-    if (type?.includes("image")) return Image;
-    if (type?.includes("spreadsheet") || type?.includes("excel")) return FileSpreadsheet;
-    return FileText;
-  };
-  const FileIcon = selectedFile ? getFileIcon(selectedFile.type) : FileText;
-  return (
-    <div className="w-full">
-      <AnimatePresence mode="wait">
-        {!selectedFile ? (
-          <motion.div
-            key="upload"
-            initial={{ opacity: 0, y: 10 }}
-            animate={{ opacity: 1, y: 0 }}
-            exit={{ opacity: 0, y: -10 }}
-            transition={{ duration: 0.2 }}
-            onDragOver={handleDragOver}
-            onDragLeave={handleDragLeave}
-            onDrop={handleDrop}
-            className={cn(
-              "relative group cursor-pointer",
-              "border-2 border-dashed rounded-2xl",
-              "transition-all duration-300 ease-out",
-              isDragging
-                ? "border-indigo-400 bg-indigo-50/50"
-                : "border-slate-200 hover:border-indigo-300 hover:bg-slate-50/50"
-            )}
-          >
-            <label className="flex flex-col items-center justify-center py-16 px-8 cursor-pointer">
-              <motion.div
-                animate={isDragging ? { scale: 1.1, y: -5 } : { scale: 1, y: 0 }}
-                className={cn(
-                  "h-16 w-16 rounded-2xl flex items-center justify-center mb-6 transition-colors duration-300",
-                  isDragging
-                    ? "bg-indigo-100"
-                    : "bg-gradient-to-br from-slate-100 to-slate-50 group-hover:from-indigo-100 group-hover:to-violet-50"
-                )}
-              >
-                <Upload
-                  className={cn(
-                    "h-7 w-7 transition-colors duration-300",
-                    isDragging ? "text-indigo-600" : "text-slate-400 group-hover:text-indigo-500"
-                  )}
-                />
-              </motion.div>
-              <div className="text-center">
-                <p className="text-lg font-semibold text-slate-700 mb-1">
-                  {isDragging ? "Drop your file here" : "Drop your file here, or browse"}
-                </p>
-                <p className="text-sm text-slate-400">
-                  Supports PDF, PNG, JPG, TIFF, DOCX up to 50MB
-                </p>
-              </div>
-              <div className="flex items-center gap-2 mt-6">
-                <div className="flex -space-x-1">
-                  {[
-                    "bg-red-100 text-red-600",
-                    "bg-blue-100 text-blue-600",
-                    "bg-green-100 text-green-600",
-                    "bg-amber-100 text-amber-600",
-                  ].map((color, i) => (
-                    <div
-                      key={i}
-                      className={`h-8 w-8 rounded-lg ${color.split(" ")[0]} flex items-center justify-center border-2 border-white`}
-                    >
-                      <FileText className={`h-4 w-4 ${color.split(" ")[1]}`} />
-                    </div>
-                  ))}
-                </div>
-                <span className="text-xs text-slate-400 ml-2">Multiple formats supported</span>
-              </div>
-              <input
-                type="file"
-                className="hidden"
-                accept=".pdf,.png,.jpg,.jpeg,.tiff,.docx,.xlsx"
-                onChange={(e) => e.target.files[0] && onFileSelect(e.target.files[0])}
-              />
-            </label>
-            {/* Decorative gradient border on hover */}
-            <div className="absolute inset-0 -z-10 rounded-2xl bg-gradient-to-r from-indigo-500 via-violet-500 to-purple-500 opacity-0 group-hover:opacity-10 blur-xl transition-opacity duration-500" />
-          </motion.div>
-        ) : (
-          <motion.div
-            key="selected"
-            initial={{ opacity: 0, scale: 0.95 }}
-            animate={{ opacity: 1, scale: 1 }}
-            exit={{ opacity: 0, scale: 0.95 }}
-            className="relative bg-gradient-to-br from-indigo-50 to-violet-50 rounded-xl p-3 border border-indigo-100"
-          >
-            <div className="flex items-center gap-3">
-              <div className="h-10 w-10 rounded-lg bg-white shadow-sm flex items-center justify-center flex-shrink-0">
-                <FileIcon className="h-5 w-5 text-indigo-600" />
-              </div>
-              <div className="flex-1 min-w-0">
-                <p className="font-medium text-slate-800 truncate text-sm">{selectedFile.name}</p>
-                <div className="flex items-center gap-2 text-xs text-slate-500">
-                  <span>{(selectedFile.size / 1024 / 1024).toFixed(2)} MB</span>
-                  <span className="text-indigo-500">•</span>
-                  <span className="text-indigo-600 flex items-center gap-1">
-                    <Sparkles className="h-3 w-3" />
-                    Ready for extraction
-                  </span>
-                </div>
-              </div>
-              <button
-                onClick={onClear}
-                className="h-8 w-8 rounded-lg bg-white hover:bg-red-50 border border-slate-200 hover:border-red-200 flex items-center justify-center text-slate-400 hover:text-red-500 transition-colors"
-              >
-                <X className="h-4 w-4" />
-              </button>
-            </div>
-          </motion.div>
-        )}
-      </AnimatePresence>
-    </div>
-  );
-}

+import React, { useState, useEffect } from "react";
+import { motion, AnimatePresence } from "framer-motion";
+import { Upload, FileText, Image, FileSpreadsheet, X, Sparkles, AlertCircle } from "lucide-react";
+import { cn } from "@/lib/utils";
+import { Input } from "@/components/ui/input";
+// Allowed file types
+// MIME types checked against `file.type` when validating an upload.
+const ALLOWED_TYPES = [
+  "application/pdf",
+  "image/png",
+  "image/jpeg",
+  "image/jpg",
+  "image/tiff",
+  "image/tif"
+];
+// Allowed file extensions (for fallback validation)
+// Compared against the lower-cased text after the last "." in the file name.
+const ALLOWED_EXTENSIONS = [".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"];
+// Maximum file size: 4 MB
+// NOTE: the user-facing messages in UploadZone spell out "4 MB"/"4MB" literally,
+// so they must be updated by hand if this limit changes.
+const MAX_FILE_SIZE = 4 * 1024 * 1024; // 4 MB in bytes
+export default function UploadZone({ onFileSelect, selectedFile, onClear, keyFields = "", onKeyFieldsChange = () => {} }) {
+  const [isDragging, setIsDragging] = useState(false);
+  const [error, setError] = useState(null);
+  const validateFile = (file) => {
+    // Reset error
+    setError(null);
+    // Check file type
+    const fileExtension = "." + file.name.split(".").pop().toLowerCase();
+    const isValidType = ALLOWED_TYPES.includes(file.type) || ALLOWED_EXTENSIONS.includes(fileExtension);
+    if (!isValidType) {
+      setError("Only PDF, PNG, JPG, and TIFF files are allowed.");
+      return false;
+    }
+    // Check file size
+    if (file.size > MAX_FILE_SIZE) {
+      const fileSizeMB = (file.size / 1024 / 1024).toFixed(2);
+      setError(`File size exceeds 4 MB limit. Your file is ${fileSizeMB} MB.`);
+      return false;
+    }
+    return true;
+  };
+  const handleFileSelect = (file) => {
+    if (validateFile(file)) {
+      setError(null);
+      onFileSelect(file);
+    }
+  };
+  const handleDragOver = (e) => {
+    e.preventDefault();
+    setIsDragging(true);
+  };
+  const handleDragLeave = () => {
+    setIsDragging(false);
+  };
+  const handleDrop = (e) => {
+    e.preventDefault();
+    setIsDragging(false);
+    const file = e.dataTransfer.files[0];
+    if (file) {
+      handleFileSelect(file);
+    }
+  };
+  const getFileIcon = (type) => {
+    if (type?.includes("image")) return Image;
+    if (type?.includes("spreadsheet") || type?.includes("excel")) return FileSpreadsheet;
+    return FileText;
+  };
+  const FileIcon = selectedFile ? getFileIcon(selectedFile.type) : FileText;
+  // Clear error when file is cleared
+  useEffect(() => {
+    if (!selectedFile) {
+      setError(null);
+    }
+  }, [selectedFile]);
+  return (
+    <div className="w-full">
+      <AnimatePresence mode="wait">
+        {!selectedFile ? (
+          <motion.div
+            key="upload"
+            initial={{ opacity: 0, y: 10 }}
+            animate={{ opacity: 1, y: 0 }}
+            exit={{ opacity: 0, y: -10 }}
+            transition={{ duration: 0.2 }}
+            onDragOver={handleDragOver}
+            onDragLeave={handleDragLeave}
+            onDrop={handleDrop}
+            className={cn(
+              "relative group cursor-pointer",
+              "border-2 border-dashed rounded-2xl",
+              "transition-all duration-300 ease-out",
+              isDragging
+                ? "border-indigo-400 bg-indigo-50/50"
+                : "border-slate-200 hover:border-indigo-300 hover:bg-slate-50/50"
+            )}
+          >
+            <label className="flex flex-col items-center justify-center py-16 px-8 cursor-pointer">
+              <motion.div
+                animate={isDragging ? { scale: 1.1, y: -5 } : { scale: 1, y: 0 }}
+                className={cn(
+                  "h-16 w-16 rounded-2xl flex items-center justify-center mb-6 transition-colors duration-300",
+                  isDragging
+                    ? "bg-indigo-100"
+                    : "bg-gradient-to-br from-slate-100 to-slate-50 group-hover:from-indigo-100 group-hover:to-violet-50"
+                )}
+              >
+                <Upload
+                  className={cn(
+                    "h-7 w-7 transition-colors duration-300",
+                    isDragging ? "text-indigo-600" : "text-slate-400 group-hover:text-indigo-500"
+                  )}
+                />
+              </motion.div>
+              <div className="text-center">
+                <p className="text-lg font-semibold text-slate-700 mb-1">
+                  {isDragging ? "Drop your file here" : "Drop your file here, or browse"}
+                </p>
+                <p className="text-sm text-slate-400">
+                  Supports PDF, PNG, JPG, TIFF up to 4MB
+                </p>
+              </div>
+              <div className="flex items-center gap-2 mt-6">
+                <div className="flex -space-x-1">
+                  {[
+                    "bg-red-100 text-red-600",
+                    "bg-blue-100 text-blue-600",
+                    "bg-green-100 text-green-600",
+                    "bg-amber-100 text-amber-600",
+                  ].map((color, i) => (
+                    <div
+                      key={i}
+                      className={`h-8 w-8 rounded-lg ${color.split(" ")[0]} flex items-center justify-center border-2 border-white`}
+                    >
+                      <FileText className={`h-4 w-4 ${color.split(" ")[1]}`} />
+                    </div>
+                  ))}
+                </div>
+                <span className="text-xs text-slate-400 ml-2">Multiple formats supported</span>
+              </div>
+              <input
+                type="file"
+                className="hidden"
+                accept=".pdf,.png,.jpg,.jpeg,.tiff,.tif"
+                onChange={(e) => {
+                  const file = e.target.files[0];
+                  if (file) {
+                    handleFileSelect(file);
+                  }
+                  // Reset input so same file can be selected again after error
+                  e.target.value = "";
+                }}
+              />
+            </label>
+            {/* Decorative gradient border on hover */}
+            <div className="absolute inset-0 -z-10 rounded-2xl bg-gradient-to-r from-indigo-500 via-violet-500 to-purple-500 opacity-0 group-hover:opacity-10 blur-xl transition-opacity duration-500" />
+          </motion.div>
+        ) : (
+          <motion.div
+            key="selected"
+            initial={{ opacity: 0, scale: 0.95 }}
+            animate={{ opacity: 1, scale: 1 }}
+            exit={{ opacity: 0, scale: 0.95 }}
+            className="grid grid-cols-1 lg:grid-cols-2 gap-3"
+          >
+            {/* File Info Box */}
+            <div className="relative bg-gradient-to-br from-indigo-50 to-violet-50 rounded-xl p-3 border border-indigo-100">
+              <div className="flex items-center gap-3">
+                <div className="h-10 w-10 rounded-lg bg-white shadow-sm flex items-center justify-center flex-shrink-0">
+                  <FileIcon className="h-5 w-5 text-indigo-600" />
+                </div>
+                <div className="flex-1 min-w-0">
+                  <p className="font-medium text-slate-800 truncate text-sm">{selectedFile.name}</p>
+                  <div className="flex items-center gap-2 text-xs text-slate-500">
+                    <span>{(selectedFile.size / 1024 / 1024).toFixed(2)} MB</span>
+                    <span className="text-indigo-500">•</span>
+                    <span className="text-indigo-600 flex items-center gap-1">
+                      <Sparkles className="h-3 w-3" />
+                      Ready for extraction
+                    </span>
+                  </div>
+                </div>
+                <button
+                  onClick={onClear}
+                  className="h-8 w-8 rounded-lg bg-white hover:bg-red-50 border border-slate-200 hover:border-red-200 flex items-center justify-center text-slate-400 hover:text-red-500 transition-colors"
+                >
+                  <X className="h-4 w-4" />
+                </button>
+              </div>
+            </div>
+            {/* Key Fields Box */}
+            <div className="relative bg-white rounded-xl p-3 border border-slate-200">
+              <label className="block text-xs font-medium text-slate-600 mb-1.5">
+                <span className="font-bold">Key Fields</span> <span className="font-normal">(if required)</span>
+              </label>
+              <Input
+                type="text"
+                value={keyFields || ""}
+                onChange={(e) => {
+                  if (onKeyFieldsChange) {
+                    onKeyFieldsChange(e.target.value);
+                  }
+                }}
+                placeholder="Invoice Number, Invoice Date, PO Number, Supplier Name, Total Amount, Payment terms, Additional Notes"
+                className="h-8 text-xs border-slate-200 focus:border-indigo-300 focus:ring-indigo-200"
+              />
+            </div>
+          </motion.div>
+        )}
+      </AnimatePresence>
+      {/* Error Message */}
+      {error && (
+        <motion.div
+          initial={{ opacity: 0, y: -10 }}
+          animate={{ opacity: 1, y: 0 }}
+          exit={{ opacity: 0, y: -10 }}
+          className="mt-3 p-3 bg-red-50 border border-red-200 rounded-xl flex items-start gap-2"
+        >
+          <AlertCircle className="h-4 w-4 text-red-600 flex-shrink-0 mt-0.5" />
+          <p className="text-sm text-red-700 flex-1">{error}</p>
+          <button
+            onClick={() => setError(null)}
+            className="text-red-600 hover:text-red-800 transition-colors"
+          >
+            <X className="h-4 w-4" />
+          </button>
+        </motion.div>
+      )}
+    </div>
+  );
+}

frontend/src/components/ui/badge.jsx CHANGED Viewed

@@ -1,24 +1,24 @@
-import React from "react";
-import { cn } from "@/lib/utils";
-const variants = {
-  default:
-    "bg-slate-900 text-white hover:bg-slate-900/90",
-  secondary:
-    "bg-slate-100 text-slate-800 border border-slate-200",
-  outline:
-    "border border-slate-200 text-slate-700",
-};
-export function Badge({ className, variant = "default", ...props }) {
-  return (
-    <span
-      className={cn(
-        "inline-flex items-center rounded-full px-2.5 py-0.5 text-xs font-medium",
-        variants[variant] || variants.default,
-        className
-      )}
-      {...props}
-    />
-  );
-}

+import React from "react";
+import { cn } from "@/lib/utils";
+const variants = {
+  default:
+    "bg-slate-900 text-white hover:bg-slate-900/90",
+  secondary:
+    "bg-slate-100 text-slate-800 border border-slate-200",
+  outline:
+    "border border-slate-200 text-slate-700",
+};
+export function Badge({ className, variant = "default", ...props }) {
+  return (
+    <span
+      className={cn(
+        "inline-flex items-center rounded-full px-2.5 py-0.5 text-xs font-medium",
+        variants[variant] || variants.default,
+        className
+      )}
+      {...props}
+    />
+  );
+}

frontend/src/components/ui/button.jsx CHANGED Viewed

@@ -1,38 +1,38 @@
-import React from "react";
-import { cn } from "@/lib/utils";
-const base =
-  "inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500 focus-visible:ring-offset-2 disabled:opacity-50 disabled:pointer-events-none";
-const variants = {
-  default: "bg-indigo-600 text-white hover:bg-indigo-700 shadow-sm",
-  outline:
-    "border border-slate-200 bg-white text-slate-900 hover:bg-slate-50",
-  ghost: "bg-transparent text-slate-700 hover:bg-slate-100",
-};
-const sizes = {
-  default: "h-10 px-4 py-2",
-  sm: "h-8 px-3 text-xs",
-  lg: "h-11 px-6 text-sm",
-  icon: "h-9 w-9",
-};
-export function Button({
-  className,
-  variant = "default",
-  size = "default",
-  ...props
-}) {
-  return (
-    <button
-      className={cn(
-        base,
-        variants[variant] || variants.default,
-        sizes[size] || sizes.default,
-        className
-      )}
-      {...props}
-    />
-  );
-}

+import React from "react";
+import { cn } from "@/lib/utils";
+const base =
+  "inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500 focus-visible:ring-offset-2 disabled:opacity-50 disabled:pointer-events-none";
+const variants = {
+  default: "bg-indigo-600 text-white hover:bg-indigo-700 shadow-sm",
+  outline:
+    "border border-slate-200 bg-white text-slate-900 hover:bg-slate-50",
+  ghost: "bg-transparent text-slate-700 hover:bg-slate-100",
+};
+const sizes = {
+  default: "h-10 px-4 py-2",
+  sm: "h-8 px-3 text-xs",
+  lg: "h-11 px-6 text-sm",
+  icon: "h-9 w-9",
+};
+export function Button({
+  className,
+  variant = "default",
+  size = "default",
+  ...props
+}) {
+  return (
+    <button
+      className={cn(
+        base,
+        variants[variant] || variants.default,
+        sizes[size] || sizes.default,
+        className
+      )}
+      {...props}
+    />
+  );
+}

frontend/src/components/ui/dropdown-menu.jsx CHANGED Viewed

@@ -1,113 +1,113 @@
-import React, {
-  createContext,
-  useContext,
-  useState,
-  useRef,
-  useEffect,
-} from "react";
-import { cn } from "@/lib/utils";
-const DropdownContext = createContext(null);
-export function DropdownMenu({ children }) {
-  const [open, setOpen] = useState(false);
-  const triggerRef = useRef(null);
-  // Close on outside click
-  useEffect(() => {
-    if (!open) return;
-    function handleClick(e) {
-      if (!triggerRef.current) return;
-      if (!triggerRef.current.parentElement.contains(e.target)) {
-        setOpen(false);
-      }
-    }
-    document.addEventListener("mousedown", handleClick);
-    return () => document.removeEventListener("mousedown", handleClick);
-  }, [open]);
-  return (
-    <DropdownContext.Provider value={{ open, setOpen, triggerRef }}>
-      <div className="relative inline-block">{children}</div>
-    </DropdownContext.Provider>
-  );
-}
-export function DropdownMenuTrigger({ asChild, children }) {
-  const { setOpen, triggerRef } = useContext(DropdownContext);
-  const handleClick = (e) => {
-    e.stopPropagation();
-    setOpen((o) => !o);
-  };
-  if (asChild && React.isValidElement(children)) {
-    return React.cloneElement(children, {
-      ref: triggerRef,
-      onClick: (e) => {
-        children.props.onClick?.(e);
-        handleClick(e);
-      },
-    });
-  }
-  return (
-    <button
-      ref={triggerRef}
-      type="button"
-      onClick={handleClick}
-      className="inline-flex"
-    >
-      {children}
-    </button>
-  );
-}
-export function DropdownMenuContent({ className, align = "end", ...props }) {
-  const { open } = useContext(DropdownContext);
-  if (!open) return null;
-  const alignment =
-    align === "end"
-      ? "right-0 origin-top-right"
-      : align === "start"
-      ? "left-0 origin-top-left"
-      : "left-1/2 -translate-x-1/2 origin-top";
-  return (
-    <div
-      className={cn(
-        "absolute z-50 mt-2 min-w-[8rem] rounded-md border border-slate-200 bg-white shadow-lg focus:outline-none",
-        alignment,
-        className
-      )}
-      {...props}
-    />
-  );
-}
-export function DropdownMenuItem({ className, onClick, ...props }) {
-  const { setOpen } = useContext(DropdownContext);
-  const handleClick = (e) => {
-    onClick?.(e);
-    setOpen(false);
-  };
-  return (
-    <div
-      className={cn(
-        "flex cursor-pointer select-none items-center px-2 py-1.5 text-sm text-slate-700 hover:bg-slate-100 rounded-md",
-        className
-      )}
-      onClick={handleClick}
-      {...props}
-    />
-  );
-}
-export function DropdownMenuSeparator({ className }) {
-  return (
-    <div
-      className={cn("my-1 h-px bg-slate-200 w-full", className)}
-    />
-  );
-}

+import React, {
+  createContext,
+  useContext,
+  useState,
+  useRef,
+  useEffect,
+} from "react";
+import { cn } from "@/lib/utils";
+const DropdownContext = createContext(null);
+export function DropdownMenu({ children }) {
+  const [open, setOpen] = useState(false);
+  const triggerRef = useRef(null);
+  // Close on outside click
+  useEffect(() => {
+    if (!open) return;
+    function handleClick(e) {
+      if (!triggerRef.current) return;
+      if (!triggerRef.current.parentElement.contains(e.target)) {
+        setOpen(false);
+      }
+    }
+    document.addEventListener("mousedown", handleClick);
+    return () => document.removeEventListener("mousedown", handleClick);
+  }, [open]);
+  return (
+    <DropdownContext.Provider value={{ open, setOpen, triggerRef }}>
+      <div className="relative inline-block">{children}</div>
+    </DropdownContext.Provider>
+  );
+}
+export function DropdownMenuTrigger({ asChild, children }) {
+  const { setOpen, triggerRef } = useContext(DropdownContext);
+  const handleClick = (e) => {
+    e.stopPropagation();
+    setOpen((o) => !o);
+  };
+  if (asChild && React.isValidElement(children)) {
+    return React.cloneElement(children, {
+      ref: triggerRef,
+      onClick: (e) => {
+        children.props.onClick?.(e);
+        handleClick(e);
+      },
+    });
+  }
+  return (
+    <button
+      ref={triggerRef}
+      type="button"
+      onClick={handleClick}
+      className="inline-flex"
+    >
+      {children}
+    </button>
+  );
+}
+export function DropdownMenuContent({ className, align = "end", ...props }) {
+  const { open } = useContext(DropdownContext);
+  if (!open) return null;
+  const alignment =
+    align === "end"
+      ? "right-0 origin-top-right"
+      : align === "start"
+      ? "left-0 origin-top-left"
+      : "left-1/2 -translate-x-1/2 origin-top";
+  return (
+    <div
+      className={cn(
+        "absolute z-50 mt-2 min-w-[8rem] rounded-md border border-slate-200 bg-white shadow-lg focus:outline-none",
+        alignment,
+        className
+      )}
+      {...props}
+    />
+  );
+}
+export function DropdownMenuItem({ className, onClick, ...props }) {
+  const { setOpen } = useContext(DropdownContext);
+  const handleClick = (e) => {
+    onClick?.(e);
+    setOpen(false);
+  };
+  return (
+    <div
+      className={cn(
+        "flex cursor-pointer select-none items-center px-2 py-1.5 text-sm text-slate-700 hover:bg-slate-100 rounded-md",
+        className
+      )}
+      onClick={handleClick}
+      {...props}
+    />
+  );
+}
+export function DropdownMenuSeparator({ className }) {
+  return (
+    <div
+      className={cn("my-1 h-px bg-slate-200 w-full", className)}
+    />
+  );
+}

frontend/src/components/ui/input.jsx CHANGED Viewed

@@ -1,14 +1,14 @@
-import React from "react";
-import { cn } from "@/lib/utils";
-export function Input({ className, ...props }) {
-  return (
-    <input
-      className={cn(
-        "flex h-10 w-full rounded-md border border-slate-200 bg-white px-3 py-2 text-sm text-slate-900 shadow-sm placeholder:text-slate-400 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500 focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50",
-        className
-      )}
-      {...props}
-    />
-  );
-}

+import React from "react";
+import { cn } from "@/lib/utils";
+export function Input({ className, ...props }) {
+  return (
+    <input
+      className={cn(
+        "flex h-10 w-full rounded-md border border-slate-200 bg-white px-3 py-2 text-sm text-slate-900 shadow-sm placeholder:text-slate-400 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500 focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50",
+        className
+      )}
+      {...props}
+    />
+  );
+}

frontend/src/components/ui/select.jsx CHANGED Viewed

@@ -1,116 +1,116 @@
-import React, {
-  createContext,
-  useContext,
-  useState,
-  useRef,
-  useEffect,
-} from "react";
-import { cn } from "@/lib/utils";
-const SelectContext = createContext(null);
-export function Select({ value, onValueChange, children }) {
-  const [open, setOpen] = useState(false);
-  const [items, setItems] = useState({});
-  const triggerRef = useRef(null);
-  // Close on outside click
-  useEffect(() => {
-    if (!open) return;
-    function handleClick(e) {
-      if (!triggerRef.current) return;
-      if (!triggerRef.current.parentElement.contains(e.target)) {
-        setOpen(false);
-      }
-    }
-    document.addEventListener("mousedown", handleClick);
-    return () => document.removeEventListener("mousedown", handleClick);
-  }, [open]);
-  const registerItem = (val, label) => {
-    setItems((prev) => ({ ...prev, [val]: label }));
-  };
-  return (
-    <SelectContext.Provider
-      value={{
-        value,
-        onValueChange,
-        open,
-        setOpen,
-        items,
-        registerItem,
-        triggerRef,
-      }}
-    >
-      <div className="relative inline-block">{children}</div>
-    </SelectContext.Provider>
-  );
-}
-export function SelectTrigger({ className, children }) {
-  const { setOpen, triggerRef } = useContext(SelectContext);
-  return (
-    <button
-      type="button"
-      ref={triggerRef}
-      onClick={() => setOpen((o) => !o)}
-      className={cn(
-        "flex items-center justify-between rounded-md border bg-white px-3 py-2 text-sm text-slate-700 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500",
-        className
-      )}
-    >
-      {children}
-    </button>
-  );
-}
-export function SelectValue({ placeholder }) {
-  const { value, items } = useContext(SelectContext);
-  const label = value ? items[value] : null;
-  return (
-    <span className={cn("truncate text-sm", !label && "text-slate-400")}>
-      {label || placeholder}
-    </span>
-  );
-}
-export function SelectContent({ className, children }) {
-  const { open } = useContext(SelectContext);
-  if (!open) return null;
-  return (
-    <div
-      className={cn(
-        "absolute z-50 mt-2 min-w-[8rem] rounded-md border border-slate-200 bg-white shadow-lg",
-        className
-      )}
-    >
-      {children}
-    </div>
-  );
-}
-export function SelectItem({ value, children, className }) {
-  const { onValueChange, setOpen, registerItem } = useContext(SelectContext);
-  useEffect(() => {
-    registerItem(value, typeof children === "string" ? children : String(children));
-  }, [value, children, registerItem]);
-  const handleClick = () => {
-    onValueChange?.(value);
-    setOpen(false);
-  };
-  return (
-    <div
-      onClick={handleClick}
-      className={cn(
-        "cursor-pointer select-none px-3 py-1.5 text-sm text-slate-700 hover:bg-slate-100",
-        className
-      )}
-    >
-      {children}
-    </div>
-  );
-}

+import React, {
+  createContext,
+  useContext,
+  useState,
+  useRef,
+  useEffect,
+} from "react";
+import { cn } from "@/lib/utils";
+const SelectContext = createContext(null);
+export function Select({ value, onValueChange, children }) {
+  const [open, setOpen] = useState(false);
+  const [items, setItems] = useState({});
+  const triggerRef = useRef(null);
+  // Close on outside click
+  useEffect(() => {
+    if (!open) return;
+    function handleClick(e) {
+      if (!triggerRef.current) return;
+      if (!triggerRef.current.parentElement.contains(e.target)) {
+        setOpen(false);
+      }
+    }
+    document.addEventListener("mousedown", handleClick);
+    return () => document.removeEventListener("mousedown", handleClick);
+  }, [open]);
+  const registerItem = (val, label) => {
+    setItems((prev) => ({ ...prev, [val]: label }));
+  };
+  return (
+    <SelectContext.Provider
+      value={{
+        value,
+        onValueChange,
+        open,
+        setOpen,
+        items,
+        registerItem,
+        triggerRef,
+      }}
+    >
+      <div className="relative inline-block">{children}</div>
+    </SelectContext.Provider>
+  );
+}
+export function SelectTrigger({ className, children }) {
+  const { setOpen, triggerRef } = useContext(SelectContext);
+  return (
+    <button
+      type="button"
+      ref={triggerRef}
+      onClick={() => setOpen((o) => !o)}
+      className={cn(
+        "flex items-center justify-between rounded-md border bg-white px-3 py-2 text-sm text-slate-700 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500",
+        className
+      )}
+    >
+      {children}
+    </button>
+  );
+}
+export function SelectValue({ placeholder }) {
+  const { value, items } = useContext(SelectContext);
+  const label = value ? items[value] : null;
+  return (
+    <span className={cn("truncate text-sm", !label && "text-slate-400")}>
+      {label || placeholder}
+    </span>
+  );
+}
+export function SelectContent({ className, children }) {
+  const { open } = useContext(SelectContext);
+  if (!open) return null;
+  return (
+    <div
+      className={cn(
+        "absolute z-50 mt-2 min-w-[8rem] rounded-md border border-slate-200 bg-white shadow-lg",
+        className
+      )}
+    >
+      {children}
+    </div>
+  );
+}
+export function SelectItem({ value, children, className }) {
+  const { onValueChange, setOpen, registerItem } = useContext(SelectContext);
+  useEffect(() => {
+    registerItem(value, typeof children === "string" ? children : String(children));
+  }, [value, children, registerItem]);
+  const handleClick = () => {
+    onValueChange?.(value);
+    setOpen(false);
+  };
+  return (
+    <div
+      onClick={handleClick}
+      className={cn(
+        "cursor-pointer select-none px-3 py-1.5 text-sm text-slate-700 hover:bg-slate-100",
+        className
+      )}
+    >
+      {children}
+    </div>
+  );
+}

frontend/src/components/ui/separator.jsx ADDED Viewed

	@@ -0,0 +1,16 @@

+import React from "react";
+import { cn } from "@/lib/utils";
+export function Separator({ className, orientation = "horizontal", ...props }) {
+  return (
+    <div
+      className={cn(
+        "shrink-0 bg-slate-200",
+        orientation === "horizontal" ? "h-px w-full" : "h-full w-px",
+        className
+      )}
+      {...props}
+    />
+  );
+}

frontend/src/components/ui/tabs.jsx CHANGED Viewed

@@ -1,45 +1,45 @@
-import React, { createContext, useContext } from "react";
-import { cn } from "@/lib/utils";
-const TabsContext = createContext(null);
-export function Tabs({ value, onValueChange, children, className }) {
-  return (
-    <TabsContext.Provider value={{ value, onValueChange }}>
-      <div className={className}>{children}</div>
-    </TabsContext.Provider>
-  );
-}
-export function TabsList({ className, ...props }) {
-  return (
-    <div
-      className={cn(
-        "inline-flex items-center justify-center rounded-lg bg-slate-100 p-0.5",
-        className
-      )}
-      {...props}
-    />
-  );
-}
-export function TabsTrigger({ value, className, children, ...props }) {
-  const ctx = useContext(TabsContext);
-  const selected = ctx?.value === value;
-  return (
-    <button
-      type="button"
-      onClick={() => ctx?.onValueChange && ctx.onValueChange(value)}
-      data-state={selected ? "active" : "inactive"}
-      className={cn(
-        "inline-flex items-center justify-center rounded-md px-3 py-1.5 text-xs font-medium text-slate-600 transition-colors",
-        "data-[state=active]:bg-white data-[state=active]:text-slate-900",
-        className
-      )}
-      {...props}
-    >
-      {children}
-    </button>
-  );
-}

+import React, { createContext, useContext } from "react";
+import { cn } from "@/lib/utils";
+const TabsContext = createContext(null);
+export function Tabs({ value, onValueChange, children, className }) {
+  return (
+    <TabsContext.Provider value={{ value, onValueChange }}>
+      <div className={className}>{children}</div>
+    </TabsContext.Provider>
+  );
+}
+export function TabsList({ className, ...props }) {
+  return (
+    <div
+      className={cn(
+        "inline-flex items-center justify-center rounded-lg bg-slate-100 p-0.5",
+        className
+      )}
+      {...props}
+    />
+  );
+}
+export function TabsTrigger({ value, className, children, ...props }) {
+  const ctx = useContext(TabsContext);
+  const selected = ctx?.value === value;
+  return (
+    <button
+      type="button"
+      onClick={() => ctx?.onValueChange && ctx.onValueChange(value)}
+      data-state={selected ? "active" : "inactive"}
+      className={cn(
+        "inline-flex items-center justify-center rounded-md px-3 py-1.5 text-xs font-medium text-slate-600 transition-colors",
+        "data-[state=active]:bg-white data-[state=active]:text-slate-900",
+        className
+      )}
+      {...props}
+    >
+      {children}
+    </button>
+  );
+}

frontend/src/config/firebase.js ADDED Viewed

	@@ -0,0 +1,30 @@

+/**
+ * Firebase configuration and initialization
+ */
+import { initializeApp } from 'firebase/app';
+import { getAuth, GoogleAuthProvider } from 'firebase/auth';
+// Firebase configuration from environment variables
+const firebaseConfig = {
+  apiKey: import.meta.env.VITE_FIREBASE_API_KEY,
+  authDomain: import.meta.env.VITE_FIREBASE_AUTH_DOMAIN,
+  projectId: import.meta.env.VITE_FIREBASE_PROJECT_ID,
+  storageBucket: import.meta.env.VITE_FIREBASE_STORAGE_BUCKET,
+  messagingSenderId: import.meta.env.VITE_FIREBASE_MESSAGING_SENDER_ID,
+  appId: import.meta.env.VITE_FIREBASE_APP_ID,
+};
+// Initialize Firebase
+const app = initializeApp(firebaseConfig);
+// Initialize Firebase Authentication and get a reference to the service
+export const auth = getAuth(app);
+// Configure Google Auth Provider
+export const googleProvider = new GoogleAuthProvider();
+googleProvider.setCustomParameters({
+  prompt: 'select_account'
+});
+export default app;

frontend/src/contexts/AuthContext.jsx ADDED Viewed

	@@ -0,0 +1,115 @@

+import React, { createContext, useContext, useState, useEffect } from "react";
+import { signInWithPopup, signOut as firebaseSignOut } from "firebase/auth";
+import { auth, googleProvider } from "@/config/firebase";
+import { getCurrentUser, firebaseLogin, requestOTP, verifyOTP, logout as apiLogout } from "@/services/auth";
+const AuthContext = createContext(null);
+export function AuthProvider({ children }) {
+  const [user, setUser] = useState(null);
+  const [loading, setLoading] = useState(true);
+  const [token, setToken] = useState(localStorage.getItem("auth_token"));
+  useEffect(() => {
+    // Check if user is already authenticated
+    if (token) {
+      checkAuth();
+    } else {
+      setLoading(false);
+    }
+  }, [token]);
+  const checkAuth = async () => {
+    try {
+      const userData = await getCurrentUser();
+      setUser(userData);
+    } catch (error) {
+      // Token is invalid, clear it
+      localStorage.removeItem("auth_token");
+      setToken(null);
+      setUser(null);
+    } finally {
+      setLoading(false);
+    }
+  };
+  const handleFirebaseLogin = async () => {
+    try {
+      const result = await signInWithPopup(auth, googleProvider);
+      const idToken = await result.user.getIdToken();
+      const response = await firebaseLogin(idToken);
+      handleAuthCallback(response.token);
+    } catch (error) {
+      if (error.code === 'auth/popup-closed' || error.code === 'auth/cancelled-popup-request') {
+        // User closed popup or cancelled - don't show error
+        return;
+      }
+      console.error("Firebase login error:", error);
+      throw new Error(error.message || "Firebase authentication failed");
+    }
+  };
+  const handleOTPRequest = async (email) => {
+    try {
+      await requestOTP(email);
+    } catch (error) {
+      console.error("OTP request error:", error);
+      throw error;
+    }
+  };
+  const handleOTPVerify = async (email, otp) => {
+    try {
+      const response = await verifyOTP(email, otp);
+      handleAuthCallback(response.token);
+    } catch (error) {
+      console.error("OTP verify error:", error);
+      throw error;
+    }
+  };
+  const handleLogout = async () => {
+    try {
+      // Sign out from Firebase if user was using Firebase auth
+      if (auth.currentUser) {
+        await firebaseSignOut(auth);
+      }
+      await apiLogout();
+    } catch (error) {
+      console.error("Logout error:", error);
+    } finally {
+      localStorage.removeItem("auth_token");
+      setToken(null);
+      setUser(null);
+    }
+  };
+  const handleAuthCallback = (newToken) => {
+    localStorage.setItem("auth_token", newToken);
+    setToken(newToken);
+    checkAuth();
+  };
+  const value = {
+    user,
+    token,
+    loading,
+    firebaseLogin: handleFirebaseLogin,
+    requestOTP: handleOTPRequest,
+    verifyOTP: handleOTPVerify,
+    logout: handleLogout,
+    handleAuthCallback,
+    isAuthenticated: !!user,
+  };
+  return <AuthContext.Provider value={value}>{children}</AuthContext.Provider>;
+}
+export function useAuth() {
+  const context = useContext(AuthContext);
+  if (!context) {
+    throw new Error("useAuth must be used within an AuthProvider");
+  }
+  return context;
+}