EZOFISOCR

Running

App Files Files Community

Seth committed on Jan 3

Commit

272f36f

2 Parent(s): 8e8c6a4 daae7a9

Merge remote changes, keeping API key implementation

Browse files

Files changed (38) hide show

Dockerfile +83 -83
FIREBASE_OTP_SETUP.md +287 -0
GOOGLE_OAUTH_SETUP.md +82 -0
IMPLEMENTATION_COMPLETE.md +260 -0
README.md +10 -10
backend/app/apollo_service.py +447 -0
backend/app/auth.py +95 -0
backend/app/auth_routes.py +239 -0
backend/app/email_validator.py +64 -0
backend/app/firebase_auth.py +95 -0
backend/app/main.py +745 -0
backend/app/models.py +106 -0
backend/app/monday_service.py +394 -0
backend/app/openrouter_client.py +865 -0
backend/app/otp_service.py +200 -0
backend/app/schemas.py +29 -0
backend/requirements.txt +14 -14
frontend/index.html +16 -0
frontend/package.json +26 -26
frontend/src/App.jsx +109 -0
frontend/src/Layout.jsx +182 -0
frontend/src/components/ExportButtons.jsx +695 -0
frontend/src/components/ShareLinkModal.jsx +144 -0
frontend/src/components/ShareModal.jsx +200 -0
frontend/src/components/auth/LoginForm.jsx +515 -0
frontend/src/components/ocr/DocumentPreview.jsx +232 -0
frontend/src/components/ocr/ExtractionOutput.jsx +1204 -0
frontend/src/components/ocr/ProcessingStatus.jsx +121 -0
frontend/src/components/ocr/UpgradeModal.jsx +216 -0
frontend/src/components/ocr/UploadZone.jsx +254 -0
frontend/src/components/ui/separator.jsx +19 -0
frontend/src/config/firebase.js +33 -0
frontend/src/contexts/AuthContext.jsx +118 -0
frontend/src/pages/Dashboard.jsx +479 -0
frontend/src/pages/History.jsx +862 -0
frontend/src/pages/ShareHandler.jsx +98 -0
frontend/src/services/api.js +176 -0
frontend/src/services/auth.js +114 -0

Dockerfile CHANGED Viewed

@@ -1,83 +1,83 @@
-# ---------- 1) Build frontend (React + Vite) ----------
-FROM node:20-alpine AS frontend-build
-WORKDIR /frontend
-# Accept build arguments for Vite environment variables
-ARG VITE_FIREBASE_API_KEY
-ARG VITE_FIREBASE_AUTH_DOMAIN
-ARG VITE_FIREBASE_PROJECT_ID
-ARG VITE_FIREBASE_STORAGE_BUCKET
-ARG VITE_FIREBASE_MESSAGING_SENDER_ID
-ARG VITE_FIREBASE_APP_ID
-ARG VITE_API_BASE_URL
-# Set as environment variables so they're available to the build script
-ENV VITE_FIREBASE_API_KEY=$VITE_FIREBASE_API_KEY
-ENV VITE_FIREBASE_AUTH_DOMAIN=$VITE_FIREBASE_AUTH_DOMAIN
-ENV VITE_FIREBASE_PROJECT_ID=$VITE_FIREBASE_PROJECT_ID
-ENV VITE_FIREBASE_STORAGE_BUCKET=$VITE_FIREBASE_STORAGE_BUCKET
-ENV VITE_FIREBASE_MESSAGING_SENDER_ID=$VITE_FIREBASE_MESSAGING_SENDER_ID
-ENV VITE_FIREBASE_APP_ID=$VITE_FIREBASE_APP_ID
-ENV VITE_API_BASE_URL=$VITE_API_BASE_URL
-# Install frontend dependencies
-COPY frontend/package*.json ./
-RUN npm install
-# Copy rest of frontend
-COPY frontend/ .
-# Create .env file from environment variables and build
-# Inline the script to avoid permission issues
-RUN echo "Checking environment variables..." && \
-    [ -z "$VITE_FIREBASE_API_KEY" ] && echo "WARNING: VITE_FIREBASE_API_KEY is not set" || echo "✓ VITE_FIREBASE_API_KEY is set" && \
-    [ -z "$VITE_FIREBASE_AUTH_DOMAIN" ] && echo "WARNING: VITE_FIREBASE_AUTH_DOMAIN is not set" || echo "✓ VITE_FIREBASE_AUTH_DOMAIN is set" && \
-    [ -z "$VITE_FIREBASE_PROJECT_ID" ] && echo "WARNING: VITE_FIREBASE_PROJECT_ID is not set" || echo "✓ VITE_FIREBASE_PROJECT_ID is set" && \
-    echo "VITE_FIREBASE_API_KEY=${VITE_FIREBASE_API_KEY:-}" > .env && \
-    echo "VITE_FIREBASE_AUTH_DOMAIN=${VITE_FIREBASE_AUTH_DOMAIN:-}" >> .env && \
-    echo "VITE_FIREBASE_PROJECT_ID=${VITE_FIREBASE_PROJECT_ID:-}" >> .env && \
-    echo "VITE_FIREBASE_STORAGE_BUCKET=${VITE_FIREBASE_STORAGE_BUCKET:-}" >> .env && \
-    echo "VITE_FIREBASE_MESSAGING_SENDER_ID=${VITE_FIREBASE_MESSAGING_SENDER_ID:-}" >> .env && \
-    echo "VITE_FIREBASE_APP_ID=${VITE_FIREBASE_APP_ID:-}" >> .env && \
-    echo "VITE_API_BASE_URL=${VITE_API_BASE_URL:-}" >> .env && \
-    echo "Created .env file with environment variables" && \
-    npm run build
-# Vite will output to /frontend/dist by default
-# ---------- 2) Backend (FastAPI + Python) ----------
-FROM python:3.11-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-ENV PYTHONUNBUFFERED=1
-WORKDIR /app
-# System deps (optional but useful for some libs)
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential \
-    && rm -rf /var/lib/apt/lists/*
-# Install backend dependencies
-COPY backend/requirements.txt ./backend/requirements.txt
-RUN pip install --no-cache-dir -r backend/requirements.txt
-# Copy backend code
-COPY backend ./backend
-# Copy built frontend into backend/frontend_dist
-# FastAPI will serve from this folder later
-RUN mkdir -p backend/frontend_dist
-COPY --from=frontend-build /frontend/dist ./backend/frontend_dist
-# Create data directory for SQLite
-RUN mkdir -p data
-# Env vars used in backend/db.py etc.
-ENV DB_PATH=/app/data/app.db
-ENV PORT=7860
-ENV PYTHONPATH=/app
-EXPOSE 7860
-# Launch FastAPI app (we'll use backend.app.main:app)
-CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "7860"]

+# ---------- 1) Build frontend (React + Vite) ----------
+FROM node:20-alpine AS frontend-build
+WORKDIR /frontend
+# Accept build arguments for Vite environment variables
+ARG VITE_FIREBASE_API_KEY
+ARG VITE_FIREBASE_AUTH_DOMAIN
+ARG VITE_FIREBASE_PROJECT_ID
+ARG VITE_FIREBASE_STORAGE_BUCKET
+ARG VITE_FIREBASE_MESSAGING_SENDER_ID
+ARG VITE_FIREBASE_APP_ID
+ARG VITE_API_BASE_URL
+# Set as environment variables so they're available to the build script
+ENV VITE_FIREBASE_API_KEY=$VITE_FIREBASE_API_KEY
+ENV VITE_FIREBASE_AUTH_DOMAIN=$VITE_FIREBASE_AUTH_DOMAIN
+ENV VITE_FIREBASE_PROJECT_ID=$VITE_FIREBASE_PROJECT_ID
+ENV VITE_FIREBASE_STORAGE_BUCKET=$VITE_FIREBASE_STORAGE_BUCKET
+ENV VITE_FIREBASE_MESSAGING_SENDER_ID=$VITE_FIREBASE_MESSAGING_SENDER_ID
+ENV VITE_FIREBASE_APP_ID=$VITE_FIREBASE_APP_ID
+ENV VITE_API_BASE_URL=$VITE_API_BASE_URL
+# Install frontend dependencies
+COPY frontend/package*.json ./
+RUN npm install
+# Copy rest of frontend
+COPY frontend/ .
+# Create .env file from environment variables and build
+# Inline the script to avoid permission issues
+RUN echo "Checking environment variables..." && \
+    [ -z "$VITE_FIREBASE_API_KEY" ] && echo "WARNING: VITE_FIREBASE_API_KEY is not set" || echo "✓ VITE_FIREBASE_API_KEY is set" && \
+    [ -z "$VITE_FIREBASE_AUTH_DOMAIN" ] && echo "WARNING: VITE_FIREBASE_AUTH_DOMAIN is not set" || echo "✓ VITE_FIREBASE_AUTH_DOMAIN is set" && \
+    [ -z "$VITE_FIREBASE_PROJECT_ID" ] && echo "WARNING: VITE_FIREBASE_PROJECT_ID is not set" || echo "✓ VITE_FIREBASE_PROJECT_ID is set" && \
+    echo "VITE_FIREBASE_API_KEY=${VITE_FIREBASE_API_KEY:-}" > .env && \
+    echo "VITE_FIREBASE_AUTH_DOMAIN=${VITE_FIREBASE_AUTH_DOMAIN:-}" >> .env && \
+    echo "VITE_FIREBASE_PROJECT_ID=${VITE_FIREBASE_PROJECT_ID:-}" >> .env && \
+    echo "VITE_FIREBASE_STORAGE_BUCKET=${VITE_FIREBASE_STORAGE_BUCKET:-}" >> .env && \
+    echo "VITE_FIREBASE_MESSAGING_SENDER_ID=${VITE_FIREBASE_MESSAGING_SENDER_ID:-}" >> .env && \
+    echo "VITE_FIREBASE_APP_ID=${VITE_FIREBASE_APP_ID:-}" >> .env && \
+    echo "VITE_API_BASE_URL=${VITE_API_BASE_URL:-}" >> .env && \
+    echo "Created .env file with environment variables" && \
+    npm run build
+# Vite will output to /frontend/dist by default
+# ---------- 2) Backend (FastAPI + Python) ----------
+FROM python:3.11-slim
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+WORKDIR /app
+# System deps (optional but useful for some libs)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+# Install backend dependencies
+COPY backend/requirements.txt ./backend/requirements.txt
+RUN pip install --no-cache-dir -r backend/requirements.txt
+# Copy backend code
+COPY backend ./backend
+# Copy built frontend into backend/frontend_dist
+# FastAPI will serve from this folder later
+RUN mkdir -p backend/frontend_dist
+COPY --from=frontend-build /frontend/dist ./backend/frontend_dist
+# Create data directory for SQLite
+RUN mkdir -p data
+# Env vars used in backend/db.py etc.
+ENV DB_PATH=/app/data/app.db
+ENV PORT=7860
+ENV PYTHONPATH=/app
+EXPOSE 7860
+# Launch FastAPI app (we'll use backend.app.main:app)
+CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "7860"]

FIREBASE_OTP_SETUP.md CHANGED Viewed

@@ -1,3 +1,4 @@
 # Firebase Authentication + OTP Setup Guide
 This application uses Firebase Authentication for Google sign-in and Brevo for OTP email delivery. Only business email addresses are allowed.
@@ -294,3 +295,289 @@ For issues:
 - Firebase: [Firebase Documentation](https://firebase.google.com/docs)
 - Brevo: [Brevo API Documentation](https://developers.brevo.com/)

+<<<<<<< HEAD
 # Firebase Authentication + OTP Setup Guide
 This application uses Firebase Authentication for Google sign-in and Brevo for OTP email delivery. Only business email addresses are allowed.
 - Firebase: [Firebase Documentation](https://firebase.google.com/docs)
 - Brevo: [Brevo API Documentation](https://developers.brevo.com/)
+=======
+# Firebase Authentication + OTP Setup Guide
+This application uses Firebase Authentication for Google sign-in and Brevo for OTP email delivery. Only business email addresses are allowed.
+## Prerequisites
+1. Firebase project
+2. Brevo account (for sending OTP emails)
+3. Business email domain verification
+---
+## Step 1: Firebase Setup
+### 1.1 Create Firebase Project
+1. Go to [Firebase Console](https://console.firebase.google.com/)
+2. Click "Add project" or select an existing project
+3. Follow the setup wizard
+### 1.2 Enable Google Authentication
+1. In Firebase Console, go to **Authentication** → **Sign-in method**
+2. Click on **Google** provider
+3. Enable it and set your project support email
+4. Save the changes
+### 1.3 Get Firebase Web App Configuration
+1. In Firebase Console, go to **Project Settings** (gear icon)
+2. Scroll down to "Your apps" section
+3. Click the **Web** icon (`</>`) to add a web app
+4. Register your app (you can skip Firebase Hosting for now)
+5. Copy the Firebase configuration object
+### 1.4 Get Firebase Service Account Key
+1. In Firebase Console, go to **Project Settings** → **Service accounts**
+2. Click **Generate new private key**
+3. Download the JSON file (keep it secure!)
+### 1.5 Set Frontend Environment Variables
+Create or update `frontend/.env`:
+```bash
+VITE_FIREBASE_API_KEY=your-api-key
+VITE_FIREBASE_AUTH_DOMAIN=your-project.firebaseapp.com
+VITE_FIREBASE_PROJECT_ID=your-project-id
+VITE_FIREBASE_STORAGE_BUCKET=your-project.appspot.com
+VITE_FIREBASE_MESSAGING_SENDER_ID=your-sender-id
+VITE_FIREBASE_APP_ID=your-app-id
+```
+### 1.6 Set Backend Environment Variables
+You have two options for Firebase Admin SDK:
+**Option A: Service Account JSON File**
+```bash
+FIREBASE_SERVICE_ACCOUNT_KEY=/path/to/service-account-key.json
+```
+**Option B: Service Account JSON String (Recommended for Docker/Cloud)**
+```bash
+FIREBASE_SERVICE_ACCOUNT_JSON='{"type":"service_account","project_id":"...","private_key_id":"...","private_key":"...","client_email":"...","client_id":"...","auth_uri":"...","token_uri":"...","auth_provider_x509_cert_url":"...","client_x509_cert_url":"..."}'
+```
+---
+## Step 2: Brevo Setup
+### 2.1 Create Brevo Account
+1. Go to [Brevo](https://www.brevo.com/) (formerly Sendinblue)
+2. Sign up for a free account (300 emails/day free tier)
+3. Verify your email address
+### 2.2 Get API Key
+1. Log in to Brevo
+2. Go to **Settings** → **API Keys**
+3. Click **Generate a new API key**
+4. Copy the API key (starts with `xkeysib-...`)
+### 2.3 Verify Sender Email
+1. Go to **Senders & IP** → **Senders**
+2. Click **Add a sender**
+3. Enter your sender email (e.g., `noreply@yourdomain.com`)
+4. Verify the email address (check your inbox for verification email)
+5. Once verified, you can use it to send emails
+### 2.4 Set Backend Environment Variables
+```bash
+BREVO_API_KEY=xkeysib-your-api-key-here
+BREVO_SENDER_EMAIL=noreply@yourdomain.com
+BREVO_SENDER_NAME=EZOFIS AI
+```
+---
+## Step 3: JWT Secret Key
+Generate a strong random string for JWT token signing:
+```bash
+# Generate a random secret (Linux/Mac)
+openssl rand -hex 32
+# Or use Python
+python -c "import secrets; print(secrets.token_hex(32))"
+```
+Set the environment variable:
+```bash
+JWT_SECRET_KEY=your-generated-secret-key-here
+```
+---
+## Step 4: Frontend URL
+Set the frontend URL for OAuth redirects:
+```bash
+FRONTEND_URL=http://localhost:5173  # Development
+# OR
+FRONTEND_URL=https://your-domain.com  # Production
+```
+---
+## Step 5: Install Dependencies
+### Backend
+```bash
+cd backend
+pip install -r requirements.txt
+```
+### Frontend
+```bash
+cd frontend
+npm install
+```
+---
+## Step 6: Database Migration
+The database will automatically create the new schema when you start the application. However, if you have existing data:
+**Option 1: Fresh Start (Recommended for Development)**
+- Delete the existing database file: `data/app.db`
+- Restart the application (tables will be recreated)
+**Option 2: Manual Migration (For Production)**
+- The new `users` table will be created automatically
+- The existing `extractions` table needs a `user_id` column added
+- You'll need to assign existing records to a default user or migrate them
+---
+## Step 7: Test the Setup
+### 7.1 Test Firebase Authentication
+1. Start the backend server
+2. Start the frontend development server
+3. Navigate to the application
+4. Click "Google Sign In"
+5. Sign in with a business Google account
+6. Verify you're redirected to the dashboard
+### 7.2 Test OTP Authentication
+1. Click on "Email / OTP" tab
+2. Enter a business email address
+3. Click "Send OTP"
+4. Check your email for the OTP code
+5. Enter the OTP and verify
+6. Verify you're redirected to the dashboard
+### 7.3 Test Business Email Validation
+1. Try to sign in with a personal Gmail account
+2. Verify you get an error message
+3. Try OTP with a personal email
+4. Verify it's blocked
+---
+## Environment Variables Summary
+### Backend (.env or environment)
+```bash
+# Firebase
+FIREBASE_SERVICE_ACCOUNT_JSON='{...}'  # OR
+FIREBASE_SERVICE_ACCOUNT_KEY=/path/to/key.json
+# Brevo
+BREVO_API_KEY=xkeysib-...
+BREVO_SENDER_EMAIL=noreply@yourdomain.com
+BREVO_SENDER_NAME=EZOFIS AI
+# JWT
+JWT_SECRET_KEY=your-secret-key
+# Frontend URL
+FRONTEND_URL=http://localhost:5173
+```
+### Frontend (.env)
+```bash
+VITE_FIREBASE_API_KEY=...
+VITE_FIREBASE_AUTH_DOMAIN=...
+VITE_FIREBASE_PROJECT_ID=...
+VITE_FIREBASE_STORAGE_BUCKET=...
+VITE_FIREBASE_MESSAGING_SENDER_ID=...
+VITE_FIREBASE_APP_ID=...
+VITE_API_BASE_URL=http://localhost:7860
+```
+---
+## Troubleshooting
+### Firebase Issues
+- **"Firebase not configured"**: Check that `FIREBASE_SERVICE_ACCOUNT_JSON` or `FIREBASE_SERVICE_ACCOUNT_KEY` is set correctly
+- **"Invalid Firebase token"**: Ensure Firebase Web SDK is properly configured in frontend
+- **"Email not found"**: Make sure Google sign-in is enabled in Firebase Console
+### Brevo Issues
+- **"Failed to send email"**:
+  - Verify your API key is correct
+  - Check that sender email is verified in Brevo
+  - Ensure you haven't exceeded the free tier limit (300 emails/day)
+- **"API key not set"**: Check that `BREVO_API_KEY` environment variable is set
+### Business Email Validation
+- Personal emails (Gmail, Yahoo, etc.) are automatically blocked
+- Only business/corporate email domains are allowed
+- The validation happens on both frontend and backend
+---
+## Security Notes
+1. **Never commit** Firebase service account keys or API keys to version control
+2. Use environment variables or secure secret management
+3. JWT tokens expire after 7 days
+4. OTP codes expire after 10 minutes
+5. Maximum 5 OTP verification attempts per email
+6. All extraction records are filtered by user_id for data isolation
+---
+## Production Deployment
+1. Set all environment variables in your hosting platform
+2. Use HTTPS for both frontend and backend
+3. Update `FRONTEND_URL` to your production domain
+4. Verify sender email in Brevo with your production domain
+5. Consider using Redis for OTP storage instead of in-memory storage (for scalability)
+6. Set up proper error monitoring and logging
+---
+## Support
+For issues:
+- Firebase: [Firebase Documentation](https://firebase.google.com/docs)
+- Brevo: [Brevo API Documentation](https://developers.brevo.com/)
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

GOOGLE_OAUTH_SETUP.md CHANGED Viewed

@@ -1,3 +1,4 @@
 # Google OAuth Setup Guide
 This application uses Google OAuth for user authentication. Follow these steps to set it up:
@@ -77,3 +78,84 @@ New dependencies added:
 - All extraction records are filtered by user_id
 - Users can only see their own data and history

+<<<<<<< HEAD
 # Google OAuth Setup Guide
 This application uses Google OAuth for user authentication. Follow these steps to set it up:
 - All extraction records are filtered by user_id
 - Users can only see their own data and history
+=======
+# Google OAuth Setup Guide
+This application uses Google OAuth for user authentication. Follow these steps to set it up:
+## 1. Create Google OAuth Credentials
+1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
+2. Create a new project or select an existing one
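+3. Configure the OAuth consent screen (the legacy Google+ API has been shut down, so there is no API to enable for basic sign-in)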
+4. Go to "Credentials" → "Create Credentials" → "OAuth client ID"
+5. Choose "Web application"
+6. Add authorized redirect URIs:
+   - For development: `http://localhost:7860/api/auth/callback`
+   - For production: `https://your-domain.com/api/auth/callback`
+7. Copy the Client ID and Client Secret
+## 2. Set Environment Variables
+Set the following environment variables:
+```bash
+# Google OAuth
+GOOGLE_CLIENT_ID=your-client-id-here
+GOOGLE_CLIENT_SECRET=your-client-secret-here
+# JWT Secret (use a strong random string)
+JWT_SECRET_KEY=your-secret-key-here
+# Frontend URL (for OAuth redirect)
+FRONTEND_URL=http://localhost:5173  # or your production URL
+```
+## 3. Database Migration
+The database will automatically create the new `users` table and add `user_id` to the `extractions` table when you start the application.
+**Note:** If you have an existing database with extraction records, you'll need to:
+1. Back up your data
+2. Delete the old database file
+3. Restart the application to recreate tables with the new schema
+Or manually migrate:
+- Add `user_id` column to `extractions` table (you may need to set a default user_id for existing records)
+## 4. Install Dependencies
+Make sure to install the new Python dependencies:
+```bash
+pip install -r backend/requirements.txt
+```
+New dependencies added:
+- `authlib` - OAuth library
+- `pyjwt` - JWT token handling
+- `python-jose[cryptography]` - JWT verification
+## 5. Start the Application
+1. Start the backend server
+2. Start the frontend development server
+3. Users will be prompted to sign in with Google when they try to access the application
+## How It Works
+1. User clicks "Sign in with Google" → redirected to Google login
+2. After authentication, Google redirects to `/api/auth/callback`
+3. Backend creates/updates user in database and generates JWT token
+4. Frontend receives token and stores it in localStorage
+5. All API requests include the JWT token in the Authorization header
+6. Backend verifies token and filters data by user_id
+## Security Notes
+- JWT tokens expire after 7 days
+- Tokens are stored in localStorage (consider httpOnly cookies for production)
+- All extraction records are filtered by user_id
+- Users can only see their own data and history
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

IMPLEMENTATION_COMPLETE.md CHANGED Viewed

@@ -1,3 +1,4 @@
 # ✅ Firebase + OTP Authentication Implementation Complete
 All code changes have been applied successfully! Here are the next steps you need to follow:
@@ -255,3 +256,262 @@ Once you complete the setup steps above, your application will have:
 Good luck! 🚀

+<<<<<<< HEAD
 # ✅ Firebase + OTP Authentication Implementation Complete
 All code changes have been applied successfully! Here are the next steps you need to follow:
 Good luck! 🚀
+=======
+# ✅ Firebase + OTP Authentication Implementation Complete
+All code changes have been applied successfully! Here are the next steps you need to follow:
+## 📋 Implementation Summary
+### ✅ Backend Changes (Completed)
+- ✅ Updated `requirements.txt` with Firebase Admin SDK
+- ✅ Updated `models.py` - User model now supports Firebase and OTP auth methods
+- ✅ Created `email_validator.py` - Business email validation
+- ✅ Created `firebase_auth.py` - Firebase token verification
+- ✅ Created `brevo_service.py` - Brevo email service for OTP
+- ✅ Created `otp_service.py` - OTP generation and verification
+- ✅ Updated `auth_routes.py` - New endpoints for Firebase and OTP login
+### ✅ Frontend Changes (Completed)
+- ✅ Updated `package.json` with Firebase SDK
+- ✅ Created `config/firebase.js` - Firebase configuration
+- ✅ Updated `services/auth.js` - Firebase and OTP auth functions
+- ✅ Updated `contexts/AuthContext.jsx` - Firebase and OTP support
+- ✅ Created `components/auth/LoginForm.jsx` - Login UI with both options
+- ✅ Updated `App.jsx` - Integrated LoginForm component
+---
+## 🚀 Next Steps (YOU NEED TO DO THESE)
+### Step 1: Install Dependencies
+**Backend:**
+```bash
+cd backend
+pip install -r requirements.txt
+```
+**Frontend:**
+```bash
+cd frontend
+npm install
+```
+---
+### Step 2: Set Up Firebase
+1. **Create Firebase Project:**
+   - Go to https://console.firebase.google.com/
+   - Create a new project or use an existing one
+2. **Enable Google Authentication:**
+   - In Firebase Console → Authentication → Sign-in method
+   - Enable "Google" provider
+   - Set project support email
+3. **Get Web App Config:**
+   - Project Settings → Your apps → Add Web app
+   - Copy the config values
+4. **Get Service Account Key:**
+   - Project Settings → Service accounts
+   - Click "Generate new private key"
+   - Download the JSON file
+5. **Set Frontend Environment Variables:**
+   Create `frontend/.env`:
+   ```bash
+   VITE_FIREBASE_API_KEY=your-api-key-here
+   VITE_FIREBASE_AUTH_DOMAIN=your-project.firebaseapp.com
+   VITE_FIREBASE_PROJECT_ID=your-project-id
+   VITE_FIREBASE_STORAGE_BUCKET=your-project.appspot.com
+   VITE_FIREBASE_MESSAGING_SENDER_ID=your-sender-id
+   VITE_FIREBASE_APP_ID=your-app-id
+   VITE_API_BASE_URL=http://localhost:7860
+   ```
+6. **Set Backend Environment Variables:**
+   Option A (JSON file path):
+   ```bash
+   FIREBASE_SERVICE_ACCOUNT_KEY=/path/to/service-account-key.json
+   ```
+   Option B (JSON string - recommended for Docker):
+   ```bash
+   FIREBASE_SERVICE_ACCOUNT_JSON='{"type":"service_account","project_id":"...","private_key":"...","client_email":"..."}'
+   ```
+   (Copy the entire JSON content from the downloaded file)
+---
+### Step 3: Set Up Brevo
+1. **Create Brevo Account:**
+   - Go to https://www.brevo.com/
+   - Sign up (free tier: 300 emails/day)
+2. **Get API Key:**
+   - Settings → API Keys
+   - Generate new API key
+   - Copy the key (starts with `xkeysib-`)
+3. **Verify Sender Email:**
+   - Senders & IP → Senders
+   - Add sender email (e.g., `noreply@yourdomain.com`)
+   - Verify via email
+4. **Set Backend Environment Variables:**
+   ```bash
+   BREVO_API_KEY=xkeysib-your-api-key-here
+   BREVO_SENDER_EMAIL=noreply@yourdomain.com
+   BREVO_SENDER_NAME=EZOFIS AI
+   ```
+---
+### Step 4: Set JWT Secret
+Generate a secure random key:
+```bash
+# Linux/Mac
+openssl rand -hex 32
+# Or Python
+python -c "import secrets; print(secrets.token_hex(32))"
+```
+Set environment variable:
+```bash
+JWT_SECRET_KEY=your-generated-secret-key-here
+```
+---
+### Step 5: Set Frontend URL
+```bash
+FRONTEND_URL=http://localhost:5173  # Development
+# OR
+FRONTEND_URL=https://your-domain.com  # Production
+```
+---
+### Step 6: Database Migration
+**If you have existing data:**
+- The new schema will be created automatically
+- The existing `extractions` table needs a `user_id` column
+- You may need to assign existing records to a default user
+**For fresh start (recommended for development):**
+- Delete `data/app.db` (if exists)
+- Restart application - tables will be recreated
+---
+### Step 7: Test the Implementation
+1. **Start Backend:**
+   ```bash
+   cd backend
+   uvicorn app.main:app --reload --port 7860
+   ```
+2. **Start Frontend:**
+   ```bash
+   cd frontend
+   npm run dev
+   ```
+3. **Test Firebase Login:**
+   - Navigate to http://localhost:5173
+   - Click "Google Sign In" tab
+   - Sign in with business Google account
+   - Should redirect to dashboard
+4. **Test OTP Login:**
+   - Click "Email / OTP" tab
+   - Enter business email
+   - Click "Send OTP"
+   - Check email for OTP code
+   - Enter OTP and verify
+   - Should redirect to dashboard
+5. **Test Business Email Validation:**
+   - Try personal Gmail account → Should be blocked
+   - Try OTP with personal email → Should be blocked
+---
+## 📝 Environment Variables Checklist
+### Backend (.env or system environment)
+- [ ] `FIREBASE_SERVICE_ACCOUNT_JSON` or `FIREBASE_SERVICE_ACCOUNT_KEY`
+- [ ] `BREVO_API_KEY`
+- [ ] `BREVO_SENDER_EMAIL`
+- [ ] `BREVO_SENDER_NAME`
+- [ ] `JWT_SECRET_KEY`
+- [ ] `FRONTEND_URL`
+### Frontend (.env)
+- [ ] `VITE_FIREBASE_API_KEY`
+- [ ] `VITE_FIREBASE_AUTH_DOMAIN`
+- [ ] `VITE_FIREBASE_PROJECT_ID`
+- [ ] `VITE_FIREBASE_STORAGE_BUCKET`
+- [ ] `VITE_FIREBASE_MESSAGING_SENDER_ID`
+- [ ] `VITE_FIREBASE_APP_ID`
+- [ ] `VITE_API_BASE_URL`
+---
+## 🔒 Security Reminders
+1. ✅ Never commit API keys or secrets to git
+2. ✅ Use `.env` files (add to `.gitignore`)
+3. ✅ Business email validation is enforced on both frontend and backend
+4. ✅ JWT tokens expire after 7 days
+5. ✅ OTP codes expire after 10 minutes
+6. ✅ Maximum 5 OTP verification attempts
+---
+## 📚 Documentation
+- **Firebase Setup:** See `FIREBASE_OTP_SETUP.md` for detailed instructions
+- **Brevo API:** https://developers.brevo.com/reference/sendtransacemail
+---
+## ⚠️ Important Notes
+1. **Database Schema Change:**
+   - User model changed from `google_id` (required) to `firebase_uid` (optional)
+   - If you have existing users, you'll need to migrate the data
+   - For development, deleting `data/app.db` is the easiest option
+2. **Business Email Validation:**
+   - Personal email domains are blocked (Gmail, Yahoo, Outlook, etc.)
+   - Validation happens on both frontend and backend
+   - Users must use their work/corporate email addresses
+3. **OTP Storage:**
+   - Currently stored in memory (works for single server)
+   - For production with multiple servers, consider using Redis
+---
+## 🎉 You're All Set!
+Once you complete the setup steps above, your application will have:
+- ✅ Firebase Google Sign-in (no OAuth credentials needed!)
+- ✅ Email/OTP authentication via Brevo
+- ✅ Business email validation
+- ✅ User-specific data isolation
+- ✅ Secure JWT token authentication
+Good luck! 🚀
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
----
-title: EZOFISAIOCR
-emoji: 🌍
-colorFrom: blue
-colorTo: purple
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: EZOFISOCR
+emoji: 🌍
+colorFrom: indigo
+colorTo: purple
+sdk: docker
+pinned: false
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

backend/app/apollo_service.py CHANGED Viewed

@@ -1,3 +1,4 @@
 """
 Apollo.io API service for creating contacts, enriching contact data, and adding them to sequences.
 Reference:
@@ -442,3 +443,449 @@ async def enrich_contact_by_email(email: str) -> Optional[Dict[str, Any]]:
         print(f"[ERROR] Failed to enrich contact from Apollo.io: {str(e)}")
         return None

+<<<<<<< HEAD
 """
 Apollo.io API service for creating contacts, enriching contact data, and adding them to sequences.
 Reference:
         print(f"[ERROR] Failed to enrich contact from Apollo.io: {str(e)}")
         return None
+=======
+"""
+Apollo.io API service for creating contacts, enriching contact data, and adding them to sequences.
+Reference:
+- Create contact: https://docs.apollo.io/reference/create-a-contact
+- Add to sequence: https://docs.apollo.io/reference/add-contacts-to-sequence
+- Enrich person: https://docs.apollo.io/reference/enrich-people-data
+"""
+import os
+import httpx
+from typing import Optional, Dict, Any
+# Apollo API key, read once at import time; when it is empty the helpers below log a warning and skip the API call.
+APOLLO_API_KEY = os.environ.get("APOLLO_API_KEY", "")
+# Base URL that every Apollo endpoint path in this module is appended to.
+APOLLO_API_URL = "https://api.apollo.io/api/v1"
+# Default list name used by create_apollo_contact() when the caller passes no list_name.
+APOLLO_TRIAL_LIST_NAME = "VPR TRIAL LEADS"
+# Allow list ID to be set directly via environment variable (more reliable than lookup)
+APOLLO_TRIAL_LIST_ID = os.environ.get("APOLLO_TRIAL_LIST_ID", None)
+# Sequence ID for adding contacts to email sequences (preferred over lists)
+APOLLO_TRIAL_SEQUENCE_ID = os.environ.get("APOLLO_TRIAL_SEQUENCE_ID", None)
+async def get_list_id(list_name: Optional[str] = None) -> Optional[str]:
+    """
+    Get Apollo list ID. First tries environment variable, then attempts API lookup.
+    Args:
+        list_name: Name of the list (for lookup if env var not set)
+    Returns:
+        List ID as string if found, None otherwise
+    """
+    # First, try to use the list ID from environment variable (most reliable)
+    if APOLLO_TRIAL_LIST_ID:
+        # Apollo list IDs are typically hexadecimal strings (MongoDB ObjectIds)
+        # Accept them as strings, just strip whitespace
+        list_id = str(APOLLO_TRIAL_LIST_ID).strip()
+        if list_id:
+            print(f"[INFO] Using Apollo list ID from environment variable: {list_id}")
+            return list_id
+        else:
+            print(f"[WARNING] APOLLO_TRIAL_LIST_ID is empty")
+    # If no env var, try to look up by name (this may not work if API endpoint is different)
+    if not list_name or not APOLLO_API_KEY:
+        return None
+    # Note: The /lists endpoint may not be available in all Apollo API versions
+    # Try alternative: search for lists using a different endpoint
+    try:
+        async with httpx.AsyncClient() as client:
+            # Try the lists endpoint (may return 404 in some API versions)
+            response = await client.get(
+                f"{APOLLO_API_URL}/lists",
+                headers={
+                    "Content-Type": "application/json",
+                    "Cache-Control": "no-cache",
+                    "X-Api-Key": APOLLO_API_KEY
+                },
+                timeout=10.0
+            )
+            if response.status_code == 200:
+                data = response.json()
+                lists = data.get("lists", [])
+                for list_item in lists:
+                    if list_item.get("name") == list_name:
+                        list_id = list_item.get("id")
+                        print(f"[INFO] Found Apollo list '{list_name}' with ID: {list_id}")
+                        # Return as string (Apollo IDs are typically hex strings)
+                        return str(list_id) if list_id else None
+                print(f"[WARNING] Apollo list '{list_name}' not found in available lists")
+            else:
+                print(f"[WARNING] Apollo lists endpoint returned {response.status_code}, cannot lookup list by name")
+    except Exception as e:
+        print(f"[WARNING] Failed to fetch Apollo list ID: {str(e)}")
+    return None
+async def add_contact_to_sequence(contact_id: str, sequence_id: str) -> bool:
+    """
+    Add a contact to an Apollo.io email sequence.
+    Args:
+        contact_id: The Apollo contact ID
+        sequence_id: The Apollo sequence ID
+    Returns:
+        True if contact was successfully added to sequence, False otherwise
+    """
+    if not APOLLO_API_KEY:
+        print("[WARNING] APOLLO_API_KEY not set, skipping sequence enrollment")
+        return False
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                f"{APOLLO_API_URL}/sequence_contacts",
+                headers={
+                    "Content-Type": "application/json",
+                    "Cache-Control": "no-cache",
+                    "X-Api-Key": APOLLO_API_KEY
+                },
+                json={
+                    "sequence_id": sequence_id,
+                    "contact_id": contact_id
+                },
+                timeout=10.0
+            )
+            if response.status_code in [200, 201]:
+                print(f"[INFO] Successfully added contact {contact_id} to sequence {sequence_id}")
+                return True
+            else:
+                error_data = response.text
+                print(f"[ERROR] Failed to add contact to sequence: {response.status_code} - {error_data}")
+                return False
+    except httpx.HTTPStatusError as e:
+        print(f"[ERROR] Apollo API HTTP error adding to sequence: {e.response.status_code} - {e.response.text}")
+        return False
+    except Exception as e:
+        print(f"[ERROR] Failed to add contact to sequence: {str(e)}")
+        return False
+async def create_apollo_contact(
+    email: str,
+    first_name: Optional[str] = None,
+    last_name: Optional[str] = None,
+    organization_name: Optional[str] = None,
+    title: Optional[str] = None,
+    list_name: Optional[str] = None,
+    sequence_id: Optional[str] = None
+) -> bool:
+    """
+    Create a contact in Apollo.io and optionally add to a sequence or list.
+    Args:
+        email: Contact email address (required)
+        first_name: Contact first name
+        last_name: Contact last name
+        organization_name: Organization name
+        title: Job title
+        list_name: Name of the list to add contact to (defaults to APOLLO_TRIAL_LIST_NAME)
+        sequence_id: ID of the sequence to add contact to (preferred over list)
+    Returns:
+        True if contact created successfully, False otherwise
+    Raises:
+        ValueError: If APOLLO_API_KEY is not set
+    """
+    if not APOLLO_API_KEY:
+        print("[WARNING] APOLLO_API_KEY not set, skipping Apollo contact creation")
+        return False
+    # Use default list name if not provided
+    if list_name is None:
+        list_name = APOLLO_TRIAL_LIST_NAME
+    # Parse name if full name is provided but first/last are not
+    if not first_name and not last_name:
+        # Try to extract from email or use email prefix
+        email_prefix = email.split('@')[0]
+        if '.' in email_prefix:
+            parts = email_prefix.split('.')
+            first_name = parts[0].capitalize() if parts else None
+            last_name = parts[1].capitalize() if len(parts) > 1 else None
+        else:
+            first_name = email_prefix.capitalize()
+    # Extract organization domain from email
+    organization_domain = None
+    if '@' in email:
+        organization_domain = email.split('@')[1]
+    # Prepare contact data
+    contact_data: Dict[str, Any] = {
+        "email": email.lower(),
+        "run_dedupe": True  # Prevent duplicate contacts
+    }
+    if first_name:
+        contact_data["first_name"] = first_name
+    if last_name:
+        contact_data["last_name"] = last_name
+    if organization_name:
+        contact_data["organization_name"] = organization_name
+    if organization_domain:
+        contact_data["organization_domain"] = organization_domain
+    if title:
+        contact_data["title"] = title
+    try:
+        async with httpx.AsyncClient() as client:
+            # Get the list ID if list_name is provided
+            list_ids = []
+            target_list_id = None  # Store for later use
+            if list_name:
+                list_id = await get_list_id(list_name)
+                if list_id:
+                    target_list_id = list_id  # Store for verification later
+                    # Apollo API accepts list_ids as an array of strings (hex IDs)
+                    list_ids = [str(list_id)]
+                    contact_data["list_ids"] = list_ids
+                    print(f"[INFO] Adding contact to list ID: {list_id}")
+                else:
+                    print(f"[WARNING] Could not find list '{list_name}'. Set APOLLO_TRIAL_LIST_ID environment variable with the list ID, or create contact without list assignment")
+            # Log the payload being sent (for debugging)
+            print(f"[DEBUG] Creating Apollo contact with payload: {contact_data}")
+            # Create the contact
+            response = await client.post(
+                f"{APOLLO_API_URL}/contacts",
+                headers={
+                    "Content-Type": "application/json",
+                    "Cache-Control": "no-cache",
+                    "X-Api-Key": APOLLO_API_KEY
+                },
+                json=contact_data,
+                timeout=10.0
+            )
+            # Log the full response for debugging
+            print(f"[DEBUG] Apollo API response status: {response.status_code}")
+            try:
+                response_json = response.json()
+                print(f"[DEBUG] Apollo API response (full): {response_json}")
+            except:
+                print(f"[DEBUG] Apollo API response body (text): {response.text[:1000]}")  # First 1000 chars
+            if response.status_code == 200 or response.status_code == 201:
+                result = response.json()
+                contact = result.get("contact", {})
+                contact_id = contact.get("id")
+                print(f"[INFO] Successfully created Apollo contact: {email} (ID: {contact_id})")
+                # Priority: Add to sequence if sequence_id is provided (this is supported by API)
+                target_sequence_id = sequence_id or APOLLO_TRIAL_SEQUENCE_ID
+                if contact_id and target_sequence_id:
+                    print(f"[INFO] Adding contact to sequence: {target_sequence_id}")
+                    sequence_success = await add_contact_to_sequence(contact_id, target_sequence_id)
+                    if sequence_success:
+                        print(f"[INFO] ✓ Contact successfully enrolled in sequence")
+                    else:
+                        print(f"[WARNING] Failed to add contact to sequence, but contact was created")
+                # Fallback: Try to add to list (API limitation - may not work)
+                if list_ids and contact_id and target_list_id and not target_sequence_id:
+                    print(f"[INFO] Contact created with list_ids parameter: {list_ids}")
+                    print(f"[INFO] ⚠️  Apollo.io API Limitation: The API does not return list_ids in responses,")
+                    print(f"[INFO]    so we cannot verify if the contact was added to the list via API.")
+                    print(f"[INFO]    Please verify manually in Apollo.io that contact '{email}' is in list '{list_name or target_list_id}'")
+                    print(f"[INFO]    Consider using sequences instead (APOLLO_TRIAL_SEQUENCE_ID) for better API support.")
+                return True
+            else:
+                error_data = response.text
+                print(f"[ERROR] Failed to create Apollo contact: {response.status_code} - {error_data}")
+                return False
+    except httpx.HTTPStatusError as e:
+        print(f"[ERROR] Apollo API HTTP error: {e.response.status_code} - {e.response.text}")
+        return False
+    except Exception as e:
+        print(f"[ERROR] Failed to create Apollo contact: {str(e)}")
+        return False
+async def enrich_contact_by_email(email: str) -> Optional[Dict[str, Any]]:
+    """
+    Enrich contact data from Apollo.io using email address.
+    Args:
+        email: Contact email address
+    Returns:
+        Dictionary with enriched contact data, or None if not found
+    """
+    if not APOLLO_API_KEY:
+        print("[WARNING] APOLLO_API_KEY not set, skipping Apollo enrichment")
+        return None
+    try:
+        async with httpx.AsyncClient() as client:
+            # Try people/match endpoint first (for exact email match)
+            print(f"[DEBUG] Attempting Apollo.io enrichment for {email} via /people/match endpoint")
+            response = await client.post(
+                f"{APOLLO_API_URL}/people/match",
+                headers={
+                    "Content-Type": "application/json",
+                    "Cache-Control": "no-cache",
+                    "X-Api-Key": APOLLO_API_KEY
+                },
+                json={
+                    "email": email.lower()
+                    # Note: reveal_phone_number requires webhook_url, so we skip it for now
+                },
+                timeout=10.0
+            )
+            print(f"[DEBUG] Apollo.io /people/match response status: {response.status_code}")
+            if response.status_code == 200:
+                data = response.json()
+                print(f"[DEBUG] Apollo.io /people/match response data keys: {list(data.keys())}")
+                person = data.get("person", {})
+                if person:
+                    print(f"[DEBUG] Found person data in Apollo.io response")
+                    # Extract enriched data
+                    enriched_data = {
+                        "first_name": person.get("first_name"),
+                        "last_name": person.get("last_name"),
+                        "title": person.get("title"),
+                        "phone_number": person.get("phone_numbers", [{}])[0].get("raw_number") if person.get("phone_numbers") else None,
+                        "linkedin_url": person.get("linkedin_url"),
+                        "headline": person.get("headline"),
+                        "organization_name": person.get("organization", {}).get("name") if person.get("organization") else None,
+                        "organization_website": person.get("organization", {}).get("website_url") if person.get("organization") else None,
+                        "organization_address": None,  # May need to parse from organization data
+                    }
+                    # Try to get organization address
+                    if person.get("organization"):
+                        org = person.get("organization", {})
+                        address_parts = []
+                        if org.get("street_address"):
+                            address_parts.append(org.get("street_address"))
+                        if org.get("city"):
+                            address_parts.append(org.get("city"))
+                        if org.get("state"):
+                            address_parts.append(org.get("state"))
+                        if org.get("postal_code"):
+                            address_parts.append(org.get("postal_code"))
+                        if org.get("country"):
+                            address_parts.append(org.get("country"))
+                        if address_parts:
+                            enriched_data["organization_address"] = ", ".join(address_parts)
+                    print(f"[INFO] Successfully enriched contact data for {email} from Apollo.io")
+                    return enriched_data
+                else:
+                    print(f"[DEBUG] Apollo.io /people/match returned 200 but no person data found")
+            elif response.status_code == 404:
+                print(f"[DEBUG] Apollo.io /people/match returned 404 - contact not found in database")
+            elif response.status_code == 401:
+                print(f"[ERROR] Apollo.io API authentication failed - check your API key")
+                try:
+                    error_data = response.json()
+                    print(f"[ERROR] Apollo.io error details: {error_data}")
+                except:
+                    print(f"[ERROR] Apollo.io error response: {response.text}")
+            else:
+                print(f"[DEBUG] Apollo.io /people/match returned status {response.status_code}")
+                try:
+                    error_data = response.json()
+                    print(f"[DEBUG] Apollo.io response: {error_data}")
+                except:
+                    print(f"[DEBUG] Apollo.io response text: {response.text[:500]}")
+            # If match fails, try the new search endpoint (api_search)
+            print(f"[DEBUG] Attempting Apollo.io enrichment for {email} via /mixed_people/api_search endpoint")
+            search_response = await client.post(
+                f"{APOLLO_API_URL}/mixed_people/api_search",
+                headers={
+                    "Content-Type": "application/json",
+                    "Cache-Control": "no-cache",
+                    "X-Api-Key": APOLLO_API_KEY
+                },
+                json={
+                    "email": email.lower(),
+                    "per_page": 1
+                },
+                timeout=10.0
+            )
+            print(f"[DEBUG] Apollo.io /mixed_people/api_search response status: {search_response.status_code}")
+            if search_response.status_code == 200:
+                search_data = search_response.json()
+                print(f"[DEBUG] Apollo.io /mixed_people/api_search response data keys: {list(search_data.keys())}")
+                people = search_data.get("people", [])
+                print(f"[DEBUG] Found {len(people)} people in search results")
+                if people:
+                    person = people[0]
+                    # Extract enriched data (same structure as above)
+                    enriched_data = {
+                        "first_name": person.get("first_name"),
+                        "last_name": person.get("last_name"),
+                        "title": person.get("title"),
+                        "phone_number": person.get("phone_numbers", [{}])[0].get("raw_number") if person.get("phone_numbers") else None,
+                        "linkedin_url": person.get("linkedin_url"),
+                        "headline": person.get("headline"),
+                        "organization_name": person.get("organization", {}).get("name") if person.get("organization") else None,
+                        "organization_website": person.get("organization", {}).get("website_url") if person.get("organization") else None,
+                        "organization_address": None,
+                    }
+                    if person.get("organization"):
+                        org = person.get("organization", {})
+                        address_parts = []
+                        if org.get("street_address"):
+                            address_parts.append(org.get("street_address"))
+                        if org.get("city"):
+                            address_parts.append(org.get("city"))
+                        if org.get("state"):
+                            address_parts.append(org.get("state"))
+                        if org.get("postal_code"):
+                            address_parts.append(org.get("postal_code"))
+                        if org.get("country"):
+                            address_parts.append(org.get("country"))
+                        if address_parts:
+                            enriched_data["organization_address"] = ", ".join(address_parts)
+                    print(f"[INFO] Successfully enriched contact data for {email} from Apollo.io (via search)")
+                    return enriched_data
+                else:
+                    print(f"[DEBUG] Apollo.io /mixed_people/api_search returned 200 but no people in results")
+            elif search_response.status_code == 404:
+                print(f"[DEBUG] Apollo.io /mixed_people/api_search returned 404 - contact not found")
+            elif search_response.status_code == 401:
+                print(f"[ERROR] Apollo.io API authentication failed on search - check your API key")
+                try:
+                    error_data = search_response.json()
+                    print(f"[ERROR] Apollo.io search error details: {error_data}")
+                except:
+                    print(f"[ERROR] Apollo.io search error response: {search_response.text}")
+            else:
+                print(f"[DEBUG] Apollo.io /mixed_people/api_search returned status {search_response.status_code}")
+                try:
+                    error_data = search_response.json()
+                    print(f"[DEBUG] Apollo.io search response: {error_data}")
+                except:
+                    print(f"[DEBUG] Apollo.io search response text: {search_response.text[:500]}")
+            print(f"[INFO] No contact data found in Apollo.io for {email} - contact may not exist in Apollo's database")
+            return None
+    except httpx.HTTPStatusError as e:
+        print(f"[ERROR] Apollo API HTTP error during enrichment: {e.response.status_code} - {e.response.text}")
+        return None
+    except Exception as e:
+        print(f"[ERROR] Failed to enrich contact from Apollo.io: {str(e)}")
+        return None
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

backend/app/auth.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 import jwt
 from datetime import datetime, timedelta
@@ -90,3 +91,97 @@ def get_current_user(
     return user

+<<<<<<< HEAD
 import os
 import jwt
 from datetime import datetime, timedelta
     return user
+=======
+import os
+import jwt
+from datetime import datetime, timedelta
+from typing import Optional
+from fastapi import Depends, HTTPException, status
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+from sqlalchemy.orm import Session
+from .db import SessionLocal
+from .models import User
+# JWT Configuration
+# NOTE(review): when JWT_SECRET_KEY is unset, tokens are signed with the placeholder
+# string below, which is visible in source control - make sure it is always set outside development.
+SECRET_KEY = os.environ.get("JWT_SECRET_KEY", "your-secret-key-change-in-production")
+# Signing algorithm used for both encoding and decoding tokens.
+ALGORITHM = "HS256"
+# Default token lifetime applied by create_access_token() when no expires_delta is given.
+ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7  # 7 days
+# FastAPI bearer-token scheme; get_current_user() depends on it to obtain the credentials.
+security = HTTPBearer()
+def get_db():
+    """Database dependency."""
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
+def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
+    """Create a JWT access token."""
+    to_encode = data.copy()
+    # Ensure 'sub' (subject) is a string, not an integer
+    if "sub" in to_encode:
+        to_encode["sub"] = str(to_encode["sub"])
+    if expires_delta:
+        expire = datetime.utcnow() + expires_delta
+    else:
+        expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
+    to_encode.update({"exp": expire})
+    encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
+    return encoded_jwt
+def verify_token(token: str) -> dict:
+    """Verify and decode a JWT token."""
+    try:
+        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
+        return payload
+    except jwt.ExpiredSignatureError:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Token has expired",
+        )
+    except jwt.InvalidTokenError:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Could not validate credentials",
+        )
+def get_current_user(
+    credentials: HTTPAuthorizationCredentials = Depends(security),
+    db: Session = Depends(get_db)
+) -> User:
+    """Get the current authenticated user from JWT token."""
+    token = credentials.credentials
+    payload = verify_token(token)
+    user_id: int = payload.get("sub")
+    if user_id is None:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Could not validate credentials",
+        )
+    # Convert user_id back to integer for database query
+    try:
+        user_id_int = int(user_id)
+    except (ValueError, TypeError):
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Invalid user ID in token",
+        )
+    user = db.query(User).filter(User.id == user_id_int).first()
+    if user is None:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="User not found",
+        )
+    return user
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

backend/app/auth_routes.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 from fastapi import APIRouter, Depends, HTTPException, Body
 from pydantic import BaseModel, EmailStr
@@ -345,3 +346,241 @@ async def delete_api_key(
         "message": "API key deactivated successfully"
     }

+<<<<<<< HEAD
 import os
 from fastapi import APIRouter, Depends, HTTPException, Body
 from pydantic import BaseModel, EmailStr
         "message": "API key deactivated successfully"
     }
+=======
+import os
+from fastapi import APIRouter, Depends, HTTPException, Body
+from pydantic import BaseModel, EmailStr
+from sqlalchemy.orm import Session
+from .models import User
+from .auth import create_access_token, get_current_user
+from .firebase_auth import verify_firebase_token
+from .otp_service import request_otp, verify_otp
+from .email_validator import validate_business_email, is_business_email
+from .db import SessionLocal
+def get_db():
+    """Database dependency."""
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
+# Router that the /api/auth/* endpoints below are registered on.
+router = APIRouter()
+# Request body for POST /api/auth/firebase/login.
+class FirebaseLoginRequest(BaseModel):
+    id_token: str  # Firebase ID token, passed to verify_firebase_token()
+# Request body for POST /api/auth/otp/request.
+class OTPRequestRequest(BaseModel):
+    email: EmailStr  # Address the one-time code is requested for
+# Request body for POST /api/auth/otp/verify.
+class OTPVerifyRequest(BaseModel):
+    email: EmailStr  # Address the code was requested for
+    otp: str  # One-time code supplied by the user
+@router.post("/api/auth/firebase/login")
+async def firebase_login(
+    request: FirebaseLoginRequest,
+    db: Session = Depends(get_db)
+):
+    """
+    Login with Firebase ID token.
+    Validates business email and creates/updates user.
+    """
+    try:
+        # Verify Firebase token
+        user_info = await verify_firebase_token(request.id_token)
+        email = user_info.get('email')
+        if not email:
+            raise HTTPException(status_code=400, detail="Email not found in Firebase token")
+        # Validate business email
+        if not is_business_email(email):
+            raise HTTPException(
+                status_code=400,
+                detail="Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."
+            )
+        # Get or create user
+        user = db.query(User).filter(
+            (User.email == email.lower()) | (User.firebase_uid == user_info['uid'])
+        ).first()
+        if not user:
+            user = User(
+                email=email.lower(),
+                name=user_info.get('name'),
+                picture=user_info.get('picture'),
+                firebase_uid=user_info['uid'],
+                auth_method='firebase',
+                email_verified=True
+            )
+            db.add(user)
+            db.commit()
+            db.refresh(user)
+            print(f"[INFO] New user created via Firebase: {email}")
+            # Enrich contact data from Apollo.io and update Brevo + Monday.com
+            try:
+                from .apollo_service import enrich_contact_by_email
+                from .brevo_service import create_brevo_contact, BREVO_TRIAL_LIST_ID
+                from .monday_service import create_monday_lead
+                # Enrich contact data from Apollo.io
+                enriched_data = await enrich_contact_by_email(email)
+                # Use enriched data if available, otherwise use basic data
+                first_name = enriched_data.get("first_name") if enriched_data else None
+                last_name = enriched_data.get("last_name") if enriched_data else None
+                org_name = enriched_data.get("organization_name") if enriched_data else None
+                # Fallback to Firebase data if Apollo didn't provide it
+                if not first_name or not last_name:
+                    full_name = user_info.get('name', '')
+                    if full_name:
+                        name_parts = full_name.strip().split(' ', 1)
+                        first_name = first_name or (name_parts[0] if name_parts else None)
+                        last_name = last_name or (name_parts[1] if len(name_parts) > 1 else None)
+                if not org_name:
+                    org_domain = email.split('@')[1] if '@' in email else None
+                    org_name = org_domain.split('.')[0].capitalize() if org_domain else None
+                # Update Brevo contact with enriched data
+                await create_brevo_contact(
+                    email=email,
+                    first_name=first_name,
+                    last_name=last_name,
+                    organization_name=org_name or (enriched_data.get("organization_name") if enriched_data else None),
+                    phone_number=enriched_data.get("phone_number") if enriched_data else None,
+                    linkedin_url=enriched_data.get("linkedin_url") if enriched_data else None,
+                    title=enriched_data.get("title") if enriched_data else None,
+                    headline=enriched_data.get("headline") if enriched_data else None,
+                    organization_website=enriched_data.get("organization_website") if enriched_data else None,
+                    organization_address=enriched_data.get("organization_address") if enriched_data else None,
+                    list_id=BREVO_TRIAL_LIST_ID
+                )
+                # Create lead in Monday.com
+                await create_monday_lead(
+                    email=email,
+                    first_name=first_name,
+                    last_name=last_name,
+                    phone_number=enriched_data.get("phone_number") if enriched_data else None,
+                    linkedin_url=enriched_data.get("linkedin_url") if enriched_data else None,
+                    title=enriched_data.get("title") if enriched_data else None,
+                    headline=enriched_data.get("headline") if enriched_data else None,
+                    organization_name=org_name or (enriched_data.get("organization_name") if enriched_data else None),
+                    organization_website=enriched_data.get("organization_website") if enriched_data else None,
+                    organization_address=enriched_data.get("organization_address") if enriched_data else None,
+                )
+            except Exception as e:
+                # Don't fail user creation if integrations fail
+                print(f"[WARNING] Failed to enrich/update contact for {email}: {str(e)}")
+        else:
+            # Update user info
+            user.firebase_uid = user_info['uid']
+            user.email_verified = True
+            user.name = user_info.get('name', user.name)
+            user.picture = user_info.get('picture', user.picture)
+            if user.auth_method != 'firebase':
+                user.auth_method = 'firebase'
+            db.commit()
+            print(f"[INFO] User logged in via Firebase: {email}")
+        # Generate JWT token
+        token = create_access_token(data={"sub": user.id})
+        return {
+            "token": token,
+            "user": {
+                "id": user.id,
+                "email": user.email,
+                "name": user.name,
+                "picture": user.picture,
+                "auth_method": user.auth_method
+            }
+        }
+    except HTTPException:
+        raise
+    except Exception as e:
+        print(f"[ERROR] Firebase login failed: {str(e)}")
+        raise HTTPException(status_code=400, detail=f"Authentication failed: {str(e)}")
+@router.post("/api/auth/otp/request")
+async def request_otp_endpoint(
+    request: OTPRequestRequest,
+    db: Session = Depends(get_db)
+):
+    """
+    Request OTP for email login.
+    Validates business email before sending OTP.
+    """
+    try:
+        # Validate business email
+        validate_business_email(request.email)
+        # Request OTP
+        result = await request_otp(request.email, db)
+        return result
+    except HTTPException:
+        raise
+    except Exception as e:
+        print(f"[ERROR] OTP request failed: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Failed to send OTP: {str(e)}")
+@router.post("/api/auth/otp/verify")
+async def verify_otp_endpoint(
+    request: OTPVerifyRequest,
+    db: Session = Depends(get_db)
+):
+    """
+    Verify OTP and login.
+    Validates business email and OTP code.
+    """
+    try:
+        # Validate business email
+        validate_business_email(request.email)
+        # Verify OTP
+        user = await verify_otp(request.email, request.otp, db)
+        # Generate JWT token
+        token = create_access_token(data={"sub": user.id})
+        return {
+            "token": token,
+            "user": {
+                "id": user.id,
+                "email": user.email,
+                "name": user.name,
+                "picture": user.picture,
+                "auth_method": user.auth_method
+            }
+        }
+    except HTTPException:
+        raise
+    except Exception as e:
+        print(f"[ERROR] OTP verification failed: {str(e)}")
+        raise HTTPException(status_code=400, detail=f"OTP verification failed: {str(e)}")
+@router.get("/api/auth/me")
+async def get_current_user_info(current_user: User = Depends(get_current_user)):
+    """Get current user information."""
+    return {
+        "id": current_user.id,
+        "email": current_user.email,
+        "name": current_user.name,
+        "picture": current_user.picture,
+        "auth_method": current_user.auth_method,
+    }
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

backend/app/email_validator.py CHANGED Viewed

@@ -1,3 +1,4 @@
 """
 Email validation utilities to ensure only business emails are allowed.
 """
@@ -59,3 +60,66 @@ def validate_business_email(email: str) -> None:
             detail="Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."
         )

+<<<<<<< HEAD
 """
 Email validation utilities to ensure only business emails are allowed.
 """
             detail="Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."
         )
+=======
+"""
+Email validation utilities to ensure only business emails are allowed.
+"""
+from fastapi import HTTPException
+# Personal / free-mail domains to block. Entries are lowercase; is_business_email()
+# lowercases the address's domain before testing membership in this set.
+PERSONAL_EMAIL_DOMAINS = {
+    'gmail.com', 'yahoo.com', 'hotmail.com', 'outlook.com',
+    'aol.com', 'icloud.com', 'mail.com', 'protonmail.com',
+    'yandex.com', 'zoho.com', 'gmx.com', 'live.com', 'msn.com',
+    'me.com', 'mac.com', 'yahoo.co.uk', 'yahoo.co.jp', 'yahoo.fr',
+    'yahoo.de', 'yahoo.it', 'yahoo.es', 'yahoo.in', 'yahoo.com.au',
+    'gmail.co.uk', 'gmail.fr', 'gmail.de', 'gmail.it', 'gmail.es',
+    'gmail.in', 'gmail.com.au', 'hotmail.co.uk', 'hotmail.fr',
+    'hotmail.de', 'hotmail.it', 'hotmail.es', 'outlook.co.uk',
+    'outlook.fr', 'outlook.de', 'outlook.it', 'outlook.es',
+    'rediffmail.com', 'sina.com', 'qq.com', '163.com', '126.com',
+    'mail.ru', 'inbox.com', 'fastmail.com', 'tutanota.com',
+    'hey.com', 'pm.me'
+}
+def is_business_email(email: str) -> bool:
+    """
+    Check if email is a business email (not personal).
+    Args:
+        email: Email address to validate
+    Returns:
+        True if the address has a non-empty local part and a domain that is not
+        in PERSONAL_EMAIL_DOMAINS; False otherwise (including malformed input
+        such as "user@" or "@example.com")
+    """
+    if not email or '@' not in email:
+        return False
+    # The domain is whatever follows the LAST '@'; splitting on the first one
+    # would let "x@corp@gmail.com" be judged by "corp" instead of "gmail.com".
+    local_part, _, domain = email.strip().rpartition('@')
+    # Ignore case, stray whitespace and a fully-qualified trailing dot ("gmail.com.")
+    domain = domain.strip().lower().rstrip('.')
+    # An address without a local part or without a domain is not usable, so it
+    # must never be reported as a business address.
+    if not local_part.strip() or not domain:
+        return False
+    return domain not in PERSONAL_EMAIL_DOMAINS
+def validate_business_email(email: str) -> None:
+    """
+    Reject missing or personal-domain email addresses.
+    Args:
+        email: Email address to check
+    Raises:
+        HTTPException: 400 when email is empty or is_business_email(email) is False
+    """
+    rejection = None
+    if not email:
+        rejection = "Email address is required"
+    elif not is_business_email(email):
+        rejection = "Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."
+    if rejection is not None:
+        raise HTTPException(status_code=400, detail=rejection)
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

backend/app/firebase_auth.py CHANGED Viewed

@@ -1,3 +1,4 @@
 """
 Firebase Authentication utilities.
 """
@@ -90,3 +91,97 @@ async def verify_firebase_token(id_token: str) -> dict:
             detail=f"Firebase authentication failed: {str(e)}"
         )

+<<<<<<< HEAD
 """
 Firebase Authentication utilities.
 """
             detail=f"Firebase authentication failed: {str(e)}"
         )
+=======
+"""
+Firebase Authentication utilities.
+"""
+import os
+import json
+import firebase_admin
+from firebase_admin import auth, credentials
+from fastapi import HTTPException
+# Set to True once this module has initialized (or found) a Firebase app,
+# so later calls to initialize_firebase() return immediately.
+_firebase_initialized = False
+def initialize_firebase():
+    """
+    Initialize the Firebase Admin SDK once.
+    Credential sources are tried in order:
+      1. FIREBASE_SERVICE_ACCOUNT_JSON (service account as a JSON string)
+      2. FIREBASE_SERVICE_ACCOUNT_KEY (path to a service account file)
+      3. Default credentials via firebase_admin.initialize_app()
+    A source that is present but unusable is logged and the next one is tried.
+    Raises:
+        HTTPException: 500 if none of the sources yields an initialized app
+    """
+    global _firebase_initialized
+    if _firebase_initialized:
+        return
+    if firebase_admin._apps:
+        # An app is already registered; remember that so we stop re-checking.
+        _firebase_initialized = True
+        return
+    # Try to get service account from environment variable (JSON string)
+    service_account_json = os.environ.get("FIREBASE_SERVICE_ACCOUNT_JSON")
+    if service_account_json:
+        try:
+            service_account_info = json.loads(service_account_json)
+            cred = credentials.Certificate(service_account_info)
+            firebase_admin.initialize_app(cred)
+            _firebase_initialized = True
+            print("[INFO] Firebase Admin SDK initialized from environment variable")
+            return
+        except json.JSONDecodeError:
+            print("[WARNING] Failed to parse FIREBASE_SERVICE_ACCOUNT_JSON")
+        except ValueError as e:
+            # Valid JSON that is not a usable service account certificate:
+            # fall through to the other sources instead of aborting.
+            print(f"[WARNING] FIREBASE_SERVICE_ACCOUNT_JSON is not a valid service account: {e}")
+    # Try to get service account from file path
+    service_account_path = os.environ.get("FIREBASE_SERVICE_ACCOUNT_KEY")
+    if service_account_path and os.path.exists(service_account_path):
+        try:
+            cred = credentials.Certificate(service_account_path)
+            firebase_admin.initialize_app(cred)
+            _firebase_initialized = True
+            print(f"[INFO] Firebase Admin SDK initialized from file: {service_account_path}")
+            return
+        except (IOError, ValueError) as e:
+            # Unreadable or malformed key file: keep going to default credentials
+            print(f"[WARNING] Failed to load Firebase service account file {service_account_path}: {e}")
+    # Try to use default credentials (for Google Cloud environments)
+    try:
+        firebase_admin.initialize_app()
+        _firebase_initialized = True
+        print("[INFO] Firebase Admin SDK initialized with default credentials")
+        return
+    except Exception as e:
+        print(f"[WARNING] Firebase initialization failed: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail="Firebase not configured. Please set FIREBASE_SERVICE_ACCOUNT_JSON or FIREBASE_SERVICE_ACCOUNT_KEY environment variable."
+        )
+async def verify_firebase_token(id_token: str) -> dict:
+    """
+    Validate a Firebase ID token and extract the caller's identity.
+    Args:
+        id_token: Firebase ID token from client
+    Returns:
+        Dictionary with keys 'uid', 'email', 'name' and 'picture'
+        (the last three are None when the token does not carry them)
+    Raises:
+        HTTPException: 401 if the token cannot be verified
+    """
+    initialize_firebase()
+    try:
+        claims = auth.verify_id_token(id_token)
+        # 'uid' is mandatory; the remaining claims are optional
+        user_info = {'uid': claims['uid']}
+        for optional_claim in ('email', 'name', 'picture'):
+            user_info[optional_claim] = claims.get(optional_claim)
+        return user_info
+    except ValueError as e:
+        raise HTTPException(status_code=401, detail=f"Invalid Firebase token: {str(e)}")
+    except Exception as e:
+        raise HTTPException(status_code=401, detail=f"Firebase authentication failed: {str(e)}")
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

backend/app/main.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 import time
 from typing import List, Dict, Optional
@@ -784,3 +785,747 @@ if os.path.isdir(frontend_dir):
             return FileResponse(index_path)
         from fastapi import HTTPException
         raise HTTPException(status_code=404)

+<<<<<<< HEAD
 import os
 import time
 from typing import List, Dict, Optional
             return FileResponse(index_path)
         from fastapi import HTTPException
         raise HTTPException(status_code=404)
+=======
+import os
+import time
+from typing import List, Dict, Optional
+from fastapi import FastAPI, UploadFile, File, Depends, Form, HTTPException, Body
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+from sqlalchemy.orm import Session
+from pydantic import BaseModel
+from .db import Base, engine, SessionLocal
+from .models import ExtractionRecord, User, ShareToken
+from .schemas import ExtractionRecordBase, ExtractionStage
+from .openrouter_client import extract_fields_from_document
+from .auth import get_current_user, get_db
+from .auth_routes import router as auth_router
+# Allowed file types (MIME types accepted by /api/extract)
+ALLOWED_CONTENT_TYPES = [
+    "application/pdf",
+    "image/png",
+    "image/jpeg",
+    "image/jpg",
+    "image/tiff",
+    "image/tif"
+]
+# Allowed file extensions (for fallback validation when the content type is not recognised)
+ALLOWED_EXTENSIONS = [".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"]
+# Maximum file size: 4 MB
+MAX_FILE_SIZE = 4 * 1024 * 1024  # 4 MB in bytes
+# Ensure data dir exists for SQLite (relative to the process working directory)
+os.makedirs("data", exist_ok=True)
+# Create tables for every model registered on Base (runs at import time)
+Base.metadata.create_all(bind=engine)
+app = FastAPI(title="Document Capture Demo – Backend")
+# Include auth routes
+app.include_router(auth_router)
+# CORS: every origin, method and header is allowed, with credentials enabled.
+# NOTE(review): wildcard origins combined with allow_credentials=True is very
+# permissive; consider listing the real frontend origins before production use.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+def get_db():
+    """Yield a new SessionLocal session and close it once the consumer is done."""
+    session = SessionLocal()
+    try:
+        yield session
+    finally:
+        # Always release the session, even if the consumer raised
+        session.close()
+@app.get("/ping")
+def ping():
+    """Liveness probe: always reports that the backend is up."""
+    health = dict(status="ok", message="backend alive")
+    return health
+def make_stages(total_ms: int, status: str) -> Dict[str, ExtractionStage]:
+    """
+    Fabricate per-stage timing data for the History UI.
+    The total duration is split across four fixed stages (15% / 55% / 20% / 10%).
+    A non-positive total is replaced by 1000 ms. When status is not "completed",
+    the AI analysis stage is marked "failed" and the two later stages "skipped".
+    """
+    succeeded = status == "completed"
+    elapsed = 1000 if total_ms <= 0 else total_ms
+    # (stage key, share of total time, status used when the run did not complete, variation)
+    plan = [
+        ("uploading", 0.15, "completed", "normal"),
+        ("aiAnalysis", 0.55, "failed", "normal"),
+        ("dataExtraction", 0.2, "skipped", "fast"),
+        ("outputRendering", 0.1, "skipped", "normal"),
+    ]
+    return {
+        stage_key: ExtractionStage(
+            time=int(elapsed * share),
+            status="completed" if succeeded else fallback_status,
+            variation=variation,
+        )
+        for stage_key, share, fallback_status, variation in plan
+    }
+@app.post("/api/extract")
+async def extract_document(
+    file: UploadFile = File(...),
+    key_fields: Optional[str] = Form(None),
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """
+    Main extraction endpoint used by the Dashboard.
+    1) Read the uploaded file and validate its size and type
+    2) Call extract_fields_from_document (OpenRouter + Qwen3-VL)
+    3) Store a record in the database (failed extractions are stored too)
+    4) Return extraction result + metadata
+    Raises:
+        HTTPException: 400 if the file exceeds MAX_FILE_SIZE, or if neither its
+            content type nor its extension is in the allowed lists
+    """
+    import base64
+    import json
+    import traceback
+    start = time.time()
+    content = await file.read()
+    content_type = file.content_type or "application/octet-stream"
+    file_size = len(content)
+    size_mb = file_size / 1024 / 1024
+    size_str = f"{size_mb:.2f} MB"
+    # Validate file size
+    if file_size > MAX_FILE_SIZE:
+        raise HTTPException(
+            status_code=400,
+            detail=f"File size exceeds 4 MB limit. Your file is {size_mb:.2f} MB."
+        )
+    # Validate file type (content type first, extension as a fallback)
+    file_extension = ""
+    if file.filename:
+        file_extension = "." + file.filename.split(".")[-1].lower()
+    is_valid_type = (
+        content_type in ALLOWED_CONTENT_TYPES or
+        file_extension in ALLOWED_EXTENSIONS
+    )
+    if not is_valid_type:
+        raise HTTPException(
+            status_code=400,
+            detail="Only PDF, PNG, JPG, and TIFF files are allowed."
+        )
+    # Convert file content to base64 for storage. Done only after validation so
+    # rejected uploads are not encoded for nothing.
+    file_base64 = base64.b64encode(content).decode("utf-8")
+    try:
+        print(f"[INFO] Starting extraction for file: {file.filename}, type: {content_type}, size: {size_str}")
+        if key_fields:
+            print(f"[INFO] Key fields requested: {key_fields}")
+        extracted = await extract_fields_from_document(content, content_type, file.filename, key_fields)
+        total_ms = int((time.time() - start) * 1000)
+        print(f"[INFO] Extraction completed. Response keys: {list(extracted.keys())}")
+        print(f"[INFO] Fields extracted: {extracted.get('fields', {})}")
+        confidence = float(extracted.get("confidence", 90))
+        # A missing or null "fields" value means "no structured fields"; without
+        # this, a null value would raise below and mark a good extraction as failed.
+        fields = extracted.get("fields") or {}
+        # Get Fields from root level (if user provided key_fields)
+        root_fields = extracted.get("Fields", {})
+        # Get full_text for text output
+        full_text = extracted.get("full_text", "")
+        if full_text:
+            full_text_words = len(str(full_text).split())
+            print(f"[INFO] Full text extracted: {full_text_words} words")
+        # Fields keyed by page_X are already structured (from table parsing).
+        # The keys added below never start with "page_", so this stays valid.
+        is_structured = isinstance(fields, dict) and any(k.startswith("page_") for k in fields)
+        # If fields is empty or simple, add full_text and pages for text display
+        if isinstance(fields, dict) and not is_structured:
+            if full_text:
+                fields["full_text"] = full_text
+            # Also check for pages array
+            pages_data = extracted.get("pages", [])
+            if pages_data and isinstance(pages_data, list):
+                print(f"[INFO] Extracted text from {len(pages_data)} page(s)")
+                fields["pages"] = pages_data
+        # Add Fields at root level if it exists
+        if root_fields:
+            fields["Fields"] = root_fields
+        # Count fields - if structured data exists, count table rows + root Fields
+        if isinstance(fields, dict):
+            if is_structured:
+                # Count table rows from all pages
+                table_rows_count = 0
+                for page_key, page_data in fields.items():
+                    if page_key.startswith("page_") and isinstance(page_data, dict):
+                        table_rows = page_data.get("table", [])
+                        if isinstance(table_rows, list):
+                            table_rows_count += len(table_rows)
+                # Count Fields from root level
+                fields_keys = 0
+                if isinstance(root_fields, dict):
+                    fields_keys = len(root_fields)
+                fields_extracted = table_rows_count + fields_keys
+                print(f"[INFO] Structured data: {table_rows_count} table rows, {fields_keys} extracted fields")
+            else:
+                # Regular fields count (excluding full_text, pages, and Fields)
+                fields_extracted = len([k for k in fields.keys() if k not in ["full_text", "pages", "Fields"]])
+                # Add Fields count if it exists
+                if isinstance(root_fields, dict):
+                    fields_extracted += len(root_fields)
+        else:
+            fields_extracted = 0
+        print(f"[INFO] Final stats - confidence: {confidence}, fields_count: {fields_extracted}")
+        status = "completed"
+        error_message = None
+    except Exception as e:
+        # Any failure is recorded as a "failed" extraction rather than an HTTP error
+        total_ms = int((time.time() - start) * 1000)
+        confidence = 0.0
+        fields = {}
+        fields_extracted = 0
+        status = "failed"
+        error_message = str(e)
+        print(f"[ERROR] Extraction failed: {error_message}")
+        print(f"[ERROR] Traceback: {traceback.format_exc()}")
+    # Save record to DB
+    rec = ExtractionRecord(
+        user_id=current_user.id,
+        file_name=file.filename,
+        file_type=content_type,
+        file_size=size_str,
+        status=status,
+        confidence=confidence,
+        fields_extracted=fields_extracted,
+        total_time_ms=total_ms,
+        raw_output=json.dumps(fields),  # Use JSON instead of str() to preserve structure
+        file_base64=file_base64,  # Store base64 encoded file for preview
+        error_message=error_message,
+    )
+    db.add(rec)
+    db.commit()
+    db.refresh(rec)
+    stages = make_stages(total_ms, status)
+    # Response shape that frontend will consume
+    return {
+        "id": rec.id,
+        "fileName": rec.file_name,
+        "fileType": rec.file_type,
+        "fileSize": rec.file_size,
+        "status": status,
+        "confidence": confidence,
+        "fieldsExtracted": fields_extracted,
+        "totalTime": total_ms,
+        "fields": fields,
+        "stages": {k: v.dict() for k, v in stages.items()},
+        "errorMessage": error_message,
+    }
+@app.get("/api/history", response_model=List[ExtractionRecordBase])
+def get_history(
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """
+    History page feed for the current user.
+    Loads the user's 100 newest extraction records, collapses records that were
+    copied from the same shared extraction down to the newest one, and attaches
+    synthetic stage data to each entry.
+    """
+    recent = (
+        db.query(ExtractionRecord)
+        .filter(ExtractionRecord.user_id == current_user.id)
+        .order_by(ExtractionRecord.created_at.desc())
+        .limit(100)
+        .all()
+    )
+    # Records arrive newest-first, so the first record seen for a given shared
+    # source is the one to keep; originals (no shared source) are always kept.
+    shared_sources_seen = set()
+    visible = []
+    for rec in recent:
+        source_id = rec.shared_from_extraction_id
+        if source_id:
+            if source_id in shared_sources_seen:
+                continue
+            shared_sources_seen.add(source_id)
+        visible.append(rec)
+    return [
+        ExtractionRecordBase(
+            id=r.id,
+            fileName=r.file_name,
+            fileType=r.file_type or "",
+            fileSize=r.file_size or "",
+            extractedAt=r.created_at,
+            status=r.status or "completed",
+            confidence=r.confidence or 0.0,
+            fieldsExtracted=r.fields_extracted or 0,
+            totalTime=r.total_time_ms or 0,
+            stages=make_stages(r.total_time_ms or 1000, r.status or "completed"),
+            errorMessage=r.error_message,
+        )
+        for r in visible
+    ]
+@app.get("/api/extraction/{extraction_id}")
+def get_extraction(
+    extraction_id: int,
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """
+    Get a specific extraction by ID with full fields data.
+    Used when viewing output from History page.
+    Only records owned by the current user are returned.
+    Raises:
+        HTTPException: 404 if no such record exists for this user
+    """
+    import ast
+    import json
+    rec = (
+        db.query(ExtractionRecord)
+        .filter(
+            ExtractionRecord.id == extraction_id,
+            ExtractionRecord.user_id == current_user.id
+        )
+        .first()
+    )
+    if not rec:
+        raise HTTPException(status_code=404, detail="Extraction not found")
+    # Parse the raw_output JSON string back to dict
+    fields = {}
+    if rec.raw_output:
+        try:
+            # Try parsing as JSON first (new format)
+            fields = json.loads(rec.raw_output)
+        except (json.JSONDecodeError, TypeError):
+            # Backward compatibility: older records stored str(dict). Only try
+            # literal_eval when the text looks like a Python dict literal.
+            try:
+                if rec.raw_output.strip().startswith('{'):
+                    fields = ast.literal_eval(rec.raw_output)
+            except Exception:
+                # Unparseable legacy data: fall back to an empty result. Catching
+                # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
+                fields = {}
+    stages = make_stages(rec.total_time_ms or 1000, rec.status or "completed")
+    return {
+        "id": rec.id,
+        "fileName": rec.file_name,
+        "fileType": rec.file_type or "",
+        "fileSize": rec.file_size or "",
+        "status": rec.status or "completed",
+        "confidence": rec.confidence or 0.0,
+        "fieldsExtracted": rec.fields_extracted or 0,
+        "totalTime": rec.total_time_ms or 0,
+        "fields": fields,
+        "fileBase64": rec.file_base64,  # Include base64 encoded file for preview
+        "stages": {k: v.dict() for k, v in stages.items()},
+        "errorMessage": rec.error_message,
+    }
+@app.post("/api/share")
+async def share_extraction(
+    extraction_id: int = Body(...),
+    recipient_emails: List[str] = Body(...),
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """
+    Share one of the caller's extractions with a list of recipients by email.
+    Every recipient must pass the business-email check. One share token (valid
+    for 30 days) is stored per recipient, then an email with the share link is
+    sent to each. A failure to send to one recipient does not stop the others;
+    the request only fails with 500 when no email could be sent at all.
+    """
+    import secrets
+    from datetime import datetime, timedelta
+    from .brevo_service import send_share_email
+    from .email_validator import validate_business_email
+    if not recipient_emails:
+        raise HTTPException(status_code=400, detail="At least one recipient email is required")
+    # Any non-business address aborts the whole request (HTTPException propagates)
+    for candidate in recipient_emails:
+        validate_business_email(candidate)
+    # The extraction must belong to the caller
+    extraction = (
+        db.query(ExtractionRecord)
+        .filter(
+            ExtractionRecord.id == extraction_id,
+            ExtractionRecord.user_id == current_user.id
+        )
+        .first()
+    )
+    if not extraction:
+        raise HTTPException(status_code=404, detail="Extraction not found")
+    base_url = os.environ.get("VITE_API_BASE_URL", "https://seth0330-ezofisocr.hf.space")
+    # Stage one token row per recipient: (record, token, normalized email)
+    pending = []
+    for raw_address in recipient_emails:
+        address = raw_address.strip().lower()
+        token_value = secrets.token_urlsafe(32)
+        record = ShareToken(
+            token=token_value,
+            extraction_id=extraction_id,
+            sender_user_id=current_user.id,
+            recipient_email=address,
+            expires_at=datetime.utcnow() + timedelta(days=30),
+        )
+        db.add(record)
+        pending.append((record, token_value, address))
+    # Persist all tokens in a single commit
+    try:
+        db.commit()
+        for record, _, _ in pending:
+            db.refresh(record)
+    except Exception as e:
+        db.rollback()
+        raise HTTPException(status_code=500, detail=f"Failed to create share tokens: {str(e)}")
+    # Deliver the links; a failed send leaves its token in place
+    successful_shares = []
+    failed_shares = []
+    for _, token_value, address in pending:
+        share_link = f"{base_url}/share/{token_value}"
+        try:
+            sender_name = current_user.name or None
+            await send_share_email(address, current_user.email, share_link, sender_name)
+            successful_shares.append(address)
+        except Exception as e:
+            print(f"[ERROR] Failed to send share email to {address}: {str(e)}")
+            failed_shares.append(address)
+    if failed_shares and not successful_shares:
+        raise HTTPException(status_code=500, detail="Failed to send share emails to all recipients")
+    if not failed_shares:
+        message = f"Extraction shared successfully with {len(successful_shares)} recipient(s)"
+    else:
+        message = f"Extraction shared with {len(successful_shares)} recipient(s). Failed to send to: {', '.join(failed_shares)}"
+    return {
+        "success": True,
+        "message": message,
+        "successful_count": len(successful_shares),
+        "failed_count": len(failed_shares),
+        "successful_emails": successful_shares,
+        "failed_emails": failed_shares or None
+    }
+# Request body for POST /api/share/link: identifies the extraction to create a link for.
+class ShareLinkRequest(BaseModel):
+    extraction_id: int
+@app.post("/api/share/link")
+async def create_share_link(
+    request: ShareLinkRequest,
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """
+    Create a copyable share link for one of the caller's extractions.
+    No recipient is attached to the token; it expires after 30 days.
+    Raises:
+        HTTPException: 404 if the extraction does not exist for this user
+    """
+    import secrets
+    from datetime import datetime, timedelta
+    owned = db.query(ExtractionRecord).filter(
+        ExtractionRecord.id == request.extraction_id,
+        ExtractionRecord.user_id == current_user.id
+    )
+    if not owned.first():
+        raise HTTPException(status_code=404, detail="Extraction not found")
+    token_value = secrets.token_urlsafe(32)
+    valid_until = datetime.utcnow() + timedelta(days=30)
+    link_record = ShareToken(
+        token=token_value,
+        extraction_id=request.extraction_id,
+        sender_user_id=current_user.id,
+        recipient_email=None,  # no specific recipient for copyable links
+        expires_at=valid_until,
+    )
+    db.add(link_record)
+    db.commit()
+    db.refresh(link_record)
+    base_url = os.environ.get("VITE_API_BASE_URL", "https://seth0330-ezofisocr.hf.space")
+    response = {
+        "success": True,
+        "share_link": f"{base_url}/share/{token_value}",
+        "share_token": token_value,
+        "expires_at": valid_until.isoformat() if valid_until else None
+    }
+    return response
+@app.get("/api/share/{token}")
+async def access_shared_extraction(
+    token: str,
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """
+    Access a shared extraction and copy it to the current user's account.
+    This endpoint is called after the user logs in via the share link.
+    If the user already has a copy of the original extraction, that copy's id
+    is returned instead of creating another one.
+    Raises:
+        HTTPException: 404 if the token or the original extraction is missing,
+            410 if the token has expired
+    """
+    from datetime import datetime
+    # NOTE(review): share.recipient_email is never compared with the current
+    # user here, so any authenticated user holding the token can redeem it.
+    # Confirm this is intended for email-addressed shares.
+    # Find the share token
+    share = (
+        db.query(ShareToken)
+        .filter(ShareToken.token == token)
+        .first()
+    )
+    if not share:
+        raise HTTPException(status_code=404, detail="Share link not found or expired")
+    # Check if token is expired
+    if share.expires_at and share.expires_at < datetime.utcnow():
+        raise HTTPException(status_code=410, detail="Share link has expired")
+    # Get the original extraction
+    original_extraction = (
+        db.query(ExtractionRecord)
+        .filter(ExtractionRecord.id == share.extraction_id)
+        .first()
+    )
+    if not original_extraction:
+        raise HTTPException(status_code=404, detail="Original extraction not found")
+    # This exact token was already redeemed by this user: return their newest copy
+    if share.accessed and share.accessed_by_user_id == current_user.id:
+        existing_copy = (
+            db.query(ExtractionRecord)
+            .filter(
+                ExtractionRecord.user_id == current_user.id,
+                ExtractionRecord.shared_from_extraction_id == original_extraction.id
+            )
+            .order_by(ExtractionRecord.created_at.desc())
+            .first()
+        )
+        if existing_copy:
+            return {
+                "success": True,
+                "extraction_id": existing_copy.id,
+                "message": "Extraction already shared with you"
+            }
+    # Also check if any copy exists for this user from this original extraction
+    existing_copy = (
+        db.query(ExtractionRecord)
+        .filter(
+            ExtractionRecord.user_id == current_user.id,
+            ExtractionRecord.shared_from_extraction_id == original_extraction.id
+        )
+        .first()
+    )
+    if existing_copy:
+        # Already copied, mark this share as accessed and return existing extraction ID
+        share.accessed = True
+        share.accessed_at = datetime.utcnow()
+        share.accessed_by_user_id = current_user.id
+        db.commit()
+        return {
+            "success": True,
+            "extraction_id": existing_copy.id,
+            "message": "Extraction already shared with you"
+        }
+    # Copy extraction to current user's account. raw_output is copied verbatim
+    # as a string, so it does not need to be parsed here.
+    new_extraction = ExtractionRecord(
+        user_id=current_user.id,
+        file_name=original_extraction.file_name,
+        file_type=original_extraction.file_type,
+        file_size=original_extraction.file_size,
+        status=original_extraction.status or "completed",
+        confidence=original_extraction.confidence or 0.0,
+        fields_extracted=original_extraction.fields_extracted or 0,
+        total_time_ms=original_extraction.total_time_ms or 0,
+        raw_output=original_extraction.raw_output,  # Copy the JSON string
+        file_base64=original_extraction.file_base64,  # Copy the base64 file
+        shared_from_extraction_id=original_extraction.id,
+        shared_by_user_id=share.sender_user_id,
+    )
+    db.add(new_extraction)
+    # Mark share as accessed (committed together with the new record)
+    share.accessed = True
+    share.accessed_at = datetime.utcnow()
+    share.accessed_by_user_id = current_user.id
+    db.commit()
+    db.refresh(new_extraction)
+    return {
+        "success": True,
+        "extraction_id": new_extraction.id,
+        "message": "Extraction shared successfully"
+    }
+# Static frontend mounting (used after we build React)
+# Dockerfile copies the Vite build into backend/frontend_dist
+# IMPORTANT: API routes must be defined BEFORE this so they take precedence
+# frontend_dir resolves to "<parent of this file's directory>/frontend_dist"
+frontend_dir = os.path.join(
+    os.path.dirname(os.path.dirname(__file__)), "frontend_dist"
+)
+# Nothing below is registered when the build output directory is absent
+if os.path.isdir(frontend_dir):
+    # Serve static files (JS, CSS, images, etc.) from assets directory
+    assets_dir = os.path.join(frontend_dir, "assets")
+    if os.path.isdir(assets_dir):
+        app.mount(
+            "/assets",
+            StaticFiles(directory=assets_dir),
+            name="assets",
+        )
+    # Serve static files from root (logo.png, favicon.ico, etc.)
+    # Files in public/ directory are copied to dist/ root during Vite build
+    # These routes must be defined BEFORE the catch-all route
+    @app.get("/logo.png")
+    async def serve_logo():
+        """Serve logo.png from frontend_dist root."""
+        from fastapi.responses import FileResponse
+        logo_path = os.path.join(frontend_dir, "logo.png")
+        if os.path.exists(logo_path):
+            return FileResponse(logo_path, media_type="image/png")
+        # File missing from the build output
+        from fastapi import HTTPException
+        raise HTTPException(status_code=404)
+    @app.get("/favicon.ico")
+    async def serve_favicon():
+        """Serve favicon.ico from frontend_dist root."""
+        from fastapi.responses import FileResponse
+        favicon_path = os.path.join(frontend_dir, "favicon.ico")
+        if os.path.exists(favicon_path):
+            return FileResponse(favicon_path, media_type="image/x-icon")
+        # File missing from the build output
+        from fastapi import HTTPException
+        raise HTTPException(status_code=404)
+    # Catch-all route to serve index.html for React Router
+    # This must be last so API routes and static files are matched first
+    @app.get("/{full_path:path}")
+    async def serve_frontend(full_path: str):
+        """
+        Serve React app for all non-API routes.
+        React Router will handle client-side routing.
+        """
+        # Skip API routes, docs, static assets, and known static files:
+        # a path that reaches this handler with one of these prefixes matched
+        # no earlier route, so it gets a 404 instead of index.html
+        if (full_path.startswith("api/") or
+            full_path.startswith("docs") or
+            full_path.startswith("openapi.json") or
+            full_path.startswith("assets/") or
+            full_path in ["logo.png", "favicon.ico"]):
+            from fastapi import HTTPException
+            raise HTTPException(status_code=404)
+        # Serve index.html for all other routes (React Router will handle routing)
+        from fastapi.responses import FileResponse
+        index_path = os.path.join(frontend_dir, "index.html")
+        if os.path.exists(index_path):
+            return FileResponse(index_path)
+        # Build output exists but has no index.html
+        from fastapi import HTTPException
+        raise HTTPException(status_code=404)
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

backend/app/models.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from sqlalchemy import Column, Integer, String, Float, DateTime, Text, ForeignKey, Boolean
 from sqlalchemy.orm import relationship
 from sqlalchemy.sql import func
@@ -134,3 +135,108 @@ class APIKey(Base):
         "User",
         back_populates="api_keys"
     )

+<<<<<<< HEAD
 from sqlalchemy import Column, Integer, String, Float, DateTime, Text, ForeignKey, Boolean
 from sqlalchemy.orm import relationship
 from sqlalchemy.sql import func
         "User",
         back_populates="api_keys"
     )
+=======
+from sqlalchemy import Column, Integer, String, Float, DateTime, Text, ForeignKey, Boolean
+from sqlalchemy.orm import relationship
+from sqlalchemy.sql import func
+from .db import Base
+class User(Base):
+    """
+    Stores user information from Firebase or OTP authentication.
+    Mapped to the "users" table. One user can own many ExtractionRecord rows,
+    reachable through the ``extractions`` relationship.
+    """
+    __tablename__ = "users"
+    # Integer primary key.
+    id = Column(Integer, primary_key=True, index=True)
+    # Required and unique across all users.
+    email = Column(String, unique=True, index=True, nullable=False)
+    # Optional profile fields.
+    name = Column(String, nullable=True)
+    picture = Column(String, nullable=True)
+    # Auth method: 'firebase' or 'otp' (defaults to 'firebase')
+    auth_method = Column(String, default='firebase')
+    # Firebase-specific: unique when present, may be NULL
+    firebase_uid = Column(String, unique=True, index=True, nullable=True)
+    # OTP-specific: defaults to False
+    email_verified = Column(Boolean, default=False)
+    # Timestamp filled in by the database (server-side default) at insert time.
+    created_at = Column(
+        DateTime(timezone=True),
+        server_default=func.now(),
+    )
+    # Relationship to extraction records (explicitly specify user_id as the foreign key)
+    # Note: primaryjoin must be specified because ExtractionRecord has multiple foreign keys to User
+    # (user_id and shared_by_user_id), so the join column cannot be inferred.
+    extractions = relationship(
+        "ExtractionRecord",
+        back_populates="user",
+        primaryjoin="User.id == ExtractionRecord.user_id"
+    )
+class ExtractionRecord(Base):
+    """
+    Stores one extraction run so the History page can show past jobs.
+    Mapped to the "extractions" table; every row belongs to a User via user_id.
+    NOTE(review): the original note says rows are filled from the /api/extract
+    endpoint "later" - confirm against the endpoint implementation.
+    """
+    __tablename__ = "extractions"
+    # Integer primary key.
+    id = Column(Integer, primary_key=True, index=True)
+    # Owning user (required).
+    user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
+    file_name = Column(String, index=True)
+    file_type = Column(String)
+    # Stored as a string; the exact format is not defined in this model.
+    file_size = Column(String)
+    status = Column(String)              # "completed" | "failed"
+    confidence = Column(Float)           # overall confidence (0–100)
+    fields_extracted = Column(Integer)   # number of fields extracted
+    total_time_ms = Column(Integer)      # total processing time in ms
+    raw_output = Column(Text)            # JSON string from the model
+    file_base64 = Column(Text, nullable=True)  # Base64 encoded original file for preview
+    error_message = Column(Text, nullable=True)
+    # Timestamp filled in by the database (server-side default) at insert time.
+    created_at = Column(
+        DateTime(timezone=True),
+        server_default=func.now(),
+    )
+    # Relationship to user (explicitly specify user_id as the foreign key)
+    # Note: primaryjoin must be specified because ExtractionRecord has multiple foreign keys to User
+    # (user_id and shared_by_user_id below).
+    user = relationship(
+        "User",
+        back_populates="extractions",
+        primaryjoin="ExtractionRecord.user_id == User.id"
+    )
+    # Track if this extraction was shared (original extraction ID); self-referential, NULL when not shared
+    shared_from_extraction_id = Column(Integer, ForeignKey("extractions.id"), nullable=True, index=True)
+    # User who shared the extraction; NULL when not shared
+    shared_by_user_id = Column(Integer, ForeignKey("users.id"), nullable=True, index=True)
+class ShareToken(Base):
+    """
+    Stores share tokens for sharing extractions with other users.
+    Mapped to the "share_tokens" table. Each row ties a unique token to one
+    extraction and to the user who created the share.
+    """
+    __tablename__ = "share_tokens"
+    # Integer primary key.
+    id = Column(Integer, primary_key=True, index=True)
+    token = Column(String, unique=True, index=True, nullable=False)  # Unique share token
+    # Extraction being shared (required).
+    extraction_id = Column(Integer, ForeignKey("extractions.id"), nullable=False, index=True)
+    # User who created the share (required).
+    sender_user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
+    recipient_email = Column(String, nullable=True, index=True)  # Nullable for public share links
+    expires_at = Column(DateTime(timezone=True), nullable=True)  # Optional expiration
+    accessed = Column(Boolean, default=False)  # Track if link was accessed
+    # When and by whom the link was accessed; both NULL until then.
+    accessed_at = Column(DateTime(timezone=True), nullable=True)
+    accessed_by_user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
+    # Timestamp filled in by the database (server-side default) at insert time.
+    created_at = Column(
+        DateTime(timezone=True),
+        server_default=func.now(),
+    )
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

backend/app/monday_service.py CHANGED Viewed

@@ -1,3 +1,4 @@
 """
 Monday.com API service for creating leads with automatic field matching.
 Reference: https://developer.monday.com/api-reference/docs
@@ -389,3 +390,396 @@ async def create_monday_lead(
         print(f"[ERROR] Failed to create Monday.com lead: {str(e)}")
         return False

+<<<<<<< HEAD
 """
 Monday.com API service for creating leads with automatic field matching.
 Reference: https://developer.monday.com/api-reference/docs
         print(f"[ERROR] Failed to create Monday.com lead: {str(e)}")
         return False
+=======
+"""
+Monday.com API service for creating leads with automatic field matching.
+Reference: https://developer.monday.com/api-reference/docs
+"""
+import os
+import httpx
+import json
+from typing import Optional, Dict, Any, List, Tuple
+from difflib import SequenceMatcher
+# API token read from the environment; an empty string disables the Monday.com calls below.
+MONDAY_API_KEY = os.environ.get("MONDAY_API_KEY", "")
+# GraphQL endpoint that every request in this module posts to.
+MONDAY_API_URL = "https://api.monday.com/v2"
+MONDAY_BOARD_ID = os.environ.get("MONDAY_BOARD_ID", None)  # Your "New Leads" board ID
+# Cache for board columns to avoid repeated API calls
+# Keyed by board ID; entries are written by _get_board_columns and never evicted there.
+_board_columns_cache: Dict[str, List[Dict[str, Any]]] = {}
+def _calculate_similarity(str1: str, str2: str) -> float:
+    """
+    Calculate similarity between two strings using SequenceMatcher.
+    Returns a value between 0.0 and 1.0.
+    """
+    return SequenceMatcher(None, str1.lower(), str2.lower()).ratio()
+def _find_best_column_match(
+    field_name: str,
+    available_columns: List[Dict[str, Any]],
+    min_similarity: float = 0.3
+) -> Optional[Tuple[str, str, float]]:
+    """
+    Find the best matching column for a field name using semantic similarity.
+    Args:
+        field_name: The field name to match (e.g., "first_name", "email")
+        available_columns: List of column dicts with 'id' and 'title' keys
+        min_similarity: Minimum similarity threshold (0.0 to 1.0)
+    Returns:
+        Tuple of (column_id, column_title, similarity_score) or None if no match found
+    """
+    best_match = None
+    best_score = 0.0
+    # Normalize field name for matching
+    normalized_field = field_name.lower().replace("_", " ").replace("-", " ")
+    # Common field name variations
+    field_variations = [
+        normalized_field,
+        field_name.lower(),
+        field_name.replace("_", ""),
+    ]
+    # Add common synonyms
+    synonyms = {
+        "first_name": ["first name", "firstname", "fname", "given name"],
+        "last_name": ["last name", "lastname", "lname", "surname", "family name"],
+        "email": ["email address", "email", "e-mail", "mail"],
+        "phone_number": ["phone", "phone number", "telephone", "mobile", "cell"],
+        "linkedin_url": ["linkedin", "linkedin profile", "linkedin url", "linkedin link"],
+        "title": ["job title", "position", "role", "job"],
+        "headline": ["headline", "tagline", "bio"],
+        "organization_name": ["company", "organization", "org", "company name", "employer"],
+        "organization_website": ["website", "company website", "url", "web"],
+        "organization_address": ["address", "company address", "location"],
+    }
+    if field_name in synonyms:
+        field_variations.extend(synonyms[field_name])
+    for column in available_columns:
+        column_title = column.get("title", "").lower()
+        column_id = column.get("id", "")
+        if not column_title or not column_id:
+            continue
+        # Calculate similarity for each variation
+        for variation in field_variations:
+            score = _calculate_similarity(variation, column_title)
+            if score > best_score:
+                best_score = score
+                best_match = (column_id, column.get("title", ""), score)
+    if best_match and best_score >= min_similarity:
+        return best_match
+    return None
+async def _get_board_columns(board_id: str) -> List[Dict[str, Any]]:
+    """
+    Fetch board columns from Monday.com API.
+    Args:
+        board_id: Monday.com board ID
+    Returns:
+        List of column dictionaries with 'id', 'title', and 'type' keys
+    """
+    # Check cache first
+    if board_id in _board_columns_cache:
+        return _board_columns_cache[board_id]
+    if not MONDAY_API_KEY:
+        print("[WARNING] MONDAY_API_KEY not set, cannot fetch board columns")
+        return []
+    query = """
+    query ($boardId: ID!) {
+        boards(ids: [$boardId]) {
+            columns {
+                id
+                title
+                type
+            }
+        }
+    }
+    """
+    headers = {
+        "Authorization": MONDAY_API_KEY,
+        "Content-Type": "application/json"
+    }
+    try:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.post(
+                MONDAY_API_URL,
+                json={
+                    "query": query,
+                    "variables": {"boardId": board_id}
+                },
+                headers=headers
+            )
+            if response.status_code == 200:
+                result = response.json()
+                if result.get("data") and result["data"].get("boards"):
+                    boards = result["data"]["boards"]
+                    if boards and boards[0].get("columns"):
+                        columns = boards[0]["columns"]
+                        # Cache the result
+                        _board_columns_cache[board_id] = columns
+                        print(f"[INFO] Fetched {len(columns)} columns from Monday.com board {board_id}")
+                        return columns
+                elif result.get("errors"):
+                    print(f"[ERROR] Failed to fetch board columns: {result['errors']}")
+            else:
+                print(f"[ERROR] Failed to fetch board columns: {response.status_code} - {response.text}")
+    except Exception as e:
+        print(f"[ERROR] Exception while fetching board columns: {str(e)}")
+    return []
+def _format_column_value(value: Any, column_type: str, column_id: Optional[str] = None) -> Any:
+    """
+    Format a value according to Monday.com column type.
+    Args:
+        value: The value to format
+        column_type: Monday.com column type (email, phone, link, text, etc.)
+        column_id: Column ID (for special handling)
+    Returns:
+        For email/phone/link: Python dict object
+        For text/other types: Plain string
+    """
+    if value is None:
+        return ""
+    value_str = str(value)
+    if column_type == "email":
+        # Monday.com email format requires dict object (will be JSON encoded later)
+        return {"email": value_str, "text": value_str}
+    elif column_type == "phone":
+        return {"phone": value_str, "countryShortName": "US"}
+    elif column_type == "link":
+        # If it's already a URL, use it; otherwise create a link
+        if value_str.startswith("http://") or value_str.startswith("https://"):
+            return {"url": value_str, "text": value_str}
+        else:
+            return {"url": f"https://{value_str}", "text": value_str}
+    else:
+        # Text, status, and other types - just return the string
+        return value_str
+async def create_monday_lead(
+    email: str,
+    first_name: Optional[str] = None,
+    last_name: Optional[str] = None,
+    phone_number: Optional[str] = None,
+    linkedin_url: Optional[str] = None,
+    title: Optional[str] = None,
+    headline: Optional[str] = None,
+    organization_name: Optional[str] = None,
+    organization_website: Optional[str] = None,
+    organization_address: Optional[str] = None,
+    board_id: Optional[str] = None
+) -> bool:
+    """
+    Create a new lead item in Monday.com board.
+    Args:
+        email: Contact email address (required)
+        first_name: Contact first name
+        last_name: Contact last name
+        phone_number: Phone number
+        linkedin_url: LinkedIn profile URL
+        title: Job title
+        headline: Professional headline
+        organization_name: Company name
+        organization_website: Company website
+        organization_address: Company address
+        board_id: Monday.com board ID as string (defaults to MONDAY_BOARD_ID env var)
+    Returns:
+        True if lead created successfully, False otherwise
+    """
+    if not MONDAY_API_KEY:
+        print("[WARNING] MONDAY_API_KEY not set, skipping Monday.com lead creation")
+        return False
+    target_board_id = board_id or MONDAY_BOARD_ID
+    if not target_board_id:
+        print("[WARNING] MONDAY_BOARD_ID not set, skipping Monday.com lead creation")
+        return False
+    # Prepare item name (use full name or email)
+    item_name = email
+    if first_name and last_name:
+        item_name = f"{first_name} {last_name}"
+    elif first_name:
+        item_name = first_name
+    elif last_name:
+        item_name = last_name
+    # Fetch board columns to automatically match fields
+    print(f"[INFO] Fetching Monday.com board columns for automatic field matching...")
+    board_columns = await _get_board_columns(str(target_board_id))
+    if not board_columns:
+        print("[WARNING] Could not fetch board columns, skipping Monday.com lead creation")
+        return False
+    # Create a mapping of column IDs to column types for formatting
+    column_types = {col["id"]: col.get("type", "text") for col in board_columns}
+    # Prepare data fields to map
+    data_fields = {
+        "email": email,
+        "first_name": first_name,
+        "last_name": last_name,
+        "phone_number": phone_number,
+        "linkedin_url": linkedin_url,
+        "title": title,
+        "headline": headline,
+        "organization_name": organization_name,
+        "organization_website": organization_website,
+        "organization_address": organization_address,
+    }
+    # Automatically match fields to columns using semantic similarity
+    column_values = {}
+    matched_fields = []
+    # Track which columns have been matched to handle duplicates (e.g., first_name and last_name -> Name)
+    column_matches = {}  # column_id -> (field_name, value)
+    for field_name, field_value in data_fields.items():
+        if not field_value:
+            continue
+        match = _find_best_column_match(field_name, board_columns)
+        if match:
+            column_id, column_title, similarity = match
+            column_type = column_types.get(column_id, "text")
+            # Handle special case: if first_name and last_name both match to the same "Name" column
+            if column_id in column_matches:
+                existing_field, existing_value = column_matches[column_id]
+                # If both first_name and last_name match to the same column, combine them
+                if (field_name in ["first_name", "last_name"] and
+                    existing_field in ["first_name", "last_name"] and
+                    field_name != existing_field):
+                    # Combine first and last name
+                    if field_name == "first_name":
+                        combined_value = f"{field_value} {existing_value}"
+                    else:
+                        combined_value = f"{existing_value} {field_value}"
+                    formatted_value = _format_column_value(combined_value, column_type, column_id)
+                    column_values[column_id] = formatted_value
+                    matched_fields.append(f"{existing_field}+{field_name} -> {column_title} (combined)")
+                    print(f"[INFO] Combined '{existing_field}' and '{field_name}' to column '{column_title}' (ID: {column_id})")
+                    continue
+                else:
+                    # Different fields matching to same column - use the one with higher similarity
+                    print(f"[DEBUG] Column '{column_title}' already matched to '{existing_field}', skipping '{field_name}'")
+                    continue
+            formatted_value = _format_column_value(field_value, column_type, column_id)
+            column_values[column_id] = formatted_value
+            column_matches[column_id] = (field_name, field_value)
+            matched_fields.append(f"{field_name} -> {column_title} (similarity: {similarity:.2f})")
+            print(f"[INFO] Matched '{field_name}' to column '{column_title}' (ID: {column_id}, type: {column_type}, value: {formatted_value[:100] if len(str(formatted_value)) > 100 else formatted_value})")
+        else:
+            print(f"[DEBUG] No suitable column match found for '{field_name}' (skipping)")
+    if not column_values:
+        print("[WARNING] No fields could be matched to board columns")
+        return False
+    print(f"[INFO] Successfully matched {len(matched_fields)} fields to Monday.com columns")
+    # Convert column_values to JSON string for GraphQL mutation
+    # Monday.com expects column values as a JSON string where:
+    # - Text columns: plain string values
+    # - Email/Phone/Link columns: dict objects (properly JSON encoded)
+    column_values_json = json.dumps(column_values)
+    print(f"[DEBUG] Monday.com column_values JSON: {column_values_json[:500]}")
+    # GraphQL mutation
+    # Note: Monday.com uses ID! (string) type for board_id, not Int!
+    mutation = """
+    mutation ($boardId: ID!, $itemName: String!, $columnValues: JSON!) {
+        create_item (board_id: $boardId, item_name: $itemName, column_values: $columnValues) {
+            id
+        }
+    }
+    """
+    # Convert board_id to string (Monday.com expects ID! which is a string)
+    board_id_str = str(target_board_id)
+    variables = {
+        "boardId": board_id_str,
+        "itemName": item_name,
+        "columnValues": column_values_json
+    }
+    headers = {
+        "Authorization": MONDAY_API_KEY,
+        "Content-Type": "application/json"
+    }
+    try:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.post(
+                MONDAY_API_URL,
+                json={
+                    "query": mutation,
+                    "variables": variables
+                },
+                headers=headers
+            )
+            if response.status_code == 200:
+                result = response.json()
+                if result.get("data") and result["data"].get("create_item"):
+                    item_id = result["data"]["create_item"].get("id")
+                    print(f"[INFO] Successfully created Monday.com lead: {item_name} (ID: {item_id})")
+                    return True
+                elif result.get("errors"):
+                    errors = result.get("errors", [])
+                    for error in errors:
+                        error_msg = error.get("message", "Unknown error")
+                        error_path = error.get("path", [])
+                        print(f"[ERROR] Monday.com API error: {error_msg}")
+                        if error_path:
+                            print(f"[ERROR] Error path: {error_path}")
+                    # Log full error for debugging
+                    print(f"[DEBUG] Full Monday.com error response: {json.dumps(errors, indent=2)}")
+                    return False
+                else:
+                    print(f"[ERROR] Unexpected Monday.com API response: {result}")
+                    return False
+            else:
+                error_data = response.text
+                print(f"[ERROR] Failed to create Monday.com lead: {response.status_code} - {error_data}")
+                return False
+    except httpx.HTTPStatusError as e:
+        print(f"[ERROR] Monday.com API HTTP error: {e.response.status_code} - {e.response.text}")
+        return False
+    except Exception as e:
+        print(f"[ERROR] Failed to create Monday.com lead: {str(e)}")
+        return False
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

backend/app/openrouter_client.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 import base64
 import json
@@ -860,3 +861,867 @@ async def extract_fields_from_document(
         return_obj["Fields"] = extracted_fields
     return return_obj

+<<<<<<< HEAD
 import os
 import base64
 import json
         return_obj["Fields"] = extracted_fields
     return return_obj
+=======
+import os
+import base64
+import json
+import re
+import time
+import asyncio
+from io import BytesIO
+from typing import Any, Dict, List, Optional, Tuple
+import httpx
+try:
+    import fitz  # PyMuPDF
+    from PIL import Image
+    PDF_SUPPORT = True
+except ImportError as e:
+    PDF_SUPPORT = False
+    print(f"[WARNING] PDF support libraries not available: {e}. PDF conversion will not work.")
+# RunPod Serverless OCR Configuration
+RUNPOD_ENDPOINT = os.environ.get("RUNPOD_ENDPOINT", "https://api.runpod.ai/v2/j2jvf8t6n0rk5c/run")
+RUNPOD_API_KEY = os.environ.get("RUNPOD_API_KEY", "rpa_0UJOK33ZO7SID9B3ASFSKKPUHNPBQC5Z2128RB4O4qi9ts")
+# Extract endpoint ID from endpoint URL for status polling
+# URL format: https://api.runpod.ai/v2/{endpoint_id}/run
+_endpoint_id = RUNPOD_ENDPOINT.split("/v2/")[1].split("/")[0] if "/v2/" in RUNPOD_ENDPOINT else None
+RUNPOD_STATUS_ENDPOINT = f"https://api.runpod.ai/v2/{_endpoint_id}/status" if _endpoint_id else None
+def _pdf_to_images(pdf_bytes: bytes) -> List[bytes]:
+    """
+    Convert PDF pages to PNG images.
+    Returns a list of PNG image bytes, one per page.
+    """
+    if not PDF_SUPPORT:
+        raise RuntimeError("PyMuPDF not installed. Cannot convert PDF to images.")
+    pdf_doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+    images = []
+    print(f"[INFO] PDF has {len(pdf_doc)} page(s)")
+    for page_num in range(len(pdf_doc)):
+        page = pdf_doc[page_num]
+        # Render page to image (zoom factor 2 for better quality)
+        mat = fitz.Matrix(2.0, 2.0)  # 2x zoom for better quality
+        pix = page.get_pixmap(matrix=mat)
+        # Convert to PIL Image
+        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+        # Resize if too large to avoid GPU memory issues (max 1920px on longest side)
+        max_size = 1920
+        w, h = img.size
+        if w > max_size or h > max_size:
+            if w > h:
+                new_w = max_size
+                new_h = int(h * (max_size / w))
+            else:
+                new_h = max_size
+                new_w = int(w * (max_size / h))
+            img = img.resize((new_w, new_h), Image.LANCZOS)
+            print(f"[INFO] Resized page {page_num + 1} from {w}x{h} to {new_w}x{new_h}")
+        else:
+            print(f"[INFO] Converted page {page_num + 1} to image ({w}x{h})")
+        # Convert to JPEG bytes (better compression)
+        img_bytes = BytesIO()
+        img.save(img_bytes, format="JPEG", quality=95)
+        images.append(img_bytes.getvalue())
+    pdf_doc.close()
+    return images
+def _image_bytes_to_base64(image_bytes: bytes) -> str:
+    """Convert image bytes to base64 data URL (JPEG format)."""
+    b64 = base64.b64encode(image_bytes).decode("utf-8")
+    data_url = f"data:image/jpeg;base64,{b64}"
+    print(f"[DEBUG] Base64 encoded image: {len(image_bytes)} bytes -> {len(data_url)} chars")
+    return data_url
+def _parse_markdown_table(text: str) -> Optional[Tuple[List[str], List[List[str]]]]:
+    """
+    Parse a markdown table from text.
+    Returns (headers, rows) if table found, None otherwise.
+    Handles various table formats including malformed ones.
+    """
+    lines = [line.strip() for line in text.split('\n')]
+    # Find potential table start (line with multiple | and actual text content)
+    table_start = None
+    for i, line in enumerate(lines):
+        if '|' in line and line.count('|') >= 2:
+            # Skip separator lines (only |, -, :, spaces)
+            if re.match(r'^[\s\|\-:]+$', line):
+                continue
+            # Check if line has meaningful text (not just | characters)
+            cells = [cell.strip() for cell in line.split('|')]
+            if cells and not cells[0]:
+                cells = cells[1:]
+            if cells and not cells[-1]:
+                cells = cells[:-1]
+            # Must have at least 2 columns with some text
+            meaningful_cells = [c for c in cells if len(c) > 0]
+            if len(meaningful_cells) >= 2:
+                table_start = i
+                break
+    if table_start is None:
+        return None
+    # Find table end (first non-empty line without | after table start)
+    table_end = None
+    for i in range(table_start + 1, len(lines)):
+        line = lines[i]
+        if not line:  # Empty line, continue
+            continue
+        if '|' not in line:
+            # Non-empty line without | means table ended
+            table_end = i
+            break
+    if table_end is None:
+        table_end = len(lines)
+    table_lines = lines[table_start:table_end]
+    # Find the actual header row (should have meaningful text, not just | or separators)
+    headers = None
+    header_idx = None
+    for i, line in enumerate(table_lines):
+        if not line or '|' not in line:
+            continue
+        # Skip separator lines (lines with only |, -, :, spaces)
+        if re.match(r'^[\s\|\-:]+$', line):
+            continue
+        # Check if this line has meaningful content (not just | characters)
+        cells = [cell.strip() for cell in line.split('|')]
+        # Remove empty cells at start/end
+        if cells and not cells[0]:
+            cells = cells[1:]
+        if cells and not cells[-1]:
+            cells = cells[:-1]
+        # Header should have at least 3 columns and meaningful text
+        if len(cells) >= 3:
+            # Check if cells have actual text (not just empty or single char)
+            meaningful_cells = [c for c in cells if len(c) > 1]
+            if len(meaningful_cells) >= 3:
+                headers = cells
+                header_idx = i
+                break
+    if not headers or header_idx is None:
+        return None
+    # Parse data rows (skip separator line after header if present)
+    rows = []
+    num_columns = len(headers)
+    for i in range(header_idx + 1, len(table_lines)):
+        line = table_lines[i]
+        if not line:
+            continue
+        # Skip separator lines
+        if re.match(r'^[\s\|\-:]+$', line):
+            continue
+        if '|' not in line:
+            # No more table rows
+            break
+        cells = [cell.strip() for cell in line.split('|')]
+        # Remove empty cells at start/end
+        if cells and not cells[0]:
+            cells = cells[1:]
+        if cells and not cells[-1]:
+            cells = cells[:-1]
+        # Only add rows that match header column count (allow some flexibility)
+        if len(cells) == num_columns or (len(cells) >= num_columns - 1 and len(cells) <= num_columns + 1):
+            # Pad or trim to match header count
+            if len(cells) < num_columns:
+                cells.extend([''] * (num_columns - len(cells)))
+            elif len(cells) > num_columns:
+                cells = cells[:num_columns]
+            # Only add if row has at least one non-empty cell
+            if any(cell for cell in cells):
+                rows.append(cells)
+    if not rows:
+        return None
+    return (headers, rows)
+def _extract_metadata(text: str) -> Dict[str, str]:
+    """
+    Extract metadata from document header text.
+    Looks for title, office, notice number, and description.
+    """
+    metadata = {
+        "title": "",
+        "office": "",
+        "notice_no": "",
+        "description": ""
+    }
+    lines = [line.strip() for line in text.split('\n') if line.strip()]
+    # Extract office (usually first non-empty line)
+    if lines:
+        metadata["office"] = lines[0]
+    # Look for notice number pattern (like "पत्रक सं- 1239" or "सं- 1239")
+    notice_pattern = r'(?:पत्रक\s+)?सं[-\s:]*(\d+)'
+    for line in lines[:10]:  # Check first 10 lines
+        match = re.search(notice_pattern, line)
+        if match:
+            metadata["notice_no"] = match.group(1)
+            break
+    # Look for title - usually in quotes or contains specific keywords
+    # Check for quoted text first
+    quoted_title = re.search(r'["""]([^"""]+)["""]', text[:1000])
+    if quoted_title:
+        metadata["title"] = quoted_title.group(1).strip()
+    else:
+        # Look for title patterns
+        title_keywords = ['सम्पत्ति', 'सूचना', 'विज्ञप्ति', 'नाम परिवर्तन']
+        for line in lines[:5]:
+            if any(keyword in line for keyword in title_keywords):
+                # Extract the title phrase
+                title_match = re.search(r'(सम्पत्ति[^।]*|सूचना[^।]*|विज्ञप्ति[^।]*)', line)
+                if title_match:
+                    metadata["title"] = title_match.group(1).strip()
+                    break
+    # Extract description (text before table, usually contains key phrases)
+    description_keywords = ['नाम परिवर्तन', 'अधिनियम', 'धारा', 'प्रकाशन', 'आवेदन']
+    description_parts = []
+    for i, line in enumerate(lines[:15]):  # Check first 15 lines
+        if any(keyword in line for keyword in description_keywords):
+            description_parts.append(line)
+            # Get a few surrounding lines for context
+            if i > 0:
+                description_parts.insert(0, lines[i-1])
+            if i < len(lines) - 1:
+                description_parts.append(lines[i+1])
+            break
+    if description_parts:
+        description = ' '.join(description_parts).strip()
+        if len(description) > 30:  # Only if substantial
+            # Clean up and limit length
+            description = re.sub(r'\s+', ' ', description)
+            metadata["description"] = description[:300]  # Limit length
+    return metadata
+def _parse_model_response(response_text: str) -> Tuple[str, Dict[str, Any]]:
+    """
+    Split a raw model response into extracted text and a metadata dict.
+    Metadata is looked for, in order, as:
+      1. a JSON object following a METADATA: label (bare, or inside a
+         triple-backtick fence), then
+      2. "key: value" lines following a METADATA: label.
+    The text is the body of a TEXT: section when one exists. Otherwise it is
+    the response with the metadata removed: only the JSON section when JSON
+    metadata was parsed, or everything from METADATA: onward when it was not.
+    Args:
+        response_text: Raw string returned by the model.
+    Returns:
+        (extracted_text, metadata_dict). Metadata entries whose value is
+        falsy or blank are dropped.
+    """
+    metadata: Dict[str, Any] = {}
+    text = response_text
+    # True once a JSON metadata section has been parsed and cut out of `text`.
+    json_section_removed = False
+    search_flags = re.DOTALL | re.IGNORECASE
+    # All patterns are matched case-insensitively, so "METADATA:" also covers
+    # "metadata:"; a separate lower-case pattern would be an exact duplicate.
+    metadata_patterns = [
+        r'METADATA:\s*\n?\s*({.*?})(?:\n\n|\nTEXT|$)',
+        r'METADATA:\s*\n?\s*```json\s*({.*?})\s*```',
+        r'METADATA:\s*\n?\s*```\s*({.*?})\s*```',
+    ]
+    for pattern in metadata_patterns:
+        match = re.search(pattern, response_text, search_flags)
+        if not match:
+            continue
+        try:
+            parsed = json.loads(match.group(1).strip())
+        except json.JSONDecodeError:
+            # The lazy {.*?} capture can cut nested JSON short; try the next pattern.
+            continue
+        if not isinstance(parsed, dict):
+            continue
+        metadata = parsed
+        # Remove only the metadata section so text on either side survives.
+        text = response_text[:match.start()] + response_text[match.end():]
+        json_section_removed = True
+        break
+    # If no JSON found, try to extract metadata from structured text format
+    if not metadata:
+        metadata_section = re.search(r'METADATA:\s*\n(.*?)(?:\n\n|\nTEXT|$)', response_text, search_flags)
+        if metadata_section:
+            for line in metadata_section.group(1).split('\n'):
+                if ':' not in line:
+                    continue
+                raw_key, raw_value = line.split(':', 1)
+                key = raw_key.strip().lower().replace(' ', '_')
+                value = raw_value.strip()
+                if value:
+                    metadata[key] = value
+    # An explicit TEXT section always wins over the fallbacks.
+    text_match = re.search(r'TEXT:\s*\n(.*?)(?:\n\nMETADATA|$)', response_text, search_flags)
+    if text_match:
+        text = text_match.group(1)
+    elif not json_section_removed:
+        # No parsed JSON section to cut out precisely: drop everything from
+        # the METADATA label onward (no-op when the label is absent).
+        text = re.sub(r'METADATA:.*', '', response_text, flags=search_flags)
+    text = text.strip()
+    # Clean up metadata - remove empty values
+    metadata = {k: v for k, v in metadata.items() if v and str(v).strip()}
+    return text, metadata
+def _extract_footer_notes(text: str) -> List[str]:
+    """
+    Collect note-like sentences that follow the first table in the text.
+    The table is taken to start at the first line containing '|' and to run
+    through every following line that contains '|' or is a separator row.
+    Everything after it is split into sentences; sentences longer than 20
+    characters are kept, with internal whitespace collapsed.
+    Returns at most five notes, or an empty list when no line contains '|'.
+    """
+    all_lines = text.split('\n')
+    line_count = len(all_lines)
+    # Position of the first line that looks like part of a table, if any.
+    first_table_line = next((pos for pos, ln in enumerate(all_lines) if '|' in ln), None)
+    if first_table_line is None:
+        # No table: the footer is defined as "after the table", so it is empty.
+        footer_start = line_count
+    else:
+        footer_start = first_table_line + 1
+        while footer_start < line_count:
+            candidate = all_lines[footer_start]
+            if '|' not in candidate and not re.match(r'^[\s\|\-:]+$', candidate):
+                break
+            footer_start += 1
+    footer_text = '\n'.join(all_lines[footer_start:]).strip()
+    collected = []
+    # Sentence boundaries: danda, full stop or exclamation mark followed by whitespace.
+    for fragment in re.split(r'[।\.!]\s+', footer_text):
+        fragment = fragment.strip()
+        if len(fragment) <= 20:
+            continue
+        fragment = re.sub(r'\s+', ' ', fragment)
+        if fragment:
+            collected.append(fragment)
+    return collected[:5]
+def _parse_text_with_tables(text: str, page_metadata: Dict[str, Any] = None) -> Dict[str, Any]:
+    """
+    Parse page text into a structured dict with metadata, table and footer notes.
+    Args:
+        text: Page text, possibly containing a markdown table.
+        page_metadata: Metadata already extracted by the model. When empty or
+            None, metadata is derived from the text via _extract_metadata.
+    Returns:
+        Dict with keys:
+          "text": the original text, unchanged.
+          "metadata": page_metadata if provided, else the extracted fallback.
+          "table": {"row_1": {column_key: cell, ...}, ...} when a table was
+              found; an empty list otherwise (kept for existing consumers).
+          "footer_notes": list of notes from _extract_footer_notes.
+    """
+    result = {
+        "text": text,  # Keep original text
+        "metadata": page_metadata if page_metadata else {},
+        "table": [],
+        "footer_notes": []
+    }
+    table_data = _parse_markdown_table(text)
+    if table_data:
+        headers, rows = table_data
+        print(f"[INFO] Found table with {len(headers)} columns and {len(rows)} rows")
+        # Map column index -> unique key derived from the original header text.
+        header_mapping = {}
+        used_keys = set()
+        for i, header in enumerate(headers):
+            # Sanitise first: drop punctuation (keeping word characters and
+            # Devanagari), then turn whitespace runs into underscores.
+            base_key = re.sub(r'[^\w\s\u0900-\u097F]', '', header.strip())
+            base_key = re.sub(r'\s+', '_', base_key)
+            # If key is empty after cleaning, use column index
+            if not base_key:
+                base_key = f"column_{i+1}"
+            # De-duplicate on the sanitised key, so headers that only differ in
+            # stripped characters cannot overwrite each other's column.
+            header_key = base_key
+            suffix = 1
+            while header_key in used_keys:
+                suffix += 1
+                header_key = f"{base_key}_{suffix}"
+            used_keys.add(header_key)
+            header_mapping[i] = header_key
+        # Each table row becomes its own section: row_1, row_2, ...
+        table_rows_dict = {}
+        for idx, row in enumerate(rows, start=1):
+            row_dict = {}
+            for i, header_key in header_mapping.items():
+                # Rows shorter than the header simply omit the missing columns.
+                if i < len(row):
+                    row_dict[header_key] = row[i].strip()
+            if row_dict:
+                table_rows_dict[f"row_{idx}"] = row_dict
+        result["table"] = table_rows_dict
+    # Fall back to basic extraction only when the caller supplied no metadata,
+    # whether or not a table was found.
+    if not result["metadata"]:
+        result["metadata"] = _extract_metadata(text)
+    result["footer_notes"] = _extract_footer_notes(text)
+    return result
+async def _poll_runpod_job(job_id: str, client: httpx.AsyncClient, max_wait_time: int = 300) -> Dict[str, Any]:
+    """
+    Wait for a RunPod job to finish by polling its status endpoint.
+    Args:
+        job_id: Identifier returned when the job was submitted.
+        client: Open httpx.AsyncClient used for the status requests.
+        max_wait_time: Seconds to keep polling before giving up.
+    Returns:
+        The status payload of the job once its status is COMPLETED.
+    Raises:
+        RuntimeError: If the job reports FAILED or max_wait_time is exceeded.
+        httpx.HTTPStatusError: If a status request returns an error status.
+    """
+    auth_headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {RUNPOD_API_KEY}"
+    }
+    seconds_between_polls = 2
+    started_at = time.time()
+    # The time budget is checked before each request, as a poll may itself be slow.
+    while time.time() - started_at <= max_wait_time:
+        status_url = f"{RUNPOD_STATUS_ENDPOINT}/{job_id}"
+        reply = await client.get(status_url, headers=auth_headers)
+        reply.raise_for_status()
+        job_state = reply.json()
+        state_name = job_state.get("status", "").upper()
+        if state_name == "COMPLETED":
+            print(f"[INFO] Job {job_id} completed successfully")
+            return job_state
+        if state_name == "FAILED":
+            error_msg = job_state.get("error", "Unknown error")
+            raise RuntimeError(f"Job {job_id} failed: {error_msg}")
+        # IN_QUEUE, IN_PROGRESS and any unrecognised status are all treated
+        # the same way: report and poll again after a short pause.
+        print(f"[INFO] Job {job_id} status: {state_name}, waiting...")
+        await asyncio.sleep(seconds_between_polls)
+    raise RuntimeError(f"Job {job_id} timed out after {max_wait_time} seconds")
+async def _extract_text_with_ocr(image_bytes: bytes, page_num: int, total_pages: int, custom_prompt: str = None) -> Dict[str, Any]:
+    """
+    Run one image through the RunPod OCR endpoint and return its text and metadata.
+    The image is sent base64-encoded together with a prompt. If the endpoint
+    answers with a queued/in-progress job, the job is polled until it finishes.
+    The model output is then split into text and metadata by
+    _parse_model_response.
+    Args:
+        image_bytes: Image bytes to process
+        page_num: Page number (used in log and error messages)
+        total_pages: Total number of pages (used in log messages)
+        custom_prompt: Optional prompt replacing the default
+            "Extract all text from this image."
+    Returns:
+        Dict with keys "doc_type", "confidence", "full_text" and "fields"
+        (the parsed metadata, possibly empty).
+    Raises:
+        RuntimeError: For any failure, including HTTP errors, a failed or
+            timed-out job, and a missing status endpoint.
+    """
+    # Convert image bytes to base64
+    image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+    print(f"[INFO] OCR: Processing page {page_num}/{total_pages} with RunPod endpoint")
+    try:
+        # Use custom prompt if provided, otherwise use default
+        if custom_prompt:
+            metadata_prompt = custom_prompt
+        else:
+            # Default prompt for general text extraction
+            metadata_prompt = """Extract all text from this image."""
+        # Prepare request payload for RunPod
+        # RunPod serverless endpoints expect image_base64, image_url, or image_path
+        payload = {
+            "input": {
+                "prompt": metadata_prompt,
+                "image_base64": image_base64  # Base64 encoded image
+            }
+        }
+        # Make HTTP request to RunPod endpoint
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {RUNPOD_API_KEY}"
+        }
+        # The same client (and its 300 s per-request timeout) is reused for polling.
+        async with httpx.AsyncClient(timeout=300.0) as client:
+            # Submit job
+            response = await client.post(
+                RUNPOD_ENDPOINT,
+                headers=headers,
+                json=payload
+            )
+            response.raise_for_status()
+            result = response.json()
+            # Check if this is an async job (has job ID and status)
+            job_id = result.get("id")
+            status = result.get("status", "").upper()
+            if job_id and status in ["IN_QUEUE", "IN_PROGRESS"]:
+                # This is an async job, need to poll for completion
+                print(f"[INFO] Job submitted with ID: {job_id}, status: {status}")
+                if not RUNPOD_STATUS_ENDPOINT:
+                    raise RuntimeError("RunPod status endpoint not configured. Cannot poll async job.")
+                # Poll until completion; the final status payload replaces `result`
+                result = await _poll_runpod_job(job_id, client)
+            # Extract text from RunPod response
+            # RunPod serverless typically returns: {"id": "...", "status": "...", "output": "..."}
+            # The output might be a string or a dict depending on the model
+            extracted_text = ""
+            if "output" in result:
+                output = result["output"]
+                if isinstance(output, str):
+                    extracted_text = output
+                elif isinstance(output, dict):
+                    # If output is a dict, try common fields: "text", then "result", then "content"
+                    extracted_text = output.get("text", output.get("result", output.get("content", "")))
+                    if not extracted_text and isinstance(output.get("text"), str):
+                        extracted_text = output["text"]
+                elif isinstance(output, list) and len(output) > 0:
+                    # If output is a list, take the first element (later elements are ignored)
+                    extracted_text = str(output[0])
+            elif "result" in result:
+                extracted_text = str(result["result"])
+            elif "text" in result:
+                extracted_text = str(result["text"])
+            else:
+                # Fallback: convert entire response to string
+                extracted_text = str(result)
+            # Normalise any falsy value (e.g. None) to an empty string
+            if not extracted_text:
+                extracted_text = ""
+            print(f"[INFO] OCR: Extracted {len(extracted_text)} characters from page {page_num}")
+            # Parse model response to extract text and metadata
+            parsed_text, parsed_metadata = _parse_model_response(extracted_text)
+            # Calculate confidence based on response quality
+            # Create a mock response object for compatibility with confidence calculation:
+            # an ad-hoc object exposing `choices[0].finish_reason` and
+            # `usage.completion_tokens` (a whitespace-split word count).
+            mock_response = type('obj', (object,), {
+                'choices': [type('obj', (object,), {'finish_reason': 'stop'})()],
+                'usage': type('obj', (object,), {'completion_tokens': len(parsed_text.split())})()
+            })()
+            confidence = _calculate_ocr_confidence(mock_response, parsed_text)
+            # Determine document type from metadata if available
+            doc_type = parsed_metadata.get("document_type", "other")
+            if doc_type == "other" and parsed_metadata.get("title"):
+                # Try to infer from title using keyword groups, checked in order
+                title_lower = parsed_metadata.get("title", "").lower()
+                if any(kw in title_lower for kw in ["tender", "bid", "quotation"]):
+                    doc_type = "tender"
+                elif any(kw in title_lower for kw in ["recruitment", "appointment", "vacancy"]):
+                    doc_type = "recruitment"
+                elif any(kw in title_lower for kw in ["notice", "notification", "circular"]):
+                    doc_type = "notice"
+            # Return text and extracted metadata
+            return {
+                "doc_type": doc_type,
+                "confidence": confidence,
+                "full_text": parsed_text,
+                "fields": parsed_metadata if parsed_metadata else {}  # Model-extracted metadata
+            }
+    except httpx.HTTPStatusError as e:
+        # Non-2xx reply from submit or status requests: include the response body
+        error_msg = f"HTTP {e.response.status_code}: {e.response.text}"
+        print(f"[ERROR] OCR API HTTP error for page {page_num}: {error_msg}")
+        raise RuntimeError(f"OCR API error for page {page_num}: {error_msg}")
+    except Exception as e:
+        # Catch-all: this also re-wraps the RuntimeErrors raised above
+        error_msg = str(e)
+        print(f"[ERROR] OCR API error for page {page_num}: {error_msg}")
+        raise RuntimeError(f"OCR API error for page {page_num}: {error_msg}")
+def _calculate_ocr_confidence(response, extracted_text: str) -> float:
+    """
+    Heuristic 0-100 confidence score for a piece of OCR output.
+    Scoring starts from a base of 92 and is adjusted by text length, presence
+    of table pipes, the share of non-whitespace characters, and whether the
+    text mixes digits, letters and punctuation. Empty text scores 0; text
+    under 10 or under 50 characters returns a fixed reduced score directly.
+    Args:
+        response: Accepted for call compatibility; not read by this function.
+        extracted_text: The OCR text to score.
+    Returns:
+        Score rounded to one decimal place.
+    """
+    baseline = 92.0
+    stripped_len = len(extracted_text.strip())
+    # Guard clauses: empty and very short outputs short-circuit the heuristics.
+    if stripped_len == 0:
+        return 0.0
+    if stripped_len < 10:
+        return max(30.0, baseline - 40.0)
+    if stripped_len < 50:
+        return max(60.0, baseline - 20.0)
+    def _boost(current, amount):
+        # Add a bonus without ever exceeding 100.
+        return min(100.0, current + amount)
+    # Length bonus: longer outputs are treated as more likely complete.
+    if stripped_len > 1000:
+        score = _boost(baseline, 5.0)
+    elif stripped_len > 500:
+        score = _boost(baseline, 3.0)
+    else:
+        score = baseline
+    # More than five pipes is taken as evidence of an extracted table.
+    if extracted_text.count('|') > 5:
+        score = _boost(score, 6.0)
+    # Share of non-whitespace characters relative to the stripped length.
+    visible_chars = sum(1 for ch in extracted_text if not ch.isspace())
+    density = visible_chars / stripped_len
+    for threshold, bonus in ((0.85, 5.0), (0.75, 3.0), (0.6, 1.0)):
+        if density > threshold:
+            score = _boost(score, bonus)
+            break
+    else:
+        if density < 0.3:
+            # Mostly whitespace: penalise, but not below 60.
+            score = max(60.0, score - 15.0)
+    # Mixed digits, letters and punctuation suggests well-structured text.
+    contains_digit = any(ch.isdigit() for ch in extracted_text)
+    contains_letter = any(ch.isalpha() for ch in extracted_text)
+    contains_punct = any(ch in '.,;:!?()[]{}' for ch in extracted_text)
+    if contains_digit and contains_letter and contains_punct:
+        score = _boost(score, 2.0)
+    return round(min(100.0, max(0.0, score)), 1)
+async def extract_fields_from_document(
+    file_bytes: bytes,
+    content_type: str,
+    filename: str,
+    key_fields: str = None,
+) -> Dict[str, Any]:
+    """
+    Run OCR over a PDF or image and return combined, structured results.
+    PDFs are rendered to one image per page; other files are normalised to a
+    single RGB JPEG no larger than 1920px on the longest side. Each page is
+    sent to the OCR model separately, so one failing page does not abort
+    the rest.
+    Args:
+        file_bytes: Raw bytes of the uploaded file.
+        content_type: MIME type; "application/pdf" (or anything ending in
+            "/pdf") selects the PDF path.
+        filename: Original file name (not used by this function).
+        key_fields: Optional comma-separated field names. When given, a second
+            OCR pass on the first page asks the model for these as JSON.
+    Returns:
+        Dict with "doc_type", "confidence" (average over pages with a positive
+        score), "full_text", "fields" (per-page structured data keyed
+        "page_N"), "pages" (raw per-page results) and, only when the second
+        pass produced values, "Fields".
+    Raises:
+        RuntimeError: If a PDF is supplied but PDF support is unavailable.
+    """
+    # Get raw image bytes for processing
+    if content_type == "application/pdf" or content_type.endswith("/pdf"):
+        if not PDF_SUPPORT:
+            raise RuntimeError("PDF support requires PyMuPDF. Please install it.")
+        # For PDFs, convert to images
+        image_bytes_list = _pdf_to_images(file_bytes)
+    else:
+        # For regular images, convert to JPEG for consistency
+        try:
+            img = Image.open(BytesIO(file_bytes))
+            if img.mode != "RGB":
+                img = img.convert("RGB")
+            # Resize if too large (max 1920px on longest side), keeping aspect ratio
+            max_size = 1920
+            w, h = img.size
+            if w > max_size or h > max_size:
+                if w > h:
+                    new_w = max_size
+                    new_h = int(h * (max_size / w))
+                else:
+                    new_h = max_size
+                    new_w = int(w * (max_size / h))
+                img = img.resize((new_w, new_h), Image.LANCZOS)
+                print(f"[INFO] Resized image from {w}x{h} to {new_w}x{new_h}")
+            # Convert to JPEG bytes
+            img_bytes = BytesIO()
+            img.save(img_bytes, format="JPEG", quality=95)
+            image_bytes_list = [img_bytes.getvalue()]
+        except Exception as e:
+            # Fallback ONLY when PIL processing failed: use the original bytes.
+            # This assignment must stay inside the except block, otherwise it
+            # overwrites the resized/converted image on every call.
+            print(f"[WARNING] Could not process image with PIL: {e}. Using original bytes.")
+            image_bytes_list = [file_bytes]
+    total_pages = len(image_bytes_list)
+    print(f"[INFO] Processing {total_pages} page(s) with OCR model...")
+    # Process each page separately; failures are recorded per page
+    page_results = []
+    for page_num, img_bytes in enumerate(image_bytes_list):
+        print(f"[INFO] Processing page {page_num + 1}/{total_pages}...")
+        try:
+            page_result = await _extract_text_with_ocr(img_bytes, page_num + 1, total_pages, None)
+            page_results.append({
+                "page_number": page_num + 1,
+                "text": page_result.get("full_text", ""),
+                "fields": page_result.get("fields", {}),
+                "confidence": page_result.get("confidence", 0),
+                "doc_type": page_result.get("doc_type", "other"),
+            })
+            print(f"[INFO] Page {page_num + 1} processed successfully")
+        except Exception as e:
+            print(f"[ERROR] Failed to process page {page_num + 1}: {e}")
+            page_results.append({
+                "page_number": page_num + 1,
+                "text": "",
+                "fields": {},
+                "confidence": 0,
+                "error": str(e)
+            })
+    # Combine results from all pages that produced text
+    combined_full_text = "\n\n".join([f"=== PAGE {p['page_number']} ===\n\n{p['text']}" for p in page_results if p.get("text")])
+    # Extract user-specified fields if key_fields provided
+    extracted_fields = {}
+    if key_fields and key_fields.strip():
+        # Parse user input: "Invoice Number, Invoice Date, PO Number" -> ['Invoice Number', 'Invoice Date', 'PO Number']
+        field_list = [f.strip() for f in key_fields.split(',') if f.strip()]
+        if field_list:
+            print(f"[INFO] Extracting user-specified fields: {field_list}")
+            # Format fields as JSON array string for prompt
+            fields_json = json.dumps(field_list)
+            custom_prompt = f"Extract the following fields from this image and return as JSON: {fields_json}. Return only a valid JSON object with the field names as keys and their extracted values."
+            # Run second OCR pass on first page (usually has most metadata) with custom prompt
+            if image_bytes_list and len(image_bytes_list) > 0:
+                try:
+                    print("[INFO] Running second OCR pass for field extraction...")
+                    field_result = await _extract_text_with_ocr(image_bytes_list[0], 1, 1, custom_prompt)
+                    field_text = field_result.get("full_text", "")
+                    # Try to parse JSON from the response
+                    try:
+                        # Look for a JSON object (one level of nesting) in the response
+                        json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', field_text, re.DOTALL)
+                        if json_match:
+                            extracted_fields = json.loads(json_match.group(0))
+                        else:
+                            # Try parsing the entire response as JSON
+                            extracted_fields = json.loads(field_text)
+                        if isinstance(extracted_fields, dict):
+                            print(f"[INFO] Successfully extracted {len(extracted_fields)} fields from second OCR pass")
+                        else:
+                            # Valid JSON but not an object (e.g. a list or number): unusable as fields
+                            print(f"[WARNING] Could not parse JSON from field extraction response: {field_text[:200]}")
+                            extracted_fields = {}
+                    except json.JSONDecodeError:
+                        print(f"[WARNING] Could not parse JSON from field extraction response: {field_text[:200]}")
+                        extracted_fields = {}
+                except Exception as e:
+                    # Field extraction is best-effort; the main OCR result is still returned
+                    print(f"[WARNING] Field extraction failed: {e}")
+                    extracted_fields = {}
+    # Parse each page for tables and structure the output
+    structured_pages = {}
+    for page_result in page_results:
+        if page_result.get("text"):
+            page_num = page_result.get("page_number", 1)
+            page_text = page_result.get("text", "")
+            # Parse text for tables and structure
+            parsed_data = _parse_text_with_tables(page_text, {})
+            # Build structured page output (without Fields - moved to root level)
+            page_key = f"page_{page_num}"
+            structured_pages[page_key] = {
+                "text": parsed_data["text"],
+                "table": parsed_data["table"],
+                "footer_notes": parsed_data["footer_notes"],
+                "confidence": page_result.get("confidence", 0),
+                "doc_type": page_result.get("doc_type", "other")
+            }
+    # Pages are always keyed page_X (even for a single page); empty when no page had text
+    combined_fields = structured_pages if structured_pages else {}
+    # Calculate average confidence over pages with a positive score
+    confidences = [p.get("confidence", 0) for p in page_results if p.get("confidence", 0) > 0]
+    avg_confidence = sum(confidences) / len(confidences) if confidences else 0
+    # Determine doc_type from the first page whose type is not "other"
+    doc_type = "other"
+    for page_result in page_results:
+        if page_result.get("doc_type") and page_result["doc_type"] != "other":
+            doc_type = page_result["doc_type"]
+            break
+    return_obj = {
+        "doc_type": doc_type,
+        "confidence": avg_confidence,
+        "full_text": combined_full_text,
+        "fields": combined_fields,  # Now contains structured data with tables
+        "pages": page_results
+    }
+    # Add Fields at root level only if user provided key_fields and extraction succeeded
+    if extracted_fields:
+        return_obj["Fields"] = extracted_fields
+    return return_obj
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

backend/app/otp_service.py CHANGED Viewed

@@ -1,3 +1,4 @@
 """
 OTP (One-Time Password) service for email-based authentication.
 """
@@ -195,3 +196,202 @@ async def verify_otp(email: str, otp: str, db: Session) -> User:
     return user

+<<<<<<< HEAD
 """
 OTP (One-Time Password) service for email-based authentication.
 """
     return user
+=======
+"""
+OTP (One-Time Password) service for email-based authentication.
+"""
+import random
+import string
+from datetime import datetime, timedelta
+from typing import Dict, Optional
+from sqlalchemy.orm import Session
+from fastapi import HTTPException
+from .models import User
+from .brevo_service import send_otp_email
+# Store OTPs in memory (in production, use Redis or database).
+# Keyed by lower-cased email address; each value is the dict written by
+# request_otp with the keys 'otp', 'expires_at', 'attempts' and 'max_attempts'.
+# Being a module-level dict, entries live only in this process.
+otp_store: Dict[str, dict] = {}
+def generate_otp(length: int = 6) -> str:
+    """
+    Generate a random numeric OTP code.
+    The code is an authentication secret, so digits are drawn from the
+    operating system's cryptographically secure source via `secrets` rather
+    than the predictable `random` generator.
+    Args:
+        length: Length of OTP (default: 6)
+    Returns:
+        Random OTP string of `length` decimal digits
+    """
+    # Imported locally so this function does not depend on a file-level import.
+    import secrets
+    return ''.join(secrets.choice(string.digits) for _ in range(length))
+async def request_otp(email: str, db: Session) -> dict:
+    """
+    Create a fresh OTP for an email address and send it through Brevo.
+    Any OTP already stored for the address is replaced. The entry is removed
+    again if sending the email fails.
+    Args:
+        email: Email address to send OTP to
+        db: Database session (not used by this function)
+    Returns:
+        Dictionary with a confirmation message and the validity in minutes
+    Raises:
+        HTTPException: 500 if the OTP email could not be sent
+    """
+    store_key = email.lower()
+    code = generate_otp()
+    # Store OTP with a 10 minute lifetime and a zeroed attempt counter
+    otp_store[store_key] = {
+        'otp': code,
+        'expires_at': datetime.utcnow() + timedelta(minutes=10),
+        'attempts': 0,
+        'max_attempts': 5
+    }
+    try:
+        await send_otp_email(email, code)
+        print(f"[INFO] OTP generated and sent to {email}")
+    except Exception as e:
+        # An OTP the user never received must not stay valid
+        otp_store.pop(store_key, None)
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to send OTP email: {str(e)}"
+        )
+    return {
+        "message": "OTP sent to your email address",
+        "expires_in_minutes": 10
+    }
+async def verify_otp(email: str, otp: str, db: Session) -> User:
+    """
+    Verify an OTP and return the matching user, creating one if needed.
+    On success the OTP is consumed immediately, before any database or
+    third-party call is awaited, so the same code cannot be accepted twice
+    while those calls are in flight. For a newly created user, contact
+    enrichment (Apollo.io) and CRM updates (Brevo, Monday.com) are attempted
+    on a best-effort basis; their failure does not fail verification.
+    Args:
+        email: Email address
+        otp: OTP code to verify
+        db: Database session
+    Returns:
+        User object
+    Raises:
+        HTTPException: If OTP is invalid, expired, or max attempts exceeded
+    """
+    # Imported locally so this function does not depend on a file-level import.
+    import hmac
+    email_lower = email.lower()
+    stored = otp_store.get(email_lower)
+    if not stored:
+        raise HTTPException(
+            status_code=400,
+            detail="OTP not found. Please request a new OTP."
+        )
+    # Check if expired
+    if datetime.utcnow() > stored['expires_at']:
+        otp_store.pop(email_lower, None)
+        raise HTTPException(
+            status_code=400,
+            detail="OTP has expired. Please request a new OTP."
+        )
+    # Check max attempts
+    if stored['attempts'] >= stored['max_attempts']:
+        otp_store.pop(email_lower, None)
+        raise HTTPException(
+            status_code=400,
+            detail="Maximum verification attempts exceeded. Please request a new OTP."
+        )
+    # Verify OTP with a constant-time comparison (bytes, so non-ASCII input
+    # is simply a mismatch rather than an error)
+    if not hmac.compare_digest(str(stored['otp']).encode('utf-8'), str(otp).encode('utf-8')):
+        stored['attempts'] += 1
+        remaining_attempts = stored['max_attempts'] - stored['attempts']
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid OTP. {remaining_attempts} attempt(s) remaining."
+        )
+    # OTP verified successfully: consume it now so it is strictly single-use
+    otp_store.pop(email_lower, None)
+    # Get or create user
+    user = db.query(User).filter(User.email == email_lower).first()
+    if not user:
+        user = User(
+            email=email_lower,
+            auth_method='otp',
+            email_verified=True
+        )
+        db.add(user)
+        db.commit()
+        db.refresh(user)
+        print(f"[INFO] New user created via OTP: {email_lower}")
+        # Enrich contact data from Apollo.io and update Brevo + Monday.com
+        try:
+            from .apollo_service import enrich_contact_by_email
+            from .brevo_service import create_brevo_contact, BREVO_TRIAL_LIST_ID
+            from .monday_service import create_monday_lead
+            # A falsy enrichment result is treated as "no data available"
+            enriched = (await enrich_contact_by_email(email_lower)) or {}
+            first_name = enriched.get("first_name")
+            last_name = enriched.get("last_name")
+            org_name = enriched.get("organization_name")
+            # Fallback to email domain if Apollo didn't provide organization
+            if not org_name:
+                org_domain = email_lower.split('@')[1] if '@' in email_lower else None
+                org_name = org_domain.split('.')[0].capitalize() if org_domain else None
+            # Update Brevo contact with enriched data
+            await create_brevo_contact(
+                email=email_lower,
+                first_name=first_name,
+                last_name=last_name,
+                organization_name=org_name,
+                phone_number=enriched.get("phone_number"),
+                linkedin_url=enriched.get("linkedin_url"),
+                title=enriched.get("title"),
+                headline=enriched.get("headline"),
+                organization_website=enriched.get("organization_website"),
+                organization_address=enriched.get("organization_address"),
+                list_id=BREVO_TRIAL_LIST_ID
+            )
+            # Create lead in Monday.com
+            await create_monday_lead(
+                email=email_lower,
+                first_name=first_name,
+                last_name=last_name,
+                phone_number=enriched.get("phone_number"),
+                linkedin_url=enriched.get("linkedin_url"),
+                title=enriched.get("title"),
+                headline=enriched.get("headline"),
+                organization_name=org_name,
+                organization_website=enriched.get("organization_website"),
+                organization_address=enriched.get("organization_address"),
+            )
+        except Exception as e:
+            # Don't fail user creation if integrations fail
+            print(f"[WARNING] Failed to enrich/update contact for {email_lower}: {str(e)}")
+    else:
+        user.email_verified = True
+        if user.auth_method != 'otp':
+            user.auth_method = 'otp'
+        db.commit()
+        print(f"[INFO] User verified via OTP: {email_lower}")
+    return user
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

backend/app/schemas.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from pydantic import BaseModel
 from typing import Dict, Optional
 from datetime import datetime
@@ -24,3 +25,31 @@ class ExtractionRecordBase(BaseModel):
     class Config:
         from_attributes = True

+<<<<<<< HEAD
 from pydantic import BaseModel
 from typing import Dict, Optional
 from datetime import datetime
     class Config:
         from_attributes = True
+=======
+from pydantic import BaseModel
+from typing import Dict, Optional
+from datetime import datetime
+class ExtractionStage(BaseModel):
+    # One stage of an extraction run; used as the value type of
+    # ExtractionRecordBase.stages.
+    time: int  # NOTE(review): unit is not visible here (ms vs s) — confirm
+    status: str
+    variation: str  # NOTE(review): meaning not shown in this file — confirm against producer
+class ExtractionRecordBase(BaseModel):
+    # Schema for one extraction record. Field names are camelCase,
+    # presumably to match the frontend payload — confirm.
+    id: int
+    fileName: str
+    fileType: str
+    fileSize: str  # a string, not a byte count; presumably pre-formatted — confirm
+    extractedAt: datetime
+    status: str
+    confidence: float
+    fieldsExtracted: int
+    totalTime: int  # NOTE(review): unit not visible here — confirm
+    stages: Dict[str, ExtractionStage]  # stage name -> stage details
+    errorMessage: Optional[str] = None  # optional; defaults to None
+    class Config:
+        # Pydantic option allowing the model to be populated from an
+        # object's attributes (e.g. an ORM row) rather than only from dicts.
+        from_attributes = True
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

backend/requirements.txt CHANGED Viewed

@@ -1,15 +1,15 @@
-fastapi
-uvicorn[standard]
-python-multipart
-pydantic[email]
-sqlalchemy
-httpx
-python-dotenv
-pymupdf
-pillow
-huggingface-hub
-openai
-firebase-admin
-pyjwt
-python-jose[cryptography]
 email-validator

+fastapi
+uvicorn[standard]
+python-multipart
+pydantic[email]
+sqlalchemy
+httpx
+python-dotenv
+pymupdf
+pillow
+huggingface-hub
+openai
+firebase-admin
+pyjwt
+python-jose[cryptography]
 email-validator

frontend/index.html CHANGED Viewed

@@ -1,3 +1,4 @@
 <!doctype html>
 <html lang="en">
   <head>
@@ -11,3 +12,18 @@
     <script type="module" src="/src/main.jsx"></script>
   </body>
 </html>

+<<<<<<< HEAD
 <!doctype html>
 <html lang="en">
   <head>
     <script type="module" src="/src/main.jsx"></script>
   </body>
 </html>
+=======
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <link rel="icon" type="image/png" href="/logo.png" />
+    <title>EZOFIS AI - VRP Document Intelligence</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  </head>
+  <body class="bg-[#FAFAFA]">
+    <div id="root"></div>
+    <script type="module" src="/src/main.jsx"></script>
+  </body>
+</html>
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/package.json CHANGED Viewed

@@ -1,26 +1,26 @@
-{
-  "name": "document-capture-demo",
-  "version": "1.0.0",
-  "private": true,
-  "scripts": {
-    "dev": "vite",
-    "build": "vite build",
-    "preview": "vite preview"
-  },
-  "dependencies": {
-    "react": "^18.3.1",
-    "react-dom": "^18.3.1",
-    "react-router-dom": "^6.26.2",
-    "framer-motion": "^11.0.0",
-    "lucide-react": "^0.471.0",
-    "pdfjs-dist": "^4.0.379",
-    "firebase": "^10.7.1"
-  },
-  "devDependencies": {
-    "@vitejs/plugin-react": "^4.1.0",
-    "autoprefixer": "^10.4.20",
-    "postcss": "^8.4.47",
-    "tailwindcss": "^3.4.14",
-    "vite": "^5.4.0"
-  }
-}

+{
+  "name": "document-capture-demo",
+  "version": "1.0.0",
+  "private": true,
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1",
+    "react-router-dom": "^6.26.2",
+    "framer-motion": "^11.0.0",
+    "lucide-react": "^0.471.0",
+    "pdfjs-dist": "^4.0.379",
+    "firebase": "^10.7.1"
+  },
+  "devDependencies": {
+    "@vitejs/plugin-react": "^4.1.0",
+    "autoprefixer": "^10.4.20",
+    "postcss": "^8.4.47",
+    "tailwindcss": "^3.4.14",
+    "vite": "^5.4.0"
+  }
+}

frontend/src/App.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 // frontend/src/App.jsx
 import React, { useEffect } from "react";
@@ -104,3 +105,111 @@ export default function App() {
     </AuthProvider>
   );
 }

+<<<<<<< HEAD
 // frontend/src/App.jsx
 import React, { useEffect } from "react";
     </AuthProvider>
   );
 }
+=======
+// frontend/src/App.jsx
+import React, { useEffect } from "react";
+import { Routes, Route, useNavigate, useSearchParams } from "react-router-dom";
+import { AuthProvider, useAuth } from "./contexts/AuthContext";
+import Layout from "./Layout";
+import Dashboard from "./pages/Dashboard";
+import History from "./pages/History";
+import ShareHandler from "./pages/ShareHandler";
+import LoginForm from "./components/auth/LoginForm";
+// Auth callback handler component
+function AuthCallback() {
+  const [searchParams] = useSearchParams();
+  const { handleAuthCallback } = useAuth();
+  const navigate = useNavigate();
+  useEffect(() => {
+    const token = searchParams.get("token");
+    if (token) {
+      handleAuthCallback(token);
+      navigate("/");
+    } else {
+      navigate("/");
+    }
+  }, [searchParams, handleAuthCallback, navigate]);
+  return (
+    <div className="min-h-screen flex items-center justify-center">
+      <div className="text-center">
+        <p className="text-slate-600">Completing authentication...</p>
+      </div>
+    </div>
+  );
+}
+// Protected route wrapper
+function ProtectedRoute({ children }) {
+  const { isAuthenticated, loading } = useAuth();
+  if (loading) {
+    return (
+      <div className="min-h-screen flex items-center justify-center">
+        <div className="text-center">
+          <div className="h-16 w-16 mx-auto rounded-2xl bg-indigo-100 flex items-center justify-center mb-4 animate-pulse">
+            <div className="h-8 w-8 rounded-lg bg-indigo-600"></div>
+          </div>
+          <p className="text-slate-600">Loading...</p>
+        </div>
+      </div>
+    );
+  }
+  if (!isAuthenticated) {
+    return <LoginForm />;
+  }
+  return children;
+}
+function AppRoutes() {
+  return (
+    <Routes>
+      <Route
+        path="/auth/callback"
+        element={<AuthCallback />}
+      />
+      <Route
+        path="/share/:token"
+        element={
+          <ProtectedRoute>
+            <ShareHandler />
+          </ProtectedRoute>
+        }
+      />
+      <Route
+        path="/"
+        element={
+          <ProtectedRoute>
+            <Layout currentPageName="Dashboard">
+              <Dashboard />
+            </Layout>
+          </ProtectedRoute>
+        }
+      />
+      <Route
+        path="/history"
+        element={
+          <ProtectedRoute>
+            <Layout currentPageName="History">
+              <History />
+            </Layout>
+          </ProtectedRoute>
+        }
+      />
+    </Routes>
+  );
+}
+export default function App() {
+  return (
+    <AuthProvider>
+      <AppRoutes />
+    </AuthProvider>
+  );
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/Layout.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 // frontend/src/Layout.jsx
 import React, { useState } from "react";
@@ -177,3 +178,184 @@ export default function Layout({ children, currentPageName }) {
     </div>
   );
 }

+<<<<<<< HEAD
 // frontend/src/Layout.jsx
 import React, { useState } from "react";
     </div>
   );
 }
+=======
+// frontend/src/Layout.jsx
+import React, { useState } from "react";
+import { Link } from "react-router-dom";
+import { createPageUrl } from "./utils";
+import {
+  LayoutDashboard,
+  History as HistoryIcon,
+  ChevronLeft,
+  Sparkles,
+  LogOut,
+  User,
+} from "lucide-react";
+import { cn } from "@/lib/utils";
+import { useAuth } from "./contexts/AuthContext";
+// Import logo - Vite will process this and handle the path correctly
+// For production, the logo should be in frontend/public/logo.png
+// Vite will copy it to dist/logo.png during build
+const logoPath = "/logo.png";
+export default function Layout({ children, currentPageName }) {
+  const [collapsed, setCollapsed] = useState(false);
+  const { user, logout } = useAuth();
+  const navItems = [
+    { name: "Dashboard", icon: LayoutDashboard, page: "Dashboard" },
+    { name: "History", icon: HistoryIcon, page: "History" },
+  ];
+  return (
+    <div className="min-h-screen bg-[#FAFAFA] flex">
+      {/* Sidebar */}
+      <aside
+        className={cn(
+          "fixed left-0 top-0 h-screen bg-white border-r border-slate-200/80 z-50 transition-all duration-300 ease-out flex flex-col",
+          collapsed ? "w-[72px]" : "w-[260px]"
+        )}
+      >
+        {/* Logo */}
+        <div
+          className={cn(
+            "h-16 flex items-center border-b border-slate-100 px-4",
+            collapsed ? "justify-center" : "justify-between"
+          )}
+        >
+          <Link to={createPageUrl("Dashboard")} className="flex items-center gap-3">
+            <div className="h-9 w-9 flex items-center justify-center flex-shrink-0">
+              <img
+                src={logoPath}
+                alt="EZOFIS AI Logo"
+                className="h-full w-full object-contain"
+                onError={(e) => {
+                  // Fallback: hide image and show placeholder if logo not found
+                  e.target.style.display = 'none';
+                }}
+              />
+            </div>
+            {!collapsed && (
+              <div className="flex flex-col">
+                <span className="font-semibold text-slate-900 tracking-tight">EZOFIS AI</span>
+                <span className="text-[10px] text-slate-400 font-medium tracking-wide uppercase">
+                  VRP Intelligence
+                </span>
+              </div>
+            )}
+          </Link>
+          {!collapsed && (
+            <button
+              onClick={() => setCollapsed(true)}
+              className="h-7 w-7 rounded-lg hover:bg-slate-100 flex items-center justify-center text-slate-400 hover:text-slate-600 transition-colors"
+            >
+              <ChevronLeft className="h-4 w-4" />
+            </button>
+          )}
+        </div>
+        {/* Navigation */}
+        <nav className="flex-1 p-3 space-y-1">
+          {navItems.map((item) => {
+            const isActive = currentPageName === item.page;
+            return (
+              <Link
+                key={item.name}
+                to={createPageUrl(item.page)}
+                className={cn(
+                  "flex items-center gap-3 px-3 py-2.5 rounded-xl transition-all duration-200 group",
+                  isActive
+                    ? "bg-gradient-to-r from-indigo-50 to-violet-50 text-indigo-600"
+                    : "text-slate-500 hover:bg-slate-50 hover:text-slate-700"
+                )}
+              >
+                <item.icon
+                  className={cn(
+                    "h-5 w-5 flex-shrink-0",
+                    isActive ? "text-indigo-600" : "text-slate-400 group-hover:text-slate-600"
+                  )}
+                />
+                {!collapsed && (
+                  <span className="font-medium text-sm">{item.name}</span>
+                )}
+              </Link>
+            );
+          })}
+        </nav>
+        {/* Collapse Toggle (when collapsed) */}
+        {collapsed && (
+          <button
+            onClick={() => setCollapsed(false)}
+            className="m-3 h-10 rounded-xl bg-slate-50 hover:bg-slate-100 flex items-center justify-center text-slate-400 hover:text-slate-600 transition-colors"
+          >
+            <ChevronLeft className="h-4 w-4 rotate-180" />
+          </button>
+        )}
+        {/* Pro Badge */}
+        {!collapsed && (
+          <div className="p-3">
+            <div className="p-4 rounded-2xl bg-gradient-to-br from-slate-900 to-slate-800 text-white">
+              <div className="flex items-center gap-2 mb-2">
+                <Sparkles className="h-4 w-4 text-amber-400" />
+                <span className="text-xs font-semibold tracking-wide">DEPLOY CUSTOM AGENT</span>
+              </div>
+              <p className="text-xs text-slate-400 mb-3">
+              Batch extractions, custom model, field mapping, complex lineitems, tables, workflows,  &amp; API access
+              </p>
+              <button className="w-full py-2 px-3 rounded-lg bg-white text-slate-900 text-sm font-semibold hover:bg-slate-100 transition-colors">
+                Book a Custom Demo
+              </button>
+            </div>
+          </div>
+        )}
+        {/* User Profile */}
+        {!collapsed && user && (
+          <div className="p-3 border-t border-slate-200">
+            <div className="flex items-center gap-3 p-3 rounded-xl bg-slate-50 hover:bg-slate-100 transition-colors">
+              {user.picture ? (
+                <img
+                  src={user.picture}
+                  alt={user.name || user.email}
+                  className="h-10 w-10 rounded-lg object-cover"
+                />
+              ) : (
+                <div className="h-10 w-10 rounded-lg bg-indigo-100 flex items-center justify-center">
+                  <User className="h-5 w-5 text-indigo-600" />
+                </div>
+              )}
+              <div className="flex-1 min-w-0">
+                <p className="text-sm font-medium text-slate-900 truncate">
+                  {user.name || "User"}
+                </p>
+                <p className="text-xs text-slate-500 truncate">{user.email}</p>
+              </div>
+            </div>
+            <button
+              onClick={logout}
+              className="mt-2 w-full flex items-center gap-2 px-3 py-2 rounded-xl text-sm text-slate-600 hover:bg-red-50 hover:text-red-600 transition-colors"
+            >
+              <LogOut className="h-4 w-4" />
+              <span>Sign Out</span>
+            </button>
+          </div>
+        )}
+      </aside>
+      {/* Main Content */}
+      <main
+        className={cn(
+          "flex-1 transition-all duration-300",
+          collapsed ? "ml-[72px]" : "ml-[260px]"
+        )}
+      >
+        {children}
+      </main>
+    </div>
+  );
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/components/ExportButtons.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 import React, { useState } from "react";
 import { motion, AnimatePresence } from "framer-motion";
 import {
@@ -690,3 +691,697 @@ ${htmlContent}
     </motion.div>
   );
 }

+<<<<<<< HEAD
 import React, { useState } from "react";
 import { motion, AnimatePresence } from "framer-motion";
 import {
     </motion.div>
   );
 }
+=======
+import React, { useState } from "react";
+import { motion, AnimatePresence } from "framer-motion";
+import {
+  Download,
+  Braces,
+  FileCode2,
+  Check,
+  Share2,
+  FileText,
+  Link2,
+  Mail,
+} from "lucide-react";
+import { Button } from "@/components/ui/button";
+import {
+  DropdownMenu,
+  DropdownMenuContent,
+  DropdownMenuItem,
+  DropdownMenuSeparator,
+  DropdownMenuTrigger,
+} from "@/components/ui/dropdown-menu";
+import { cn } from "@/lib/utils";
+import ShareModal from "@/components/ShareModal";
+import ShareLinkModal from "@/components/ShareLinkModal";
+import { shareExtraction, createShareLink } from "@/services/api";
+// Helper functions from ExtractionOutput
/**
 * Builds the export-ready view of an extraction's `fields` object.
 *
 * - A non-empty root-level `Fields` object is placed first in the result.
 * - When a non-empty `pages` array exists, the top-level `full_text` and each
 *   page's `fields.full_text` are dropped.
 * - For the "json" and "xml" formats the `pages` array is replaced by
 *   `page_<n>` entries (`text`, `confidence`, `doc_type`, plus optional
 *   `table`, `footer_notes` and page-specific `fields`).
 * - `Fields` and `metadata` are removed from every `page_*` object.
 *
 * The input is never modified: nested objects that need changes are copied
 * first, so the caller's `page_*` objects keep their `Fields`/`metadata`.
 *
 * @param {*} fields - Extraction fields; non-object values are returned as-is.
 * @param {string} [format="json"] - Target format; only "json" and "xml"
 *   trigger the `pages` -> `page_<n>` restructuring.
 * @returns {*} A new object in export shape, or `fields` itself when it is
 *   not an object.
 */
function prepareFieldsForOutput(fields, format = "json") {
  if (!fields || typeof fields !== "object") {
    return fields;
  }

  // Shallow copy without the root `Fields`; it is re-added at the top below.
  const { Fields: rootFields, ...output } = fields;

  const hasPagesArray = Array.isArray(output.pages);

  if (hasPagesArray && output.pages.length > 0) {
    // The pages carry the text, so the merged copy would only duplicate it.
    delete output.full_text;
    output.pages = output.pages.map((page) => {
      const cleanedPage = { ...page };
      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
        const cleanedFields = { ...cleanedPage.fields };
        delete cleanedFields.full_text; // duplicates page.text
        cleanedPage.fields = cleanedFields;
      }
      return cleanedPage;
    });
  }

  if ((format === "json" || format === "xml") && hasPagesArray) {
    // Keys already present at the root; identical page-level values are skipped.
    const topLevelKeys = new Set(
      Object.keys(output).filter((key) => key !== "pages" && key !== "full_text" && key !== "Fields")
    );

    output.pages.forEach((page, idx) => {
      const pageNum = page.page_number || idx + 1;
      const pageFields = page.fields || {};

      const cleanedPageFields = {};
      for (const [key, value] of Object.entries(pageFields)) {
        const duplicatesRoot = topLevelKeys.has(key) && value === output[key];
        if (key !== "full_text" && !duplicatesRoot) {
          cleanedPageFields[key] = value;
        }
      }

      const pageObj = {
        text: page.text || "",
        confidence: page.confidence || 0,
        doc_type: page.doc_type || "other",
      };
      if (Array.isArray(page.table) && page.table.length > 0) {
        pageObj.table = page.table;
      }
      if (Array.isArray(page.footer_notes) && page.footer_notes.length > 0) {
        pageObj.footer_notes = page.footer_notes;
      }
      if (Object.keys(cleanedPageFields).length > 0) {
        pageObj.fields = cleanedPageFields;
      }
      output[`page_${pageNum}`] = pageObj;
    });

    delete output.pages;
  }

  // Strip `Fields`/`metadata` from page_X objects. Work on a copy: `output`
  // is only a shallow copy, so deleting in place would alter the caller's data.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  for (const pageKey of Object.keys(output)) {
    if (!pageKey.startsWith("page_")) continue;
    const pageData = output[pageKey];
    if (!pageData || typeof pageData !== "object" || Array.isArray(pageData)) continue;
    if (!hasOwn(pageData, "Fields") && !hasOwn(pageData, "metadata")) continue;
    const pageCopy = { ...pageData };
    delete pageCopy.Fields;
    delete pageCopy.metadata;
    output[pageKey] = pageCopy; // assigning to an existing key keeps its position
  }

  const hasRootFields =
    rootFields && typeof rootFields === "object" && Object.keys(rootFields).length > 0;
  return hasRootFields ? { Fields: rootFields, ...output } : { ...output };
}
/**
 * Escapes a value for use as XML character data or an attribute value.
 *
 * Non-string input is converted with `String()`; `null`/`undefined` become an
 * empty string. Control characters that XML 1.0 does not allow (everything
 * below U+0020 except tab, LF and CR) are removed, because a document
 * containing them is not well-formed even when escaped.
 *
 * @param {*} str - Value to escape.
 * @returns {string} Text with `& < > " '` replaced by entity references.
 */
function escapeXML(str) {
  if (str === null || str === undefined) {
    return "";
  }
  return String(str)
    // eslint-disable-next-line no-control-regex
    .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F]/g, "")
    .replace(/&/g, "&amp;") // must run first so later entities are not re-escaped
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}
+function objectToXML(obj, rootName = "extraction") {
+  // Prepare fields - remove full_text if pages exist
+  const preparedObj = prepareFieldsForOutput(obj, "xml");
+  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;
+  const convert = (obj, indent = "  ") => {
+    for (const [key, value] of Object.entries(obj)) {
+      if (value === null || value === undefined) continue;
+      // Skip full_text if pages exist (already handled in prepareFieldsForOutput)
+      if (key === "full_text" && obj.pages && Array.isArray(obj.pages) && obj.pages.length > 0) {
+        continue;
+      }
+      if (Array.isArray(value)) {
+        value.forEach((item) => {
+          xml += `${indent}<${key}>\n`;
+          if (typeof item === "object") {
+            convert(item, indent + "  ");
+          } else {
+            xml += `${indent}  ${escapeXML(String(item))}\n`;
+          }
+          xml += `${indent}</${key}>\n`;
+        });
+      } else if (typeof value === "object") {
+        xml += `${indent}<${key}>\n`;
+        convert(value, indent + "  ");
+        xml += `${indent}</${key}>\n`;
+      } else {
+        xml += `${indent}<${key}>${escapeXML(String(value))}</${key}>\n`;
+      }
+    }
+  };
+  convert(preparedObj);
+  xml += `</${rootName}>`;
+  return xml;
+}
+export default function ExportButtons({ isComplete, extractionResult }) {
+  const [downloading, setDownloading] = useState(null);
+  const [copied, setCopied] = useState(false);
+  const [isShareModalOpen, setIsShareModalOpen] = useState(false);
+  const [isShareLinkModalOpen, setIsShareLinkModalOpen] = useState(false);
+  const [shareLink, setShareLink] = useState("");
+  const [isGeneratingLink, setIsGeneratingLink] = useState(false);
+  // Helper function to extract text from fields (same as in ExtractionOutput)
+  const extractTextFromFields = (fields) => {
+    if (!fields || typeof fields !== "object") {
+      return "";
+    }
+    // Check for page_X structure first (preferred format)
+    const pageKeys = Object.keys(fields).filter(key => key.startsWith("page_"));
+    if (pageKeys.length > 0) {
+      // Get text from first page (or combine all pages)
+      const pageTexts = pageKeys.map(key => {
+        const page = fields[key];
+        if (page && page.text) {
+          return page.text;
+        }
+        return "";
+      }).filter(text => text);
+      if (pageTexts.length > 0) {
+        return pageTexts.join("\n\n");
+      }
+    }
+    // Fallback to full_text
+    if (fields.full_text) {
+      return fields.full_text;
+    }
+    return "";
+  };
+  // Helper function to escape HTML
+  const escapeHtml = (text) => {
+    if (!text) return '';
+    const div = document.createElement('div');
+    div.textContent = text;
+    return div.innerHTML;
+  };
+  // Helper function to convert pipe-separated tables to HTML tables
+  const convertPipeTablesToHTML = (text) => {
+    if (!text) return text;
+    const lines = text.split('\n');
+    const result = [];
+    let i = 0;
+    while (i < lines.length) {
+      const line = lines[i];
+      // Check if this line looks like a table row (has multiple pipes)
+      if (line.includes('|') && line.split('|').length >= 3) {
+        // Check if it's a separator line (only |, -, :, spaces)
+        const isSeparator = /^[\s|\-:]+$/.test(line.trim());
+        if (!isSeparator) {
+          // Start of a table - collect all table rows
+          const tableRows = [];
+          let j = i;
+          // Collect header row
+          const headerLine = lines[j];
+          const headerCells = headerLine.split('|').map(cell => cell.trim()).filter(cell => cell || cell === '');
+          // Remove empty cells at start/end
+          if (headerCells.length > 0 && !headerCells[0]) headerCells.shift();
+          if (headerCells.length > 0 && !headerCells[headerCells.length - 1]) headerCells.pop();
+          if (headerCells.length >= 2) {
+            tableRows.push(headerCells);
+            j++;
+            // Skip separator line if present
+            if (j < lines.length && /^[\s|\-:]+$/.test(lines[j].trim())) {
+              j++;
+            }
+            // Collect data rows
+            while (j < lines.length) {
+              const rowLine = lines[j];
+              if (!rowLine.trim()) break; // Empty line ends table
+              // Check if it's still a table row
+              if (rowLine.includes('|') && rowLine.split('|').length >= 2) {
+                const isRowSeparator = /^[\s|\-:]+$/.test(rowLine.trim());
+                if (!isRowSeparator) {
+                  const rowCells = rowLine.split('|').map(cell => cell.trim());
+                  // Remove empty cells at start/end
+                  if (rowCells.length > 0 && !rowCells[0]) rowCells.shift();
+                  if (rowCells.length > 0 && !rowCells[rowCells.length - 1]) rowCells.pop();
+                  tableRows.push(rowCells);
+                  j++;
+                } else {
+                  j++;
+                }
+              } else {
+                break; // Not a table row anymore
+              }
+            }
+            // Convert to HTML table
+            if (tableRows.length > 0) {
+              let htmlTable = '<table class="border-collapse border border-gray-300 w-full my-4">\n<thead>\n<tr>';
+              // Header row
+              tableRows[0].forEach(cell => {
+                htmlTable += `<th class="border border-gray-300 px-4 py-2 bg-gray-100 font-semibold text-left">${escapeHtml(cell)}</th>`;
+              });
+              htmlTable += '</tr>\n</thead>\n<tbody>\n';
+              // Data rows
+              for (let rowIdx = 1; rowIdx < tableRows.length; rowIdx++) {
+                htmlTable += '<tr>';
+                tableRows[rowIdx].forEach((cell, colIdx) => {
+                  // Use header cell count to ensure alignment
+                  const cellContent = cell || '';
+                  htmlTable += `<td class="border border-gray-300 px-4 py-2">${escapeHtml(cellContent)}</td>`;
+                });
+                htmlTable += '</tr>\n';
+              }
+              htmlTable += '</tbody>\n</table>';
+              result.push(htmlTable);
+              i = j;
+              continue;
+            }
+          }
+        }
+      }
+      // Not a table row, add as-is
+      result.push(line);
+      i++;
+    }
+    return result.join('\n');
+  };
+  // Helper function to render markdown to HTML (same as in ExtractionOutput)
+  const renderMarkdownToHTML = (text) => {
+    if (!text) return "";
+    let html = text;
+    // FIRST: Convert pipe-separated tables to HTML tables
+    html = convertPipeTablesToHTML(html);
+    // Convert LaTeX-style superscripts/subscripts FIRST
+    html = html.replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, '<sup>$1</sup>');
+    html = html.replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, '<sup>$1</sup>');
+    html = html.replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, '<sub>$1</sub>');
+    html = html.replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, '<sub>$1</sub>');
+    // Protect HTML table blocks
+    const htmlBlocks = [];
+    let htmlBlockIndex = 0;
+    html = html.replace(/<table[\s\S]*?<\/table>/gi, (match) => {
+      const placeholder = `__HTML_BLOCK_${htmlBlockIndex}__`;
+      htmlBlocks[htmlBlockIndex] = match;
+      htmlBlockIndex++;
+      return placeholder;
+    });
+    // Convert markdown headers
+    html = html.replace(/^### (.*$)/gim, '<h3>$1</h3>');
+    html = html.replace(/^## (.*$)/gim, '<h2>$1</h2>');
+    html = html.replace(/^# (.*$)/gim, '<h1>$1</h1>');
+    // Convert markdown bold/italic
+    html = html.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
+    html = html.replace(/\*(.*?)\*/g, '<em>$1</em>');
+    // Convert markdown links
+    html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>');
+    // Process line breaks
+    const parts = html.split(/(__HTML_BLOCK_\d+__)/);
+    const processedParts = parts.map((part) => {
+      if (part.match(/^__HTML_BLOCK_\d+__$/)) {
+        const blockIndex = parseInt(part.match(/\d+/)[0]);
+        return htmlBlocks[blockIndex];
+      } else {
+        let processed = part;
+        processed = processed.replace(/\n\n+/g, '</p><p>');
+        processed = processed.replace(/([^\n>])\n([^\n<])/g, '$1<br>$2');
+        if (processed.trim() && !processed.trim().startsWith('<')) {
+          processed = '<p>' + processed + '</p>';
+        }
+        return processed;
+      }
+    });
+    html = processedParts.join('');
+    html = html.replace(/<p><\/p>/g, '');
+    html = html.replace(/<p>\s*<br>\s*<\/p>/g, '');
+    html = html.replace(/<p>\s*<\/p>/g, '');
+    return html;
+  };
+  const handleDownload = async (format) => {
+    if (!extractionResult || !extractionResult.fields) {
+      console.error("No extraction data available");
+      return;
+    }
+    setDownloading(format);
+    try {
+      const fields = extractionResult.fields;
+      let content = "";
+      let filename = "";
+      let mimeType = "";
+      if (format === "json") {
+        const preparedFields = prepareFieldsForOutput(fields, "json");
+        content = JSON.stringify(preparedFields, null, 2);
+        filename = `extraction_${new Date().toISOString().split('T')[0]}.json`;
+        mimeType = "application/json";
+      } else if (format === "xml") {
+        content = objectToXML(fields);
+        filename = `extraction_${new Date().toISOString().split('T')[0]}.xml`;
+        mimeType = "application/xml";
+      } else if (format === "docx") {
+        // For DOCX, create a Word-compatible HTML document that preserves layout
+        // Extract text and convert to HTML (same as text viewer)
+        const textContent = extractTextFromFields(fields);
+        const htmlContent = renderMarkdownToHTML(textContent);
+        // Create a Word-compatible HTML document with proper MIME type
+        // Word can open HTML files with .docx extension if we use the right MIME type
+        const wordHTML = `<!DOCTYPE html>
+<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns="http://www.w3.org/TR/REC-html40">
+<head>
+  <meta charset="UTF-8">
+  <meta name="ProgId" content="Word.Document">
+  <meta name="Generator" content="Microsoft Word">
+  <meta name="Originator" content="Microsoft Word">
+  <!--[if gte mso 9]><xml>
+   <w:WordDocument>
+    <w:View>Print</w:View>
+    <w:Zoom>100</w:Zoom>
+    <w:DoNotOptimizeForBrowser/>
+   </w:WordDocument>
+  </xml><![endif]-->
+  <title>Document Extraction</title>
+  <style>
+    @page {
+      size: 8.5in 11in;
+      margin: 1in;
+    }
+    body {
+      font-family: 'Calibri', 'Arial', sans-serif;
+      font-size: 11pt;
+      line-height: 1.6;
+      margin: 0;
+      color: #333;
+    }
+    h1 {
+      font-size: 18pt;
+      font-weight: bold;
+      color: #0f172a;
+      margin-top: 24pt;
+      margin-bottom: 12pt;
+      page-break-after: avoid;
+    }
+    h2 {
+      font-size: 16pt;
+      font-weight: 600;
+      color: #0f172a;
+      margin-top: 20pt;
+      margin-bottom: 10pt;
+      page-break-after: avoid;
+    }
+    h3 {
+      font-size: 14pt;
+      font-weight: 600;
+      color: #1e293b;
+      margin-top: 16pt;
+      margin-bottom: 8pt;
+      page-break-after: avoid;
+    }
+    p {
+      margin-top: 6pt;
+      margin-bottom: 6pt;
+    }
+    table {
+      width: 100%;
+      border-collapse: collapse;
+      margin: 12pt 0;
+      font-size: 10pt;
+      page-break-inside: avoid;
+    }
+    table th {
+      background-color: #f8fafc;
+      border: 1pt solid #cbd5e1;
+      padding: 6pt;
+      text-align: left;
+      font-weight: 600;
+      color: #0f172a;
+    }
+    table td {
+      border: 1pt solid #cbd5e1;
+      padding: 6pt;
+      color: #334155;
+    }
+    table tr:nth-child(even) {
+      background-color: #f8fafc;
+    }
+    sup {
+      font-size: 0.75em;
+      vertical-align: super;
+      line-height: 0;
+    }
+    sub {
+      font-size: 0.75em;
+      vertical-align: sub;
+      line-height: 0;
+    }
+    strong {
+      font-weight: 600;
+    }
+    em {
+      font-style: italic;
+    }
+    a {
+      color: #4f46e5;
+      text-decoration: underline;
+    }
+  </style>
+</head>
+<body>
+${htmlContent}
+</body>
+</html>`;
+        content = wordHTML;
+        filename = `extraction_${new Date().toISOString().split('T')[0]}.doc`;
+        mimeType = "application/msword";
+      }
+      // Create blob and download
+      const blob = new Blob([content], { type: mimeType });
+      const url = URL.createObjectURL(blob);
+      const link = document.createElement("a");
+      link.href = url;
+      link.download = filename;
+      document.body.appendChild(link);
+      link.click();
+      document.body.removeChild(link);
+      URL.revokeObjectURL(url);
+      setDownloading(null);
+    } catch (error) {
+      console.error("Download error:", error);
+      setDownloading(null);
+    }
+  };
+  const handleCopyLink = async () => {
+    if (!extractionResult?.id) return;
+    setIsGeneratingLink(true);
+    setIsShareLinkModalOpen(true);
+    setShareLink("");
+    try {
+      const result = await createShareLink(extractionResult.id);
+      if (result.success && result.share_link) {
+        setShareLink(result.share_link);
+      } else {
+        throw new Error("Failed to generate share link");
+      }
+    } catch (err) {
+      console.error("Failed to create share link:", err);
+      setShareLink("");
+      // Still show modal but with error state
+    } finally {
+      setIsGeneratingLink(false);
+    }
+  };
+  const handleShare = async (extractionId, recipientEmail) => {
+    await shareExtraction(extractionId, recipientEmail);
+  };
+  if (!isComplete) return null;
+  return (
+    <motion.div
+      initial={{ opacity: 0, y: 20 }}
+      animate={{ opacity: 1, y: 0 }}
+      className="flex items-center gap-3"
+    >
+      {/* Export Options Dropdown */}
+      <DropdownMenu>
+        <DropdownMenuTrigger asChild>
+          <Button
+            variant="ghost"
+            className="h-11 w-11 rounded-xl hover:bg-slate-100"
+            disabled={downloading !== null}
+          >
+            {downloading ? (
+              <motion.div
+                animate={{ rotate: 360 }}
+                transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
+              >
+                <Download className="h-4 w-4" />
+              </motion.div>
+            ) : (
+              <Share2 className="h-4 w-4" />
+            )}
+          </Button>
+        </DropdownMenuTrigger>
+        <DropdownMenuContent align="end" className="w-56 rounded-xl p-2">
+          <DropdownMenuItem
+            className="rounded-lg cursor-pointer"
+            onClick={() => setIsShareModalOpen(true)}
+          >
+            <Mail className="h-4 w-4 mr-2 text-indigo-600" />
+            Share output
+          </DropdownMenuItem>
+          <DropdownMenuItem
+            className="rounded-lg cursor-pointer"
+            onClick={handleCopyLink}
+          >
+            <Link2 className="h-4 w-4 mr-2 text-indigo-600" />
+            Copy share link
+          </DropdownMenuItem>
+          <DropdownMenuSeparator />
+          <DropdownMenuItem
+            className="rounded-lg cursor-pointer"
+            onClick={() => handleDownload("docx")}
+            disabled={downloading === "docx"}
+          >
+            {downloading === "docx" ? (
+              <motion.div
+                animate={{ rotate: 360 }}
+                transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
+                className="h-4 w-4 mr-2"
+              >
+                <Download className="h-4 w-4" />
+              </motion.div>
+            ) : (
+              <FileText className="h-4 w-4 mr-2 text-blue-600" />
+            )}
+            Download Docx
+          </DropdownMenuItem>
+          <DropdownMenuItem
+            className="rounded-lg cursor-pointer"
+            onClick={() => handleDownload("json")}
+            disabled={downloading === "json"}
+          >
+            {downloading === "json" ? (
+              <motion.div
+                animate={{ rotate: 360 }}
+                transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
+                className="h-4 w-4 mr-2"
+              >
+                <Download className="h-4 w-4" />
+              </motion.div>
+            ) : (
+              <Braces className="h-4 w-4 mr-2 text-indigo-600" />
+            )}
+            Download JSON
+          </DropdownMenuItem>
+          <DropdownMenuItem
+            className="rounded-lg cursor-pointer"
+            onClick={() => handleDownload("xml")}
+            disabled={downloading === "xml"}
+          >
+            {downloading === "xml" ? (
+              <motion.div
+                animate={{ rotate: 360 }}
+                transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
+                className="h-4 w-4 mr-2"
+              >
+                <Download className="h-4 w-4" />
+              </motion.div>
+            ) : (
+              <FileCode2 className="h-4 w-4 mr-2 text-slate-600" />
+            )}
+            Download XML
+          </DropdownMenuItem>
+        </DropdownMenuContent>
+      </DropdownMenu>
+      {/* Share Modal */}
+      <ShareModal
+        isOpen={isShareModalOpen}
+        onClose={() => setIsShareModalOpen(false)}
+        onShare={handleShare}
+        extractionId={extractionResult?.id}
+      />
+      {/* Share Link Modal */}
+      <ShareLinkModal
+        isOpen={isShareLinkModalOpen}
+        onClose={() => {
+          setIsShareLinkModalOpen(false);
+          setShareLink("");
+        }}
+        shareLink={shareLink}
+        isLoading={isGeneratingLink}
+      />
+    </motion.div>
+  );
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/components/ShareLinkModal.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 import React, { useState, useEffect } from "react";
 import { motion, AnimatePresence } from "framer-motion";
 import { X, Copy, Check, Loader2 } from "lucide-react";
@@ -139,3 +140,146 @@ export default function ShareLinkModal({ isOpen, onClose, shareLink, isLoading }
   );
 }

+<<<<<<< HEAD
 import React, { useState, useEffect } from "react";
 import { motion, AnimatePresence } from "framer-motion";
 import { X, Copy, Check, Loader2 } from "lucide-react";
   );
 }
+=======
+import React, { useState, useEffect } from "react";
+import { motion, AnimatePresence } from "framer-motion";
+import { X, Copy, Check, Loader2 } from "lucide-react";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+export default function ShareLinkModal({ isOpen, onClose, shareLink, isLoading }) {
+  const [copied, setCopied] = useState(false);
+  useEffect(() => {
+    if (!isOpen) {
+      setCopied(false);
+    }
+  }, [isOpen]);
+  const handleCopy = async () => {
+    if (!shareLink) return;
+    try {
+      await navigator.clipboard.writeText(shareLink);
+      setCopied(true);
+      setTimeout(() => setCopied(false), 2000);
+    } catch (err) {
+      // Fallback for older browsers
+      const textArea = document.createElement("textarea");
+      textArea.value = shareLink;
+      textArea.style.position = "fixed";
+      textArea.style.opacity = "0";
+      document.body.appendChild(textArea);
+      textArea.select();
+      try {
+        document.execCommand("copy");
+        setCopied(true);
+        setTimeout(() => setCopied(false), 2000);
+      } catch (fallbackErr) {
+        console.error("Failed to copy:", fallbackErr);
+      }
+      document.body.removeChild(textArea);
+    }
+  };
+  if (!isOpen) return null;
+  return (
+    <AnimatePresence>
+      <div className="fixed inset-0 z-50 flex items-center justify-center">
+        {/* Backdrop */}
+        <motion.div
+          initial={{ opacity: 0 }}
+          animate={{ opacity: 1 }}
+          exit={{ opacity: 0 }}
+          className="absolute inset-0 bg-black/50 backdrop-blur-sm"
+          onClick={onClose}
+        />
+        {/* Modal */}
+        <motion.div
+          initial={{ opacity: 0, scale: 0.95, y: 20 }}
+          animate={{ opacity: 1, scale: 1, y: 0 }}
+          exit={{ opacity: 0, scale: 0.95, y: 20 }}
+          className="relative z-10 w-full max-w-md mx-4 bg-white rounded-2xl shadow-2xl overflow-hidden"
+          onClick={(e) => e.stopPropagation()}
+        >
+          {/* Header */}
+          <div className="px-6 py-4 border-b border-slate-200 flex items-center justify-between">
+            <h2 className="text-xl font-semibold text-slate-900">Copy Share Link</h2>
+            <button
+              onClick={onClose}
+              disabled={isLoading}
+              className="p-2 rounded-lg hover:bg-slate-100 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
+            >
+              <X className="h-5 w-5 text-slate-500" />
+            </button>
+          </div>
+          {/* Content */}
+          <div className="px-6 py-6">
+            {isLoading ? (
+              <div className="text-center py-8">
+                <Loader2 className="h-8 w-8 mx-auto mb-4 text-indigo-600 animate-spin" />
+                <p className="text-sm text-slate-600">Generating share link...</p>
+              </div>
+            ) : shareLink ? (
+              <div className="space-y-4">
+                <div>
+                  <label className="block text-sm font-medium text-slate-700 mb-2">
+                    Share Link
+                  </label>
+                  <div className="flex gap-2">
+                    <Input
+                      type="text"
+                      value={shareLink}
+                      readOnly
+                      className="flex-1 h-12 rounded-xl border-slate-200 bg-slate-50 text-sm font-mono"
+                    />
+                    <Button
+                      onClick={handleCopy}
+                      className="h-12 px-4 rounded-xl bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700"
+                    >
+                      {copied ? (
+                        <>
+                          <Check className="h-4 w-4 mr-2" />
+                          Copied!
+                        </>
+                      ) : (
+                        <>
+                          <Copy className="h-4 w-4 mr-2" />
+                          Copy
+                        </>
+                      )}
+                    </Button>
+                  </div>
+                </div>
+                <p className="text-xs text-slate-500">
+                  Share this link with anyone you want to give access to this extraction. They'll need to sign in to view it.
+                </p>
+              </div>
+            ) : (
+              <div className="text-center py-8">
+                <p className="text-sm text-slate-600">No share link available</p>
+              </div>
+            )}
+            <div className="pt-4 mt-6 border-t border-slate-200">
+              <Button
+                type="button"
+                variant="outline"
+                onClick={onClose}
+                disabled={isLoading}
+                className="w-full h-11 rounded-xl"
+              >
+                Close
+              </Button>
+            </div>
+          </div>
+        </motion.div>
+      </div>
+    </AnimatePresence>
+  );
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/components/ShareModal.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 import React, { useState } from "react";
 import { motion, AnimatePresence } from "framer-motion";
 import { X, Mail, Send, Loader2 } from "lucide-react";
@@ -195,3 +196,202 @@ export default function ShareModal({ isOpen, onClose, onShare, extractionId }) {
   );
 }

+<<<<<<< HEAD
 import React, { useState } from "react";
 import { motion, AnimatePresence } from "framer-motion";
 import { X, Mail, Send, Loader2 } from "lucide-react";
   );
 }
+=======
+import React, { useState } from "react";
+import { motion, AnimatePresence } from "framer-motion";
+import { X, Mail, Send, Loader2 } from "lucide-react";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+export default function ShareModal({ isOpen, onClose, onShare, extractionId }) {
+  const [email, setEmail] = useState("");
+  const [isLoading, setIsLoading] = useState(false);
+  const [error, setError] = useState("");
+  const [success, setSuccess] = useState(false);
+  const [successMessage, setSuccessMessage] = useState("");
+  const handleSubmit = async (e) => {
+    e.preventDefault();
+    setError("");
+    setSuccess(false);
+    // Parse and validate multiple emails (comma or semicolon separated)
+    if (!email.trim()) {
+      setError("Please enter at least one recipient email address");
+      return;
+    }
+    // Split by comma or semicolon, trim each email, and filter out empty strings
+    const emailList = email
+      .split(/[,;]/)
+      .map((e) => e.trim())
+      .filter((e) => e.length > 0);
+    if (emailList.length === 0) {
+      setError("Please enter at least one recipient email address");
+      return;
+    }
+    // Validate each email
+    const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
+    const invalidEmails = emailList.filter((e) => !emailRegex.test(e));
+    if (invalidEmails.length > 0) {
+      setError(`Invalid email address(es): ${invalidEmails.join(", ")}`);
+      return;
+    }
+    setIsLoading(true);
+    try {
+      const result = await onShare(extractionId, emailList);
+      setSuccessMessage(result?.message || `Successfully shared with ${emailList.length} recipient(s)`);
+      setSuccess(true);
+      setEmail("");
+      // Close modal after 2 seconds
+      setTimeout(() => {
+        setSuccess(false);
+        setSuccessMessage("");
+        onClose();
+      }, 2000);
+    } catch (err) {
+      setError(err.message || "Failed to share extraction. Please try again.");
+    } finally {
+      setIsLoading(false);
+    }
+  };
+  const handleClose = () => {
+    if (!isLoading) {
+      setEmail("");
+      setError("");
+      setSuccess(false);
+      onClose();
+    }
+  };
+  if (!isOpen) return null;
+  return (
+    <AnimatePresence>
+      <div className="fixed inset-0 z-50 flex items-center justify-center">
+        {/* Backdrop */}
+        <motion.div
+          initial={{ opacity: 0 }}
+          animate={{ opacity: 1 }}
+          exit={{ opacity: 0 }}
+          className="absolute inset-0 bg-black/50 backdrop-blur-sm"
+          onClick={handleClose}
+        />
+        {/* Modal */}
+        <motion.div
+          initial={{ opacity: 0, scale: 0.95, y: 20 }}
+          animate={{ opacity: 1, scale: 1, y: 0 }}
+          exit={{ opacity: 0, scale: 0.95, y: 20 }}
+          className="relative z-10 w-full max-w-md mx-4 bg-white rounded-2xl shadow-2xl overflow-hidden"
+          onClick={(e) => e.stopPropagation()}
+        >
+          {/* Header */}
+          <div className="px-6 py-4 border-b border-slate-200 flex items-center justify-between">
+            <h2 className="text-xl font-semibold text-slate-900">Share Output</h2>
+            <button
+              onClick={handleClose}
+              disabled={isLoading}
+              className="p-2 rounded-lg hover:bg-slate-100 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
+            >
+              <X className="h-5 w-5 text-slate-500" />
+            </button>
+          </div>
+          {/* Content */}
+          <div className="px-6 py-6">
+            {success ? (
+              <motion.div
+                initial={{ opacity: 0, scale: 0.9 }}
+                animate={{ opacity: 1, scale: 1 }}
+                className="text-center py-8"
+              >
+                <div className="w-16 h-16 mx-auto mb-4 rounded-full bg-emerald-100 flex items-center justify-center">
+                  <Send className="h-8 w-8 text-emerald-600" />
+                </div>
+                <h3 className="text-lg font-semibold text-slate-900 mb-2">
+                  Share Sent Successfully!
+                </h3>
+                <p className="text-sm text-slate-600">
+                  {successMessage || "The recipient(s) will receive an email with a link to view the extraction."}
+                </p>
+              </motion.div>
+            ) : (
+              <form onSubmit={handleSubmit} className="space-y-4">
+                <div>
+                  <label
+                    htmlFor="recipient-email"
+                    className="block text-sm font-medium text-slate-700 mb-2"
+                  >
+                    Recipient Email(s)
+                  </label>
+                  <p className="text-xs text-slate-500 mb-2">
+                    Separate multiple emails with commas or semicolons
+                  </p>
+                  <div className="relative">
+                    <Mail className="absolute left-3 top-1/2 -translate-y-1/2 h-5 w-5 text-slate-400" />
+                    <Input
+                      id="recipient-email"
+                      type="text"
+                      value={email}
+                      onChange={(e) => setEmail(e.target.value)}
+                      placeholder="Enter email addresses (comma or semicolon separated)"
+                      className="pl-10 h-12 rounded-xl border-slate-200 focus:border-indigo-500 focus:ring-indigo-500"
+                      disabled={isLoading}
+                      autoFocus
+                    />
+                  </div>
+                  {error && (
+                    <motion.p
+                      initial={{ opacity: 0, y: -10 }}
+                      animate={{ opacity: 1, y: 0 }}
+                      className="mt-2 text-sm text-red-600"
+                    >
+                      {error}
+                    </motion.p>
+                  )}
+                </div>
+                <div className="pt-4 flex gap-3">
+                  <Button
+                    type="button"
+                    variant="outline"
+                    onClick={handleClose}
+                    disabled={isLoading}
+                    className="flex-1 h-11 rounded-xl"
+                  >
+                    Cancel
+                  </Button>
+                  <Button
+                    type="submit"
+                    disabled={isLoading || !email.trim()}
+                    className="flex-1 h-11 rounded-xl bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700"
+                  >
+                    {isLoading ? (
+                      <>
+                        <Loader2 className="h-4 w-4 mr-2 animate-spin" />
+                        Sending...
+                      </>
+                    ) : (
+                      <>
+                        <Send className="h-4 w-4 mr-2" />
+                        Send
+                      </>
+                    )}
+                  </Button>
+                </div>
+              </form>
+            )}
+          </div>
+        </motion.div>
+      </div>
+    </AnimatePresence>
+  );
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/components/auth/LoginForm.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 import React, { useState } from "react";
 import { motion } from "framer-motion";
 import { Button } from "@/components/ui/button";
@@ -510,3 +511,517 @@ export default function LoginForm() {
     </div>
   );
 }

+<<<<<<< HEAD
 import React, { useState } from "react";
 import { motion } from "framer-motion";
 import { Button } from "@/components/ui/button";
     </div>
   );
 }
+=======
+import React, { useState } from "react";
+import { motion } from "framer-motion";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Separator } from "@/components/ui/separator";
+import {
+  Zap,
+  Target,
+  Upload,
+  CheckCircle2,
+  ArrowRight,
+  Mail,
+  Sparkles,
+  Shield,
+  Globe,
+  AlertCircle,
+  Loader2,
+} from "lucide-react";
+import { useAuth } from "@/contexts/AuthContext";
+export default function LoginForm() {
+  const { firebaseLogin, requestOTP, verifyOTP } = useAuth();
+  const [email, setEmail] = useState("");
+  const [showOtp, setShowOtp] = useState(false);
+  const [otp, setOtp] = useState(["", "", "", "", "", ""]);
+  const [loading, setLoading] = useState(false);
+  const [error, setError] = useState("");
+  // Business email validation
+  const PERSONAL_EMAIL_DOMAINS = [
+    "gmail.com",
+    "yahoo.com",
+    "hotmail.com",
+    "outlook.com",
+    "aol.com",
+    "icloud.com",
+    "mail.com",
+    "protonmail.com",
+    "yandex.com",
+    "zoho.com",
+    "gmx.com",
+    "live.com",
+    "msn.com",
+  ];
+  const isBusinessEmail = (email) => {
+    if (!email || !email.includes("@")) return false;
+    const domain = email.split("@")[1].toLowerCase();
+    return !PERSONAL_EMAIL_DOMAINS.includes(domain);
+  };
+  const handleGoogleLogin = async () => {
+    setLoading(true);
+    setError("");
+    try {
+      await firebaseLogin();
+    } catch (err) {
+      setError(err.message || "Failed to sign in with Google");
+    } finally {
+      setLoading(false);
+    }
+  };
+  const handleEmailSubmit = async (e) => {
+    e.preventDefault();
+    setLoading(true);
+    setError("");
+    if (!email) {
+      setError("Please enter your email address");
+      setLoading(false);
+      return;
+    }
+    if (!isBusinessEmail(email)) {
+      setError("Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, etc.) are not permitted.");
+      setLoading(false);
+      return;
+    }
+    try {
+      await requestOTP(email);
+      setShowOtp(true);
+    } catch (err) {
+      setError(err.message || "Failed to send OTP");
+    } finally {
+      setLoading(false);
+    }
+  };
+  const handleOtpChange = (index, value) => {
+    if (value.length <= 1 && /^\d*$/.test(value)) {
+      const newOtp = [...otp];
+      newOtp[index] = value;
+      setOtp(newOtp);
+      setError("");
+      // Auto-focus next input
+      if (value && index < 5) {
+        const nextInput = document.getElementById(`otp-${index + 1}`);
+        nextInput?.focus();
+      }
+    }
+  };
+  const handleOtpPaste = (e, startIndex = 0) => {
+    e.preventDefault();
+    const pastedData = e.clipboardData.getData("text");
+    // Extract only digits from pasted content
+    const digits = pastedData.replace(/\D/g, "").slice(0, 6);
+    if (digits.length > 0) {
+      const newOtp = [...otp];
+      // Fill the OTP array with pasted digits starting from the current field
+      for (let i = 0; i < digits.length && (startIndex + i) < 6; i++) {
+        newOtp[startIndex + i] = digits[i];
+      }
+      setOtp(newOtp);
+      setError("");
+      // Focus on the next empty input or the last input if all are filled
+      const nextEmptyIndex = Math.min(startIndex + digits.length, 5);
+      const nextInput = document.getElementById(`otp-${nextEmptyIndex}`);
+      nextInput?.focus();
+    }
+  };
+  const handleOtpKeyDown = (index, e) => {
+    if (e.key === "Backspace" && !otp[index] && index > 0) {
+      const prevInput = document.getElementById(`otp-${index - 1}`);
+      prevInput?.focus();
+    }
+  };
+  const handleOtpVerify = async (e) => {
+    e.preventDefault();
+    setLoading(true);
+    setError("");
+    const otpString = otp.join("");
+    if (otpString.length !== 6) {
+      setError("Please enter a valid 6-digit OTP");
+      setLoading(false);
+      return;
+    }
+    try {
+      await verifyOTP(email, otpString);
+      // Success - user will be redirected by AuthContext
+    } catch (err) {
+      setError(err.message || "Invalid OTP. Please try again.");
+      setOtp(["", "", "", "", "", ""]);
+    } finally {
+      setLoading(false);
+    }
+  };
+  const features = [
+    {
+      icon: Zap,
+      title: "Lightning Fast",
+      description: "Process documents in seconds and get outputs for ERP ingestion",
+      color: "text-amber-500",
+      bg: "bg-amber-50",
+    },
+    {
+      icon: Target,
+      title: "100% Accuracy",
+      description: "Industry-leading extraction with Visual Reasoning Processor",
+      color: "text-emerald-500",
+      bg: "bg-emerald-50",
+    },
+    {
+      icon: Globe,
+      title: "Any Format, Any Language",
+      description: "PDF, images, scanned docs — multi-lingual support included",
+      color: "text-blue-500",
+      bg: "bg-blue-50",
+    },
+  ];
+  const supportedFormats = [
+    { ext: "PDF", color: "bg-red-500" },
+    { ext: "PNG", color: "bg-blue-500" },
+    { ext: "JPG", color: "bg-green-500" },
+    { ext: "TIFF", color: "bg-purple-500" },
+  ];
+  return (
+    <div className="min-h-screen bg-gradient-to-br from-slate-50 via-white to-blue-50 flex">
+      {/* Left Side - Product Showcase */}
+      <div className="hidden lg:flex lg:w-[56%] flex-col justify-between p-8 relative overflow-hidden">
+        {/* Background Elements */}
+        <div className="absolute top-0 right-0 w-96 h-96 bg-blue-100/40 rounded-full blur-3xl -translate-y-1/2 translate-x-1/2" />
+        <div className="absolute bottom-0 left-0 w-80 h-80 bg-emerald-100/40 rounded-full blur-3xl translate-y-1/2 -translate-x-1/2" />
+        {/* Logo & Brand */}
+        <motion.div
+          initial={{ opacity: 0, y: -20 }}
+          animate={{ opacity: 1, y: 0 }}
+          className="relative z-10 mb-6"
+        >
+          <div className="flex items-center gap-3">
+            <div className="h-12 w-12 flex items-center justify-center flex-shrink-0">
+              <img
+                src="/logo.png"
+                alt="EZOFIS AI Logo"
+                className="h-full w-full object-contain"
+                onError={(e) => {
+                  // Fallback: hide image if logo not found
+                  e.target.style.display = 'none';
+                }}
+              />
+            </div>
+            <div>
+              <h1 className="text-2xl font-bold text-slate-900 tracking-tight">EZOFISOCR</h1>
+              <p className="text-sm text-slate-500 font-medium">VRP Intelligence</p>
+            </div>
+          </div>
+        </motion.div>
+        {/* Main Content */}
+        <motion.div
+          initial={{ opacity: 0, y: 20 }}
+          animate={{ opacity: 1, y: 0 }}
+          transition={{ delay: 0.1 }}
+          className="relative z-10 space-y-5 flex-1 flex flex-col justify-center ml-24 xl:ml-36"
+        >
+          <div className="space-y-3">
+            <h2 className="text-3xl xl:text-4xl font-bold text-slate-900 leading-tight">
+              Pure Agentic
+              <span className="block text-transparent bg-clip-text bg-gradient-to-r from-blue-600 to-indigo-600">
+                Document Intelligence
+              </span>
+            </h2>
+            <p className="text-base text-slate-600 max-w-lg leading-relaxed">
+              Deterministic, layout-aware extraction (without LLM) using our proprietary{" "}
+              <span className="font-semibold text-slate-800">Visual Reasoning Processor (VRP)</span>
+            </p>
+          </div>
+          {/* Product Preview Card */}
+          <motion.div
+            initial={{ opacity: 0, scale: 0.95 }}
+            animate={{ opacity: 1, scale: 1 }}
+            transition={{ delay: 0.3 }}
+            className="bg-white rounded-2xl border border-slate-200/80 shadow-xl shadow-slate-200/50 p-4 max-w-lg"
+          >
+            <div className="border-2 border-dashed border-slate-200 rounded-xl p-5 text-center bg-slate-50/50">
+              <div className="w-12 h-12 rounded-full bg-slate-100 flex items-center justify-center mx-auto mb-3">
+                <Upload className="w-5 h-5 text-slate-400" />
+              </div>
+              <p className="text-slate-700 font-medium mb-1 text-sm">Drop a document to extract data</p>
+              <p className="text-xs text-slate-400">Invoices, purchase orders, delivery notes, receipts, and operational documents</p>
+              <div className="flex items-center justify-center gap-2 mt-3">
+                {supportedFormats.map((format, i) => (
+                  <span key={i} className={`${format.color} text-white text-xs font-bold px-2 py-1 rounded`}>
+                    {format.ext}
+                  </span>
+                ))}
+              </div>
+            </div>
+            <div className="flex items-center justify-between mt-3 pt-3 border-t border-slate-100">
+              <div className="flex items-center gap-2">
+                <div className="w-2 h-2 rounded-full bg-emerald-500 animate-pulse" />
+                <span className="text-xs text-slate-600">Ready to extract</span>
+              </div>
+              <div className="flex items-center gap-1 text-emerald-600">
+                <CheckCircle2 className="w-3.5 h-3.5" />
+                <span className="text-xs font-semibold">99.8% Accuracy</span>
+              </div>
+            </div>
+          </motion.div>
+          {/* Features */}
+          <div className="grid gap-3">
+            {features.map((feature, index) => (
+              <motion.div
+                key={feature.title}
+                initial={{ opacity: 0, x: -20 }}
+                animate={{ opacity: 1, x: 0 }}
+                transition={{ delay: 0.4 + index * 0.1 }}
+                className="flex items-start gap-3 group"
+              >
+                <div
+                  className={`w-9 h-9 rounded-xl ${feature.bg} flex items-center justify-center flex-shrink-0 group-hover:scale-110 transition-transform`}
+                >
+                  <feature.icon className={`w-4 h-4 ${feature.color}`} />
+                </div>
+                <div>
+                  <h3 className="font-semibold text-slate-900 text-sm">{feature.title}</h3>
+                  <p className="text-xs text-slate-500">{feature.description}</p>
+                </div>
+              </motion.div>
+            ))}
+          </div>
+        </motion.div>
+        {/* Trust Badge */}
+        <motion.div
+          initial={{ opacity: 0 }}
+          animate={{ opacity: 1 }}
+          transition={{ delay: 0.6 }}
+          className="relative z-10 flex items-center gap-3 text-xs text-slate-500 mt-6"
+        >
+          <Shield className="w-4 h-4" />
+          <span>Enterprise-grade security • SOC 2 Compliant • GDPR Ready</span>
+        </motion.div>
+      </div>
+      {/* Right Side - Sign In Form */}
+      <div className="w-full lg:w-[44%] flex items-center justify-center p-6 sm:p-10">
+        <motion.div
+          initial={{ opacity: 0, y: 20 }}
+          animate={{ opacity: 1, y: 0 }}
+          transition={{ delay: 0.2 }}
+          className="w-full max-w-md"
+        >
+          {/* Mobile Logo */}
+          <div className="lg:hidden flex items-center justify-center gap-3 mb-8">
+            <div className="h-12 w-12 flex items-center justify-center flex-shrink-0">
+              <img
+                src="/logo.png"
+                alt="EZOFIS AI Logo"
+                className="h-full w-full object-contain"
+                onError={(e) => {
+                  // Fallback: hide image if logo not found
+                  e.target.style.display = 'none';
+                }}
+              />
+            </div>
+            <div>
+              <h1 className="text-2xl font-bold text-slate-900 tracking-tight">EZOFISOCR</h1>
+              <p className="text-sm text-slate-500 font-medium">VRP Intelligence</p>
+            </div>
+          </div>
+          <div className="bg-white rounded-3xl border border-slate-200/80 shadow-2xl shadow-slate-200/50 p-8 sm:p-10">
+            <div className="text-center mb-8">
+              <h2 className="text-2xl font-bold text-slate-900 mb-2">
+                {showOtp ? "Enter verification code" : "Secure Access"}
+              </h2>
+              <p className="text-slate-500">
+                {showOtp ? `We sent a code to ${email}` : "Access your document intelligence workspace"}
+              </p>
+            </div>
+            {/* Error Message */}
+            {error && (
+              <motion.div
+                initial={{ opacity: 0, y: -10 }}
+                animate={{ opacity: 1, y: 0 }}
+                className="mb-6 p-3 bg-red-50 border border-red-200 rounded-xl flex items-start gap-2 text-sm text-red-700"
+              >
+                <AlertCircle className="h-4 w-4 flex-shrink-0 mt-0.5" />
+                <p>{error}</p>
+              </motion.div>
+            )}
+            {!showOtp ? (
+              <>
+                {/* Google Sign In */}
+                <Button
+                  onClick={handleGoogleLogin}
+                  disabled={loading}
+                  variant="outline"
+                  className="w-full h-12 text-base font-medium border-slate-200 hover:bg-slate-50 hover:border-slate-300 transition-all group"
+                >
+                  {loading ? (
+                    <Loader2 className="w-5 h-5 mr-3 animate-spin" />
+                  ) : (
+                    <svg className="w-5 h-5 mr-3" viewBox="0 0 24 24">
+                      <path fill="#4285F4" d="M22.56 12.25c0-.78-.07-1.53-.2-2.25H12v4.26h5.92c-.26 1.37-1.04 2.53-2.21 3.31v2.77h3.57c2.08-1.92 3.28-4.74 3.28-8.09z" />
+                      <path fill="#34A853" d="M12 23c2.97 0 5.46-.98 7.28-2.66l-3.57-2.77c-.98.66-2.23 1.06-3.71 1.06-2.86 0-5.29-1.93-6.16-4.53H2.18v2.84C3.99 20.53 7.7 23 12 23z" />
+                      <path fill="#FBBC05" d="M5.84 14.09c-.22-.66-.35-1.36-.35-2.09s.13-1.43.35-2.09V7.07H2.18C1.43 8.55 1 10.22 1 12s.43 3.45 1.18 4.93l2.85-2.22.81-.62z" />
+                      <path fill="#EA4335" d="M12 5.38c1.62 0 3.06.56 4.21 1.64l3.15-3.15C17.45 2.09 14.97 1 12 1 7.7 1 3.99 3.47 2.18 7.07l3.66 2.84c.87-2.6 3.3-4.53 6.16-4.53z" />
+                    </svg>
+                  )}
+                  Continue with Google
+                  <ArrowRight className="w-4 h-4 ml-auto opacity-0 -translate-x-2 group-hover:opacity-100 group-hover:translate-x-0 transition-all" />
+                </Button>
+                <div className="relative my-8">
+                  <Separator />
+                  <span className="absolute left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2 bg-white px-4 text-sm text-slate-400">
+                    or continue with email
+                  </span>
+                </div>
+                {/* Email Input */}
+                <form onSubmit={handleEmailSubmit} className="space-y-4">
+                  <div className="relative">
+                    <Mail className="absolute left-4 top-1/2 -translate-y-1/2 w-5 h-5 text-slate-400" />
+                    <Input
+                      type="email"
+                      placeholder="name@company.com"
+                      value={email}
+                      onChange={(e) => {
+                        setEmail(e.target.value);
+                        setError("");
+                      }}
+                      className="h-12 pl-12 text-base border-slate-200 focus:border-blue-500 focus:ring-blue-500"
+                    />
+                  </div>
+                  <Button
+                    type="submit"
+                    disabled={loading}
+                    className="w-full h-12 text-base font-medium bg-gradient-to-r from-blue-600 to-indigo-600 hover:from-blue-700 hover:to-indigo-700 shadow-lg shadow-blue-500/25 transition-all"
+                  >
+                    {loading ? (
+                      <>
+                        <Loader2 className="w-4 h-4 mr-2 animate-spin" />
+                        Sending...
+                      </>
+                    ) : (
+                      <>
+                        Continue with Email
+                        <ArrowRight className="w-4 h-4 ml-2" />
+                      </>
+                    )}
+                  </Button>
+                </form>
+              </>
+            ) : (
+              /* OTP Input */
+              <form onSubmit={handleOtpVerify} className="space-y-6">
+                <div className="flex justify-center gap-2">
+                  {otp.map((digit, index) => (
+                    <Input
+                      key={index}
+                      id={`otp-${index}`}
+                      type="text"
+                      inputMode="numeric"
+                      maxLength={1}
+                      value={digit}
+                      onChange={(e) => handleOtpChange(index, e.target.value)}
+                      onKeyDown={(e) => handleOtpKeyDown(index, e)}
+                      onPaste={(e) => handleOtpPaste(e, index)}
+                      className="w-12 h-14 text-center text-xl font-semibold border-slate-200 focus:border-blue-500 focus:ring-blue-500"
+                    />
+                  ))}
+                </div>
+                <Button
+                  type="submit"
+                  disabled={loading || otp.join("").length !== 6}
+                  className="w-full h-12 text-base font-medium bg-gradient-to-r from-blue-600 to-indigo-600 hover:from-blue-700 hover:to-indigo-700 shadow-lg shadow-blue-500/25"
+                >
+                  {loading ? (
+                    <>
+                      <Loader2 className="w-4 h-4 mr-2 animate-spin" />
+                      Verifying...
+                    </>
+                  ) : (
+                    <>
+                      Verify & Sign In
+                      <ArrowRight className="w-4 h-4 ml-2" />
+                    </>
+                  )}
+                </Button>
+                <button
+                  type="button"
+                  onClick={() => {
+                    setShowOtp(false);
+                    setOtp(["", "", "", "", "", ""]);
+                    setError("");
+                  }}
+                  className="w-full text-sm text-slate-500 hover:text-slate-700 transition-colors"
+                >
+                  ← Back to sign in options
+                </button>
+              </form>
+            )}
+            {/* Notice */}
+            <div className="mt-8 pt-6 border-t border-slate-100">
+              <div className="flex items-start gap-2 text-xs text-slate-400 mb-4">
+                <Shield className="w-4 h-4 flex-shrink-0 mt-0.5" />
+                <span>Only business email addresses are allowed</span>
+              </div>
+              <p className="text-xs text-slate-400 text-center leading-relaxed">
+                By signing in, you agree to our{" "}
+                <a href="#" className="text-blue-600 hover:underline">
+                  Terms of Service
+                </a>{" "}
+                and{" "}
+                <a href="#" className="text-blue-600 hover:underline">
+                  Privacy Policy
+                </a>
+              </p>
+            </div>
+          </div>
+          {/* Mobile Features */}
+          <div className="lg:hidden mt-8 space-y-4">
+            {features.map((feature) => (
+              <div key={feature.title} className="flex items-center gap-3 text-sm">
+                <div className={`w-8 h-8 rounded-lg ${feature.bg} flex items-center justify-center`}>
+                  <feature.icon className={`w-4 h-4 ${feature.color}`} />
+                </div>
+                <span className="text-slate-600">{feature.title}</span>
+              </div>
+            ))}
+          </div>
+        </motion.div>
+      </div>
+    </div>
+  );
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/components/ocr/DocumentPreview.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 import React, { useState, useEffect, useRef } from "react";
 import { motion } from "framer-motion";
 import { FileText, ZoomIn, ZoomOut, RotateCw } from "lucide-react";
@@ -227,3 +228,234 @@ export default function DocumentPreview({ file, isProcessing, isFromHistory = fa
     </div>
   );
 }

+<<<<<<< HEAD
 import React, { useState, useEffect, useRef } from "react";
 import { motion } from "framer-motion";
 import { FileText, ZoomIn, ZoomOut, RotateCw } from "lucide-react";
     </div>
   );
 }
+=======
+import React, { useState, useEffect, useRef } from "react";
+import { motion } from "framer-motion";
+import { FileText, ZoomIn, ZoomOut, RotateCw } from "lucide-react";
+import { Button } from "@/components/ui/button";
+/**
+ * Preview panel for the currently selected document.
+ *
+ * PDFs are rendered page by page with pdfjs-dist into JPEG data URLs; any other file is shown
+ * through an object URL. A `null` entry in the preview list marks a page that could not be
+ * rendered and is displayed as a placeholder message.
+ *
+ * @param {object} props
+ * @param {File} [props.file] - Document to preview; `type`, `name` and `arrayBuffer()` are read.
+ * @param {boolean} [props.isProcessing] - When truthy, a scanning overlay is drawn on each page.
+ * @param {boolean} [props.isFromHistory=false] - Selects the placeholder wording used when a
+ *   page cannot be shown.
+ */
+export default function DocumentPreview({ file, isProcessing, isFromHistory = false }) {
+  const [previewUrls, setPreviewUrls] = useState([]);
+  const [zoom, setZoom] = useState(100);
+  const [rotation, setRotation] = useState(0);
+  // Object URLs created for the current preview; they must be revoked explicitly
+  const objectUrlsRef = useRef([]);
+  useEffect(() => {
+    // Only blob: URLs need revoking; data URLs and null placeholders are skipped
+    const revokeBlobUrls = (urls) => {
+      urls.forEach((url) => {
+        if (url && url.startsWith("blob:")) {
+          URL.revokeObjectURL(url);
+        }
+      });
+    };
+    if (!file) {
+      // Cleanup previous URLs
+      revokeBlobUrls(objectUrlsRef.current);
+      objectUrlsRef.current = [];
+      setPreviewUrls([]);
+      return;
+    }
+    // Flipped by the cleanup function so that a PDF render which finishes after the file
+    // changed (or after unmount) cannot overwrite the newer preview
+    let cancelled = false;
+    const loadPreview = async () => {
+      const urls = [];
+      const newObjectUrls = [];
+      // Check if it's a PDF
+      if (file.type === "application/pdf" || file.name?.toLowerCase().endsWith(".pdf")) {
+        try {
+          // Use pdf.js to render PDF pages
+          const pdfjsLib = await import("pdfjs-dist");
+          // Configure worker - use jsdelivr CDN which is more reliable
+          // This will use the same version as the installed package
+          const version = pdfjsLib.version || "4.0.379";
+          pdfjsLib.GlobalWorkerOptions.workerSrc = `https://cdn.jsdelivr.net/npm/pdfjs-dist@${version}/build/pdf.worker.min.mjs`;
+          const arrayBuffer = await file.arrayBuffer();
+          const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;
+          const numPages = pdf.numPages;
+          for (let pageNum = 1; pageNum <= numPages; pageNum++) {
+            // Stop rendering pages that will never be displayed
+            if (cancelled) return;
+            const page = await pdf.getPage(pageNum);
+            const viewport = page.getViewport({ scale: 2.0 });
+            const canvas = document.createElement("canvas");
+            const context = canvas.getContext("2d");
+            canvas.height = viewport.height;
+            canvas.width = viewport.width;
+            await page.render({
+              canvasContext: context,
+              viewport: viewport,
+            }).promise;
+            urls.push(canvas.toDataURL("image/jpeg", 0.95));
+          }
+        } catch (error) {
+          console.error("Error loading PDF:", error);
+          // Fallback: show error message
+          urls.push(null);
+        }
+      } else {
+        // For images, create object URL
+        try {
+          const url = URL.createObjectURL(file);
+          urls.push(url);
+          newObjectUrls.push(url);
+        } catch (error) {
+          // createObjectURL throws when `file` is not a Blob; show the placeholder instead of
+          // leaving the panel on "Loading preview..." forever
+          console.error("Error creating preview URL:", error);
+          urls.push(null);
+        }
+      }
+      if (cancelled) {
+        revokeBlobUrls(newObjectUrls);
+        return;
+      }
+      // Cleanup old object URLs
+      revokeBlobUrls(objectUrlsRef.current);
+      objectUrlsRef.current = newObjectUrls;
+      setPreviewUrls(urls);
+    };
+    loadPreview();
+    // Cleanup function - revoke object URLs when component unmounts or file changes
+    return () => {
+      cancelled = true;
+      revokeBlobUrls(objectUrlsRef.current);
+      objectUrlsRef.current = [];
+    };
+  }, [file]);
+  return (
+    <div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden">
+      {/* Header */}
+      <div className="flex items-center justify-between px-5 py-4 border-b border-slate-100">
+        <div className="flex items-center gap-3">
+          <div className="h-8 w-8 rounded-lg bg-indigo-50 flex items-center justify-center">
+            <FileText className="h-4 w-4 text-indigo-600" />
+          </div>
+          <div>
+            <h3 className="font-semibold text-slate-800 text-sm">Document Preview</h3>
+            <p className="text-xs text-slate-400">{file?.name || "No file selected"}</p>
+          </div>
+        </div>
+        {file && (
+          <div className="flex items-center gap-1">
+            <Button
+              variant="ghost"
+              size="icon"
+              className="h-8 w-8 text-slate-400 hover:text-slate-600"
+              onClick={() => setZoom((current) => Math.max(50, current - 25))}
+            >
+              <ZoomOut className="h-4 w-4" />
+            </Button>
+            <span className="text-xs text-slate-500 w-12 text-center">{zoom}%</span>
+            <Button
+              variant="ghost"
+              size="icon"
+              className="h-8 w-8 text-slate-400 hover:text-slate-600"
+              onClick={() => setZoom((current) => Math.min(200, current + 25))}
+            >
+              <ZoomIn className="h-4 w-4" />
+            </Button>
+            <div className="w-px h-4 bg-slate-200 mx-2" />
+            <Button
+              variant="ghost"
+              size="icon"
+              className="h-8 w-8 text-slate-400 hover:text-slate-600"
+              onClick={() => setRotation((current) => (current + 90) % 360)}
+            >
+              <RotateCw className="h-4 w-4" />
+            </Button>
+          </div>
+        )}
+      </div>
+      {/* Preview Area */}
+      <div className="flex-1 p-6 bg-slate-50/50 overflow-auto">
+        {!file ? (
+          <div className="h-full flex items-center justify-center">
+            <div className="text-center">
+              <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
+                <FileText className="h-10 w-10 text-slate-300" />
+              </div>
+              <p className="text-slate-400 text-sm">Upload a document to preview</p>
+            </div>
+          </div>
+        ) : previewUrls.length === 0 ? (
+          <div className="h-full flex items-center justify-center">
+            <div className="text-center">
+              <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
+                <FileText className="h-10 w-10 text-slate-300" />
+              </div>
+              <p className="text-slate-400 text-sm">Loading preview...</p>
+            </div>
+          </div>
+        ) : (
+          <div className="space-y-4">
+            {previewUrls.map((url, index) => (
+              <motion.div
+                key={index}
+                initial={{ opacity: 0, y: 20 }}
+                animate={{ opacity: 1, y: 0 }}
+                transition={{ delay: index * 0.1 }}
+                className="relative bg-white rounded-xl shadow-sm border border-slate-200 overflow-hidden flex items-center justify-center"
+                style={{
+                  minHeight: "400px",
+                }}
+              >
+                {url ? (
+                  <img
+                    src={url}
+                    alt={`Page ${index + 1}`}
+                    className="w-full h-auto"
+                    style={{
+                      transform: `scale(${zoom / 100}) rotate(${rotation}deg)`,
+                      maxWidth: "100%",
+                      objectFit: "contain",
+                      transition: "transform 0.2s ease",
+                    }}
+                  />
+                ) : (
+                  <div className="p-8 text-center">
+                    <p className="text-slate-400 text-sm">
+                      {isFromHistory
+                        ? "Original document not available for historical extractions"
+                        : "Unable to load preview"}
+                    </p>
+                  </div>
+                )}
+                {/* Processing overlay */}
+                {isProcessing && (
+                  <motion.div
+                    initial={{ opacity: 0 }}
+                    animate={{ opacity: 1 }}
+                    className="absolute inset-0 bg-indigo-600/5 backdrop-blur-[1px] pointer-events-none"
+                  >
+                    <motion.div
+                      initial={{ top: 0 }}
+                      animate={{ top: "100%" }}
+                      transition={{
+                        duration: 2,
+                        repeat: Infinity,
+                        ease: "linear",
+                      }}
+                      className="absolute left-0 right-0 h-1 bg-gradient-to-r from-transparent via-indigo-500 to-transparent"
+                    />
+                  </motion.div>
+                )}
+                {/* Page number */}
+                {previewUrls.length > 1 && (
+                  <div className="absolute bottom-3 right-3 text-xs text-slate-400 bg-white/90 px-2 py-1 rounded">
+                    Page {index + 1}
+                  </div>
+                )}
+              </motion.div>
+            ))}
+          </div>
+        )}
+      </div>
+    </div>
+  );
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/components/ocr/ExtractionOutput.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 import React, { useState, useEffect, useRef } from "react";
 import { motion, AnimatePresence } from "framer-motion";
 import {
@@ -1199,3 +1200,1206 @@ export default function ExtractionOutput({ hasFile, isProcessing, isComplete, ex
     </div>
   );
 }

+<<<<<<< HEAD
 import React, { useState, useEffect, useRef } from "react";
 import { motion, AnimatePresence } from "framer-motion";
 import {
     </div>
   );
 }
+=======
+import React, { useState, useEffect, useRef } from "react";
+import { motion, AnimatePresence } from "framer-motion";
+import {
+  Code2,
+  Copy,
+  Check,
+  Braces,
+  FileCode2,
+  FileText,
+  Sparkles,
+  ChevronDown,
+  Upload,
+} from "lucide-react";
+import { Button } from "@/components/ui/button";
+import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
+import { cn } from "@/lib/utils";
+/**
+ * Converts pipe-separated (markdown-style) tables inside `text` into HTML `<table>` markup.
+ *
+ * A line starts a table when it contains at least two `|` characters, is not a separator
+ * row, and yields at least two cells. A separator row directly after the header is skipped;
+ * the following lines are consumed as data rows until a blank line or a line without `|` is
+ * reached. Every other line is passed through unchanged.
+ *
+ * @param {string} text - Raw text that may contain pipe tables.
+ * @returns {string} The text with each detected table replaced by an HTML table; falsy input
+ *   is returned as-is.
+ */
+function convertPipeTablesToHTML(text) {
+  if (!text) return text;
+  // A separator row consists solely of pipes, dashes, colons and whitespace, e.g. "|---|:--:|"
+  const isSeparatorRow = (candidate) => /^[\s|\-:]+$/.test(candidate.trim());
+  // Splits a row into trimmed cells and drops the empty first/last cell that a leading or
+  // trailing pipe produces
+  const splitCells = (rowLine) => {
+    const cells = rowLine.split('|').map((cell) => cell.trim());
+    if (cells.length > 0 && !cells[0]) cells.shift();
+    if (cells.length > 0 && !cells[cells.length - 1]) cells.pop();
+    return cells;
+  };
+  const lines = text.split('\n');
+  const result = [];
+  let i = 0;
+  while (i < lines.length) {
+    const line = lines[i];
+    // Three or more segments means the line has at least two pipes
+    const mayStartTable = line.split('|').length >= 3 && !isSeparatorRow(line);
+    const headerCells = mayStartTable ? splitCells(line) : [];
+    if (headerCells.length < 2) {
+      // Not a table row, add as-is
+      result.push(line);
+      i++;
+      continue;
+    }
+    let j = i + 1;
+    // Skip separator line if present
+    if (j < lines.length && isSeparatorRow(lines[j])) {
+      j++;
+    }
+    // Collect data rows
+    const bodyRows = [];
+    while (j < lines.length) {
+      const rowLine = lines[j];
+      // An empty line or a line without pipes ends the table
+      if (!rowLine.trim() || !rowLine.includes('|')) break;
+      if (!isSeparatorRow(rowLine)) {
+        const rowCells = splitCells(rowLine);
+        // Pad short rows to the header width so every row has a cell under each column;
+        // longer rows are kept in full so no extracted data is dropped
+        while (rowCells.length < headerCells.length) rowCells.push('');
+        bodyRows.push(rowCells);
+      }
+      j++;
+    }
+    // Convert to HTML table
+    const headerHtml = headerCells
+      .map((cell) => `<th class="border border-gray-300 px-4 py-2 bg-gray-100 font-semibold text-left">${escapeHtml(cell)}</th>`)
+      .join('');
+    const bodyHtml = bodyRows
+      .map((row) => {
+        const cellsHtml = row
+          .map((cell) => `<td class="border border-gray-300 px-4 py-2">${escapeHtml(cell)}</td>`)
+          .join('');
+        return `<tr>${cellsHtml}</tr>\n`;
+      })
+      .join('');
+    result.push(
+      `<table class="border-collapse border border-gray-300 w-full my-4">\n<thead>\n<tr>${headerHtml}</tr>\n</thead>\n<tbody>\n${bodyHtml}</tbody>\n</table>`
+    );
+    // Continue after the last line consumed by the table
+    i = j;
+  }
+  return result.join('\n');
+}
+/**
+ * Escapes `&`, `<` and `>` so that `text` can be placed inside HTML element content.
+ *
+ * Implemented with plain string replacement so it does not need a `document` object and does
+ * not create a DOM node per call. Quotes are left untouched, so the result is meant for
+ * element content, not for attribute values.
+ *
+ * @param {string} text - Text to escape.
+ * @returns {string} The escaped text, or an empty string for falsy input.
+ */
+function escapeHtml(text) {
+  if (!text) return '';
+  // "&" must be replaced first so the entities added below are not escaped a second time
+  return String(text)
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;');
+}
+/**
+ * Converts extracted text containing a small markdown subset into an HTML string.
+ *
+ * Steps, in order: pipe tables become HTML tables, LaTeX-style `$^{..}$` / `$_{..}$` become
+ * `<sup>` / `<sub>`, `<table>` blocks are set aside behind placeholders, then headers
+ * (`#`..`###`), bold, italic and links are converted, line breaks become paragraphs / `<br>`,
+ * and finally the table blocks are restored.
+ *
+ * Link targets are checked before they are written into `href`: only http(s), mailto, tel,
+ * fragment and relative targets produce an anchor; any other target is reduced to its link
+ * text.
+ *
+ * NOTE(review): HTML that is already present in `text` is passed through unchanged. If the
+ * result is inserted into the page as HTML, confirm that a sanitizer runs on it as well.
+ *
+ * @param {string} text - Markdown / HTML text to convert.
+ * @returns {string} The resulting HTML, or an empty string for falsy input.
+ */
+function renderMarkdownToHTML(text) {
+  if (!text) return "";
+  let html = text;
+  // FIRST: Convert pipe-separated tables to HTML tables
+  html = convertPipeTablesToHTML(html);
+  // Convert LaTeX-style superscripts/subscripts FIRST (before protecting tables)
+  // This ensures they're converted everywhere, including inside tables
+  // Convert LaTeX-style superscripts: $^{text}$ or $^text$ to <sup>text</sup>
+  html = html.replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, '<sup>$1</sup>');
+  html = html.replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, '<sup>$1</sup>');
+  // Convert LaTeX-style subscripts: $_{text}$ or $_text$ to <sub>text</sub>
+  html = html.replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, '<sub>$1</sub>');
+  html = html.replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, '<sub>$1</sub>');
+  // Split by HTML tags to preserve existing HTML (like tables)
+  // Process markdown only in non-HTML sections
+  // First, protect existing HTML blocks (tables, etc.)
+  const htmlBlocks = [];
+  let htmlBlockIndex = 0;
+  // Extract and protect HTML table blocks
+  html = html.replace(/<table[\s\S]*?<\/table>/gi, (match) => {
+    const placeholder = `__HTML_BLOCK_${htmlBlockIndex}__`;
+    htmlBlocks[htmlBlockIndex] = match;
+    htmlBlockIndex++;
+    return placeholder;
+  });
+  // Convert markdown headers (only if not inside HTML)
+  html = html.replace(/^### (.*$)/gim, '<h3>$1</h3>');
+  html = html.replace(/^## (.*$)/gim, '<h2>$1</h2>');
+  html = html.replace(/^# (.*$)/gim, '<h1>$1</h1>');
+  // Convert markdown bold/italic (but not inside HTML tags)
+  html = html.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
+  html = html.replace(/\*(.*?)\*/g, '<em>$1</em>');
+  // Convert markdown links, refusing targets that could execute script
+  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, label, url) => {
+    const target = url.trim();
+    const hasAllowedPrefix = /^(https?:\/\/|mailto:|tel:|\/|#|\.)/i.test(target);
+    // Otherwise the target must be a plain relative path: its first segment may contain
+    // neither ":" (a scheme) nor "&" (a character reference that could spell one)
+    const isPlainRelative = !/[:&]/.test(target.split(/[/?#]/)[0]);
+    if (!hasAllowedPrefix && !isPlainRelative) {
+      return label;
+    }
+    // Escape the characters that could end the attribute value or open a tag
+    const href = url.replace(/"/g, '&quot;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
+    return `<a href="${href}" target="_blank" rel="noopener noreferrer">${label}</a>`;
+  });
+  // Convert line breaks to paragraphs (but preserve structure around HTML blocks)
+  const parts = html.split(/(__HTML_BLOCK_\d+__)/);
+  const processedParts = parts.map((part) => {
+    if (part.match(/^__HTML_BLOCK_\d+__$/)) {
+      // Restore HTML block
+      const blockIndex = Number.parseInt(part.match(/\d+/)[0], 10);
+      return htmlBlocks[blockIndex];
+    } else {
+      // Process markdown in this part
+      let processed = part;
+      // Convert double line breaks to paragraph breaks
+      processed = processed.replace(/\n\n+/g, '</p><p>');
+      // Convert single line breaks to <br> (but not if already in a tag)
+      processed = processed.replace(/([^\n>])\n([^\n<])/g, '$1<br>$2');
+      // Wrap in paragraph if there's content
+      if (processed.trim() && !processed.trim().startsWith('<')) {
+        processed = '<p>' + processed + '</p>';
+      }
+      return processed;
+    }
+  });
+  html = processedParts.join('');
+  // Process LaTeX notation in restored HTML blocks (tables) as well
+  // This handles any LaTeX that might be in table cells
+  html = html.replace(/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*\^\s*\{([^}]+)\}\s*\$([^<]*)(<\/td>|<\/th>)/gi,
+    (match, openTag, before, supText, after, closeTag) => {
+      return openTag + before + '<sup>' + supText + '</sup>' + after + closeTag;
+    });
+  html = html.replace(/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*\^\s*([^\s$<>]+)\s*\$([^<]*)(<\/td>|<\/th>)/gi,
+    (match, openTag, before, supText, after, closeTag) => {
+      return openTag + before + '<sup>' + supText + '</sup>' + after + closeTag;
+    });
+  html = html.replace(/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*_\s*\{([^}]+)\}\s*\$([^<]*)(<\/td>|<\/th>)/gi,
+    (match, openTag, before, subText, after, closeTag) => {
+      return openTag + before + '<sub>' + subText + '</sub>' + after + closeTag;
+    });
+  html = html.replace(/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*_\s*([^\s$<>]+)\s*\$([^<]*)(<\/td>|<\/th>)/gi,
+    (match, openTag, before, subText, after, closeTag) => {
+      return openTag + before + '<sub>' + subText + '</sub>' + after + closeTag;
+    });
+  // Clean up empty paragraphs and fix paragraph structure
+  html = html.replace(/<p><\/p>/g, '');
+  html = html.replace(/<p>\s*<br>\s*<\/p>/g, '');
+  html = html.replace(/<p>\s*<\/p>/g, '');
+  // Ensure proper spacing around HTML blocks
+  html = html.replace(/(<\/table>)\s*(<h[1-3])/g, '$1</p><p>$2');
+  html = html.replace(/(<\/h[1-3]>)\s*(<table)/g, '$1<p>$2');
+  html = html.replace(/(<\/table>)\s*(<p>)/g, '$1$2');
+  return html;
+}
+// Mock extracted data
+// Sample invoice extraction as a plain object (document, vendor, invoice, items, totals).
+// NOTE(review): no use of these mock constants is visible in this part of the file -
+// confirm they are still referenced before relying on them.
+const mockData = {
+  document: {
+    type: "Invoice",
+    confidence: 0.98,
+  },
+  vendor: {
+    name: "Acme Corporation",
+    address: "123 Business Ave, Suite 400",
+    city: "San Francisco",
+    state: "CA",
+    zip: "94102",
+    phone: "+1 (555) 123-4567",
+  },
+  invoice: {
+    number: "INV-2024-0847",
+    date: "2024-01-15",
+    due_date: "2024-02-14",
+    po_number: "PO-9823",
+  },
+  items: [
+    { description: "Professional Services", quantity: 40, unit_price: 150.0, total: 6000.0 },
+    { description: "Software License", quantity: 5, unit_price: 299.99, total: 1499.95 },
+    { description: "Support Package", quantity: 1, unit_price: 500.0, total: 500.0 },
+  ],
+  totals: {
+    subtotal: 7999.95,
+    tax_rate: 0.0875,
+    tax_amount: 699.99,
+    total: 8699.94,
+  },
+};
+// XML rendering of the same sample invoice; it lists only the first line item.
+const mockXML = `<?xml version="1.0" encoding="UTF-8"?>
+<extraction>
+  <document type="Invoice" confidence="0.98"/>
+  <vendor>
+    <name>Acme Corporation</name>
+    <address>123 Business Ave, Suite 400</address>
+    <city>San Francisco</city>
+    <state>CA</state>
+    <zip>94102</zip>
+  </vendor>
+  <invoice>
+    <number>INV-2024-0847</number>
+    <date>2024-01-15</date>
+    <due_date>2024-02-14</due_date>
+  </invoice>
+  <items>
+    <item>
+      <description>Professional Services</description>
+      <quantity>40</quantity>
+      <total>6000.00</total>
+    </item>
+  </items>
+  <totals>
+    <subtotal>7999.95</subtotal>
+    <tax>699.99</tax>
+    <total>8699.94</total>
+  </totals>
+</extraction>`;
+// Plain-text rendering of the same sample invoice, laid out in aligned columns.
+const mockText = `INVOICE
+ACME CORPORATION
+123 Business Ave, Suite 400
+San Francisco, CA 94102
+Phone: +1 (555) 123-4567
+Invoice Number: INV-2024-0847
+Invoice Date: January 15, 2024
+Due Date: February 14, 2024
+PO Number: PO-9823
+BILL TO:
+Customer Name
+456 Client Street
+New York, NY 10001
+ITEMS:
+─────────────────────────────────────────────────────────
+Description                  Qty    Unit Price    Total
+─────────────────────────────────────────────────────────
+Professional Services         40      $150.00    $6,000.00
+Software License               5      $299.99    $1,499.95
+Support Package                1      $500.00      $500.00
+─────────────────────────────────────────────────────────
+                              Subtotal:    $7,999.95
+                              Tax (8.75%):   $699.99
+                              ─────────────────────────
+                              TOTAL:       $8,699.94
+Payment Terms: Net 30
+Thank you for your business!`;
+/**
+ * Prepares extracted fields for the JSON / XML output - removes duplicates and restructures.
+ *
+ * - `Fields` (if a non-empty object) is moved to the front of the result.
+ * - When a non-empty `pages` array exists, the top-level `full_text` and each page's
+ *   `fields.full_text` are dropped.
+ * - For the "json" and "xml" formats the `pages` array is replaced by `page_<n>` entries
+ *   holding `text`, `confidence`, `doc_type`, optional `table` / `footer_notes`, and only
+ *   those page fields that differ from the top-level value of the same key.
+ * - `Fields` and `metadata` are removed from every `page_*` object.
+ *
+ * The input object and the objects nested in it are never modified; page objects are copied
+ * before keys are removed from them.
+ *
+ * @param {object} fields - Extraction result fields.
+ * @param {string} [format="json"] - Target format; "json" and "xml" trigger the page
+ *   restructuring.
+ * @returns {object} A new object for output, or `fields` itself when it is not an object.
+ */
+function prepareFieldsForOutput(fields, format = "json") {
+  if (!fields || typeof fields !== "object") {
+    return fields;
+  }
+  const output = { ...fields };
+  // Extract Fields from root level if it exists
+  const rootFields = output.Fields;
+  // Remove Fields from output temporarily (will be added back at top)
+  delete output.Fields;
+  // Remove full_text from top-level if pages array exists (to avoid duplication)
+  if (Array.isArray(output.pages) && output.pages.length > 0) {
+    delete output.full_text;
+    // Clean up each page: remove full_text from page.fields (it duplicates page.text)
+    output.pages = output.pages.map(page => {
+      const cleanedPage = { ...page };
+      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
+        const cleanedFields = { ...cleanedPage.fields };
+        delete cleanedFields.full_text;
+        cleanedPage.fields = cleanedFields;
+      }
+      return cleanedPage;
+    });
+  }
+  // For JSON and XML: restructure pages into separate top-level fields (page_1, page_2, etc.)
+  if ((format === "json" || format === "xml") && Array.isArray(output.pages)) {
+    // Get top-level field keys (these are merged from all pages - avoid duplicating in page fields)
+    const topLevelKeys = new Set(Object.keys(output).filter(k => k !== "pages" && k !== "full_text" && k !== "Fields"));
+    output.pages.forEach((page, idx) => {
+      const pageNum = page.page_number || idx + 1;
+      const pageFields = page.fields || {};
+      // Keep only page fields that are not full_text and do not repeat the top-level value
+      const cleanedPageFields = {};
+      for (const [key, value] of Object.entries(pageFields)) {
+        if (key !== "full_text" && (!topLevelKeys.has(key) || (value !== output[key]))) {
+          cleanedPageFields[key] = value;
+        }
+      }
+      const pageObj = {
+        text: page.text || "",
+        confidence: page.confidence || 0,
+        doc_type: page.doc_type || "other"
+      };
+      // Add table and footer_notes if they exist
+      if (Array.isArray(page.table) && page.table.length > 0) {
+        pageObj.table = page.table;
+      }
+      if (Array.isArray(page.footer_notes) && page.footer_notes.length > 0) {
+        pageObj.footer_notes = page.footer_notes;
+      }
+      // Only add fields if there are unique page-specific fields
+      if (Object.keys(cleanedPageFields).length > 0) {
+        pageObj.fields = cleanedPageFields;
+      }
+      output[`page_${pageNum}`] = pageObj;
+    });
+    // Remove pages array - we now have page_1, page_2, etc. as separate fields
+    delete output.pages;
+  }
+  // Handle page_X structure (from backend) - remove Fields/metadata from page objects.
+  // `output` is only a shallow copy, so each page object is copied before keys are removed;
+  // deleting in place would change the caller's data.
+  for (const pageKey of Object.keys(output).filter(k => k.startsWith("page_"))) {
+    const pageData = output[pageKey];
+    if (pageData && typeof pageData === "object" && !Array.isArray(pageData)) {
+      const cleanedPageData = { ...pageData };
+      delete cleanedPageData.Fields;
+      delete cleanedPageData.metadata;
+      output[pageKey] = cleanedPageData;
+    }
+  }
+  // Rebuild output with Fields at the top (only if it exists and is not empty)
+  const hasRootFields = rootFields && typeof rootFields === "object" && Object.keys(rootFields).length > 0;
+  return hasRootFields ? { Fields: rootFields, ...output } : { ...output };
+}
+/**
+ * Serialises extraction fields to an XML document string.
+ *
+ * The object is first passed through `prepareFieldsForOutput(obj, "xml")`. Each key becomes
+ * an element; arrays repeat the element once per item; nested objects nest; primitive values
+ * are escaped with `escapeXML`. `null` / `undefined` values and array items are skipped.
+ * Keys are converted to valid XML element names (invalid characters become `_`, and a name
+ * that does not start with a letter or `_` gets a `_` prefix), so keys such as
+ * "Invoice Number" no longer produce malformed XML.
+ *
+ * @param {object} obj - Extraction fields to serialise.
+ * @param {string} [rootName="extraction"] - Name of the root element; used as given.
+ * @returns {string} The XML document.
+ */
+function objectToXML(obj, rootName = "extraction") {
+  // Prepare fields - remove full_text if pages exist
+  const preparedObj = prepareFieldsForOutput(obj, "xml");
+  // Character classes follow the XML 1.0 Name production (BMP ranges); ":" is left out on
+  // purpose because it would be read as a namespace prefix
+  const invalidNameChar = /[^A-Za-z0-9_.\-\u00B7\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u037D\u037F-\u1FFF\u200C\u200D\u203F\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/g;
+  const validNameStart = /^[A-Za-z_\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
+  const toTagName = (key) => {
+    const cleaned = String(key).replace(invalidNameChar, "_");
+    return validNameStart.test(cleaned) ? cleaned : `_${cleaned}`;
+  };
+  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;
+  const convert = (node, indent = "  ") => {
+    for (const [key, value] of Object.entries(node)) {
+      if (value === null || value === undefined) continue;
+      // Skip full_text if pages exist (already handled in prepareFieldsForOutput)
+      if (key === "full_text" && Array.isArray(node.pages) && node.pages.length > 0) {
+        continue;
+      }
+      const tag = toTagName(key);
+      if (Array.isArray(value)) {
+        value.forEach((item) => {
+          // typeof null is "object"; skip empty items instead of recursing into them
+          if (item === null || item === undefined) return;
+          xml += `${indent}<${tag}>\n`;
+          if (typeof item === "object") {
+            convert(item, indent + "  ");
+          } else {
+            xml += `${indent}  ${escapeXML(String(item))}\n`;
+          }
+          xml += `${indent}</${tag}>\n`;
+        });
+      } else if (typeof value === "object") {
+        xml += `${indent}<${tag}>\n`;
+        convert(value, indent + "  ");
+        xml += `${indent}</${tag}>\n`;
+      } else {
+        xml += `${indent}<${tag}>${escapeXML(String(value))}</${tag}>\n`;
+      }
+    }
+  };
+  convert(preparedObj);
+  xml += `</${rootName}>`;
+  return xml;
+}
+/**
+ * Replaces the five characters that have predefined XML entities
+ * (`&`, `<`, `>`, `"`, `'`) with those entities.
+ *
+ * @param {string} str - Text to escape.
+ * @returns {string} The escaped text.
+ */
+function escapeXML(str) {
+  const entityByChar = {
+    "&": "&amp;",
+    "<": "&lt;",
+    ">": "&gt;",
+    '"': "&quot;",
+    "'": "&apos;",
+  };
+  // One pass over the string; each special character is looked up in the table above
+  return str.replace(/[&<>"']/g, (ch) => entityByChar[ch]);
+}
+/**
+ * Pulls the plain text out of an extraction result.
+ *
+ * The `text` of every `page_*` entry is collected (empty ones are ignored) and joined with
+ * a blank line. When no page yields text, `full_text` is used instead.
+ *
+ * @param {object} fields - Extraction result fields.
+ * @returns {string} The combined text, or an empty string when none is available.
+ */
+function extractTextFromFields(fields) {
+  if (!fields || typeof fields !== "object") {
+    return "";
+  }
+  // page_X structure is the preferred format
+  const pageTexts = Object.keys(fields)
+    .filter((key) => key.startsWith("page_"))
+    .map((key) => fields[key]?.text)
+    .filter(Boolean);
+  if (pageTexts.length > 0) {
+    return pageTexts.join("\n\n");
+  }
+  // Fallback to full_text
+  return fields.full_text || "";
+}
+/**
+ * Formats extracted fields as readable plain text.
+ *
+ * When `extractTextFromFields` finds page text or `full_text`, that text is returned as-is.
+ * Otherwise every field is written as an indented `key: value` listing; arrays are listed
+ * item by item and nested objects are indented one level further.
+ *
+ * @param {object} fields - Extraction result fields.
+ * @returns {string} The text representation, or "No data extracted." when nothing can be
+ *   shown.
+ */
+function fieldsToText(fields) {
+  if (!fields || typeof fields !== "object") {
+    return "No data extracted.";
+  }
+  // Extract text from page structure or full_text
+  const extractedText = extractTextFromFields(fields);
+  if (extractedText) {
+    return extractedText;
+  }
+  // Fallback: format all fields normally
+  let text = "";
+  const formatValue = (key, value, indent = "") => {
+    if (Array.isArray(value)) {
+      text += `${indent}${key}:\n`;
+      value.forEach((item, idx) => {
+        // typeof null is "object", so null must be excluded before calling Object.entries
+        if (typeof item === "object" && item !== null) {
+          text += `${indent}  Item ${idx + 1}:\n`;
+          Object.entries(item).forEach(([k, v]) => formatValue(k, v, indent + "    "));
+        } else {
+          text += `${indent}  - ${item}\n`;
+        }
+      });
+    } else if (typeof value === "object" && value !== null) {
+      text += `${indent}${key}:\n`;
+      Object.entries(value).forEach(([k, v]) => formatValue(k, v, indent + "  "));
+    } else {
+      text += `${indent}${key}: ${value}\n`;
+    }
+  };
+  Object.entries(fields).forEach(([key, value]) => {
+    formatValue(key, value);
+    // Blank line between top-level fields
+    text += "\n";
+  });
+  return text.trim() || "No data extracted.";
+}
+export default function ExtractionOutput({ hasFile, isProcessing, isComplete, extractionResult, onNewUpload }) {
+  const [activeTab, setActiveTab] = useState("json");
+  const [copied, setCopied] = useState(false);
+  const [statusMessage, setStatusMessage] = useState("Preparing document...");
+  // Get fields from extraction result, default to empty object
+  const fields = extractionResult?.fields || {};
+  const confidence = extractionResult?.confidence || 0;
+  const fieldsExtracted = extractionResult?.fieldsExtracted || 0;
+  const totalTime = extractionResult?.totalTime || 0;
+  // Dynamic status messages that rotate during processing
+  const statusMessages = [
+    "Preparing document...",
+    "Converting pages to images...",
+    "Visual Reasoning...",
+    "Reading text from document...",
+    "Identifying document structure...",
+    "Extracting tables and data...",
+    "Analyzing content...",
+    "Processing pages...",
+    "Organizing extracted information...",
+    "Finalizing results...",
+  ];
+  // Rotate status messages during processing
+  const messageIndexRef = useRef(0);
+  useEffect(() => {
+    if (!isProcessing) {
+      setStatusMessage("Analyzing document structure");
+      messageIndexRef.current = 0;
+      return;
+    }
+    setStatusMessage(statusMessages[0]);
+    messageIndexRef.current = 0;
+    const interval = setInterval(() => {
+      messageIndexRef.current = (messageIndexRef.current + 1) % statusMessages.length;
+      setStatusMessage(statusMessages[messageIndexRef.current]);
+    }, 2500); // Change message every 2.5 seconds
+    return () => clearInterval(interval);
+  }, [isProcessing]);
+  // Initialize expanded sections based on available fields
+  const [expandedSections, setExpandedSections] = useState(() =>
+    Object.keys(fields).slice(0, 5) // Expand first 5 sections by default
+  );
+  // Helper function to convert HTML to formatted plain text with layout preserved
+  const htmlToFormattedText = (html) => {
+    if (!html) return "";
+    // Create a temporary div to parse HTML
+    const tempDiv = document.createElement("div");
+    tempDiv.innerHTML = html;
+    let text = "";
+    // Process each element
+    const processNode = (node) => {
+      if (node.nodeType === Node.TEXT_NODE) {
+        return node.textContent;
+      }
+      if (node.nodeType !== Node.ELEMENT_NODE) {
+        return "";
+      }
+      const tagName = node.tagName?.toLowerCase();
+      const children = Array.from(node.childNodes);
+      switch (tagName) {
+        case "h1":
+          return "\n\n" + processChildren(children).trim() + "\n\n";
+        case "h2":
+          return "\n\n" + processChildren(children).trim() + "\n\n";
+        case "h3":
+          return "\n" + processChildren(children).trim() + "\n";
+        case "p":
+          return processChildren(children) + "\n\n";
+        case "br":
+          return "\n";
+        case "strong":
+        case "b":
+          return processChildren(children);
+        case "em":
+        case "i":
+          return processChildren(children);
+        case "sup":
+          return processChildren(children);
+        case "sub":
+          return processChildren(children);
+        case "table":
+          return "\n" + processTable(node) + "\n\n";
+        case "ul":
+        case "ol":
+          return "\n" + processList(node) + "\n\n";
+        case "li":
+          return "  • " + processChildren(children).trim() + "\n";
+        default:
+          return processChildren(children);
+      }
+    };
+    const processChildren = (children) => {
+      return children.map(processNode).join("");
+    };
+    const processTable = (table) => {
+      let tableText = "";
+      const rows = table.querySelectorAll("tr");
+      if (rows.length === 0) return "";
+      // First pass: calculate column widths
+      const allRows = Array.from(rows);
+      const columnCount = Math.max(...allRows.map(row => row.querySelectorAll("td, th").length));
+      const columnWidths = new Array(columnCount).fill(0);
+      allRows.forEach(row => {
+        const cells = row.querySelectorAll("td, th");
+        cells.forEach((cell, colIndex) => {
+          const cellText = processChildren(Array.from(cell.childNodes)).trim().replace(/\s+/g, " ");
+          columnWidths[colIndex] = Math.max(columnWidths[colIndex] || 0, cellText.length, 10);
+        });
+      });
+      // Second pass: format rows
+      allRows.forEach((row, rowIndex) => {
+        const cells = row.querySelectorAll("td, th");
+        const cellTexts = Array.from(cells).map(cell => {
+          let cellContent = processChildren(Array.from(cell.childNodes)).trim();
+          cellContent = cellContent.replace(/\s+/g, " ");
+          return cellContent;
+        });
+        // Pad cells to column widths
+        const paddedCells = cellTexts.map((text, i) => {
+          const width = columnWidths[i] || 10;
+          return text.padEnd(width);
+        });
+        tableText += paddedCells.join(" | ") + "\n";
+        // Add separator after header row
+        if (rowIndex === 0 && row.querySelector("th")) {
+          tableText += columnWidths.map(w => "-".repeat(w)).join("-|-") + "\n";
+        }
+      });
+      return tableText;
+    };
+    const processList = (list) => {
+      const items = list.querySelectorAll("li");
+      return Array.from(items).map(item => {
+        return "  • " + processChildren(Array.from(item.childNodes)).trim();
+      }).join("\n");
+    };
+    text = processChildren(Array.from(tempDiv.childNodes));
+    // Clean up extra newlines
+    text = text.replace(/\n{3,}/g, "\n\n");
+    text = text.trim();
+    return text;
+  };
+  const handleCopy = () => {
+    let content = "";
+    if (activeTab === "json") {
+      const preparedFields = prepareFieldsForOutput(fields, "json");
+      content = JSON.stringify(preparedFields, null, 2);
+    } else if (activeTab === "xml") {
+      content = objectToXML(fields);
+    } else {
+      // For text tab, get the formatted HTML and convert to plain text with layout
+      const textContent = extractTextFromFields(fields);
+      const htmlContent = renderMarkdownToHTML(textContent);
+      content = htmlToFormattedText(htmlContent);
+    }
+    navigator.clipboard.writeText(content);
+    setCopied(true);
+    setTimeout(() => setCopied(false), 2000);
+  };
+  // Get prepared fields for display
+  const preparedFields = React.useMemo(() => {
+    return prepareFieldsForOutput(fields, "json");
+  }, [fields]);
+  // Update expanded sections when fields change
+  React.useEffect(() => {
+    if (extractionResult?.fields) {
+      setExpandedSections(Object.keys(extractionResult.fields).slice(0, 5));
+    }
+  }, [extractionResult]);
+  const toggleSection = (section) => {
+    setExpandedSections((prev) =>
+      prev.includes(section) ? prev.filter((s) => s !== section) : [...prev, section]
+    );
+  };
+  const renderValue = (value) => {
+    if (typeof value === "number") {
+      return <span className="text-amber-600">{value}</span>;
+    }
+    if (typeof value === "string") {
+      return <span className="text-emerald-600">"{value}"</span>;
+    }
+    return String(value);
+  };
+  const renderSection = (key, value, level = 0) => {
+    const isExpanded = expandedSections.includes(key);
+    const isObject = typeof value === "object" && value !== null;
+    const isArray = Array.isArray(value);
+    if (!isObject) {
+      return (
+        <div
+          key={key}
+          className="flex items-start gap-2 py-1"
+          style={{ paddingLeft: level * 16 }}
+        >
+          <span className="text-violet-500">"{key}"</span>
+          <span className="text-slate-400">:</span>
+          {renderValue(value)}
+        </div>
+      );
+    }
+    return (
+      <div key={key}>
+        <button
+          onClick={() => toggleSection(key)}
+          className="flex items-center gap-2 py-1 hover:bg-slate-50 w-full text-left rounded"
+          style={{ paddingLeft: level * 16 }}
+        >
+          <ChevronDown
+            className={cn(
+              "h-3 w-3 text-slate-400 transition-transform",
+              !isExpanded && "-rotate-90"
+            )}
+          />
+          <span className="text-violet-500">"{key}"</span>
+          <span className="text-slate-400">:</span>
+          <span className="text-slate-400">{isArray ? "[" : "{"}</span>
+          {!isExpanded && (
+            <span className="text-slate-300 text-xs">
+              {isArray ? `${value.length} items` : `${Object.keys(value).length} fields`}
+            </span>
+          )}
+        </button>
+        <AnimatePresence>
+          {isExpanded && (
+            <motion.div
+              initial={{ height: 0, opacity: 0 }}
+              animate={{ height: "auto", opacity: 1 }}
+              exit={{ height: 0, opacity: 0 }}
+              transition={{ duration: 0.2 }}
+              className="overflow-hidden"
+            >
+              {isArray ? (
+                value.map((item, idx) => (
+                  <div key={idx} className="border-l border-slate-100 ml-4">
+                    {Object.entries(item).map(([k, v]) => renderSection(k, v, level + 2))}
+                    {idx < value.length - 1 && <div className="h-2" />}
+                  </div>
+                ))
+              ) : (
+                Object.entries(value).map(([k, v]) => renderSection(k, v, level + 1))
+              )}
+              <div style={{ paddingLeft: level * 16 }} className="text-slate-400">
+                {isArray ? "]" : "}"}
+              </div>
+            </motion.div>
+          )}
+        </AnimatePresence>
+      </div>
+    );
+  };
+  return (
+    <div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden">
+      {/* Header */}
+      <div className="flex items-center justify-between px-5 py-4 border-b border-slate-100">
+        <div className="flex items-center gap-3">
+          <div className="h-8 w-8 rounded-lg bg-emerald-50 flex items-center justify-center">
+            <Code2 className="h-4 w-4 text-emerald-600" />
+          </div>
+          <div>
+            <h3 className="font-semibold text-slate-800 text-sm">Extracted Data</h3>
+            <p className="text-xs text-slate-400">
+              {isComplete
+                ? `${fieldsExtracted} field${fieldsExtracted !== 1 ? 's' : ''} extracted`
+                : "Waiting for extraction"}
+            </p>
+          </div>
+          {isComplete && onNewUpload && (
+            <Button
+              variant="ghost"
+              size="sm"
+              onClick={onNewUpload}
+              className="h-8 ml-auto text-xs gap-1.5 text-indigo-600 hover:text-indigo-700 hover:bg-indigo-50"
+              title="Upload new document"
+            >
+              <Upload className="h-3.5 w-3.5" />
+              New
+            </Button>
+          )}
+        </div>
+        {isComplete && (
+          <div className="flex items-center gap-2">
+            <Tabs value={activeTab} onValueChange={setActiveTab}>
+              <TabsList className="h-8 bg-slate-100 p-0.5">
+                <TabsTrigger value="text" className="h-7 text-xs gap-1.5">
+                  <FileText className="h-3 w-3" />
+                  Text
+                </TabsTrigger>
+                <TabsTrigger value="json" className="h-7 text-xs gap-1.5">
+                  <Braces className="h-3 w-3" />
+                  JSON
+                </TabsTrigger>
+                <TabsTrigger value="xml" className="h-7 text-xs gap-1.5">
+                  <FileCode2 className="h-3 w-3" />
+                  XML
+                </TabsTrigger>
+              </TabsList>
+            </Tabs>
+            <Button
+              variant="ghost"
+              size="sm"
+              onClick={handleCopy}
+              className="h-8 text-xs gap-1.5"
+            >
+              {copied ? (
+                <>
+                  <Check className="h-3 w-3 text-emerald-500" />
+                  Copied
+                </>
+              ) : (
+                <>
+                  <Copy className="h-3 w-3" />
+                  Copy
+                </>
+              )}
+            </Button>
+          </div>
+        )}
+      </div>
+      {/* Output Area */}
+      <div className="flex-1 overflow-auto">
+        {!hasFile ? (
+          <div className="h-full flex items-center justify-center p-6">
+            <div className="text-center">
+              <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
+                <Code2 className="h-10 w-10 text-slate-300" />
+              </div>
+              <p className="text-slate-400 text-sm">Extracted data will appear here</p>
+            </div>
+          </div>
+        ) : isProcessing ? (
+          <div className="h-full flex items-center justify-center p-6">
+            <div className="text-center">
+              <motion.div
+                animate={{ rotate: 360 }}
+                transition={{ duration: 2, repeat: Infinity, ease: "linear" }}
+                className="h-16 w-16 mx-auto rounded-2xl bg-gradient-to-br from-indigo-100 to-violet-100 flex items-center justify-center mb-4"
+              >
+                <Sparkles className="h-8 w-8 text-indigo-500" />
+              </motion.div>
+              <p className="text-slate-700 font-medium mb-1">Extracting data...</p>
+              <p className="text-slate-400 text-sm">{statusMessage}</p>
+              <div className="mt-6 flex items-center justify-center gap-1">
+                {[0, 1, 2].map((i) => (
+                  <motion.div
+                    key={i}
+                    animate={{ scale: [1, 1.2, 1] }}
+                    transition={{
+                      duration: 0.6,
+                      repeat: Infinity,
+                      delay: i * 0.2,
+                    }}
+                    className="h-2 w-2 rounded-full bg-indigo-400"
+                  />
+                ))}
+              </div>
+            </div>
+          </div>
+        ) : isComplete && Object.keys(fields).length === 0 ? (
+          <div className="h-full flex items-center justify-center p-6">
+            <div className="text-center">
+              <div className="h-20 w-20 mx-auto rounded-2xl bg-amber-100 flex items-center justify-center mb-4">
+                <Code2 className="h-10 w-10 text-amber-600" />
+              </div>
+              <p className="text-slate-600 font-medium mb-1">No data extracted</p>
+              <p className="text-slate-400 text-sm">The document may not contain extractable fields</p>
+            </div>
+          </div>
+        ) : (
+          <div className="p-4 font-mono text-sm">
+            {activeTab === "text" ? (
+              <div
+                className="text-sm text-slate-700 leading-relaxed"
+                style={{
+                  fontFamily: 'system-ui, -apple-system, sans-serif'
+                }}
+              >
+                <div
+                  className="markdown-content"
+                  dangerouslySetInnerHTML={{ __html: renderMarkdownToHTML(fieldsToText(fields)) }}
+                  style={{
+                    lineHeight: '1.6'
+                  }}
+                />
+                <style>{`
+                  .markdown-content h1 {
+                    font-size: 1.5rem;
+                    font-weight: 700;
+                    color: #0f172a;
+                    margin-top: 1.5rem;
+                    margin-bottom: 1rem;
+                    line-height: 1.3;
+                  }
+                  .markdown-content h2 {
+                    font-size: 1.25rem;
+                    font-weight: 600;
+                    color: #0f172a;
+                    margin-top: 1.25rem;
+                    margin-bottom: 0.75rem;
+                    line-height: 1.3;
+                  }
+                  .markdown-content h3 {
+                    font-size: 1.125rem;
+                    font-weight: 600;
+                    color: #1e293b;
+                    margin-top: 1rem;
+                    margin-bottom: 0.5rem;
+                    line-height: 1.3;
+                  }
+                  .markdown-content p {
+                    margin-top: 0.75rem;
+                    margin-bottom: 0.75rem;
+                    color: #334155;
+                  }
+                  .markdown-content table {
+                    width: 100%;
+                    border-collapse: collapse;
+                    margin: 1.5rem 0;
+                    font-size: 0.875rem;
+                    box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1);
+                  }
+                  .markdown-content table caption {
+                    font-weight: 600;
+                    margin-bottom: 0.5rem;
+                    text-align: left;
+                  }
+                  .markdown-content table th {
+                    background-color: #f8fafc;
+                    border: 1px solid #cbd5e1;
+                    padding: 0.75rem;
+                    text-align: left;
+                    font-weight: 600;
+                    color: #0f172a;
+                  }
+                  .markdown-content table td {
+                    border: 1px solid #cbd5e1;
+                    padding: 0.75rem;
+                    color: #334155;
+                  }
+                  .markdown-content table tr:nth-child(even) {
+                    background-color: #f8fafc;
+                  }
+                  .markdown-content table tr:hover {
+                    background-color: #f1f5f9;
+                  }
+                  .markdown-content strong {
+                    font-weight: 600;
+                    color: #0f172a;
+                  }
+                  .markdown-content em {
+                    font-style: italic;
+                  }
+                  .markdown-content a {
+                    color: #4f46e5;
+                    text-decoration: underline;
+                  }
+                  .markdown-content a:hover {
+                    color: #4338ca;
+                  }
+                  .markdown-content sup {
+                    font-size: 0.75em;
+                    vertical-align: super;
+                    line-height: 0;
+                    position: relative;
+                    top: -0.5em;
+                  }
+                  .markdown-content sub {
+                    font-size: 0.75em;
+                    vertical-align: sub;
+                    line-height: 0;
+                    position: relative;
+                    bottom: -0.25em;
+                  }
+                  .markdown-content ul, .markdown-content ol {
+                    margin: 0.75rem 0;
+                    padding-left: 1.5rem;
+                  }
+                  .markdown-content li {
+                    margin: 0.25rem 0;
+                  }
+                `}</style>
+              </div>
+            ) : activeTab === "json" ? (
+              <div className="space-y-1">
+                <span className="text-slate-400">{"{"}</span>
+                {Object.keys(preparedFields).length > 0 ? (
+                  Object.entries(preparedFields).map(([key, value]) =>
+                    renderSection(key, value, 1)
+                  )
+                ) : (
+                  <div className="pl-4 text-slate-400 italic">No fields extracted</div>
+                )}
+                <span className="text-slate-400">{"}"}</span>
+              </div>
+            ) : (
+              <pre className="text-sm text-slate-600 whitespace-pre-wrap">
+                {objectToXML(fields).split("\n").map((line, i) => (
+                  <div key={i} className="hover:bg-slate-50 px-2 -mx-2 rounded">
+                    {line.includes("<") ? (
+                      <>
+                        {line.split(/(<\/?[\w\s=".-]+>)/g).map((part, j) => {
+                          if (part.startsWith("</")) {
+                            return (
+                              <span key={j} className="text-rose-500">
+                                {part}
+                              </span>
+                            );
+                          }
+                          if (part.startsWith("<")) {
+                            return (
+                              <span key={j} className="text-indigo-500">
+                                {part}
+                              </span>
+                            );
+                          }
+                          return (
+                            <span key={j} className="text-slate-700">
+                              {part}
+                            </span>
+                          );
+                        })}
+                      </>
+                    ) : (
+                      line
+                    )}
+                  </div>
+                ))}
+              </pre>
+            )}
+          </div>
+        )}
+      </div>
+      {/* Confidence Footer */}
+      {isComplete && extractionResult && (
+        <div className="px-5 py-3 border-t border-slate-100 bg-slate-50/50">
+          <div className="flex items-center justify-between text-xs">
+            <div className="flex items-center gap-4">
+              <div className="flex items-center gap-1.5">
+                <div className={cn(
+                  "h-2 w-2 rounded-full",
+                  confidence >= 90 ? "bg-emerald-500" : confidence >= 70 ? "bg-amber-500" : "bg-red-500"
+                )} />
+                <span className="text-slate-500">Confidence:</span>
+                <span className="font-semibold text-slate-700">
+                  {confidence > 0 ? `${confidence.toFixed(1)}%` : "N/A"}
+                </span>
+              </div>
+              <div className="flex items-center gap-1.5">
+                <span className="text-slate-500">Fields:</span>
+                <span className="font-semibold text-slate-700">{fieldsExtracted}</span>
+              </div>
+            </div>
+            <span className="text-slate-400">
+              Processed in {totalTime >= 1000 ? `${(totalTime / 1000).toFixed(1)}s` : `${totalTime}ms`}
+            </span>
+          </div>
+        </div>
+      )}
+    </div>
+  );
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/components/ocr/ProcessingStatus.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 import React from "react";
 import { motion } from "framer-motion";
 import {
@@ -116,3 +117,123 @@ export default function ProcessingStatus({ isProcessing, isComplete, currentStag
     </motion.div>
   );
 }

+<<<<<<< HEAD
 import React from "react";
 import { motion } from "framer-motion";
 import {
     </motion.div>
   );
 }
+=======
+import React from "react";
+import { motion } from "framer-motion";
+import {
+  FileSearch,
+  Cpu,
+  TableProperties,
+  CheckCircle2,
+  Loader2,
+} from "lucide-react";
+import { cn } from "@/lib/utils";
+const steps = [
+  { id: "upload", label: "Received", icon: FileSearch },
+  { id: "analyze", label: "Analysis", icon: Cpu },
+  { id: "extract", label: "Extraction", icon: TableProperties },
+  { id: "complete", label: "Done", icon: CheckCircle2 },
+];
+export default function ProcessingStatus({ isProcessing, isComplete, currentStage }) {
+  const getCurrentStep = () => {
+    if (isComplete) return 4; // Done
+    if (!isProcessing) return 0; // Not started
+    // Use provided currentStage or default based on isProcessing
+    if (currentStage === "extraction") return 3; // Extraction
+    if (currentStage === "analysis") return 2; // Analysis
+    if (currentStage === "received") return 1; // Received
+    // Default: if processing, start at Analysis
+    return 2; // Analysis
+  };
+  const currentStep = getCurrentStep();
+  if (!isProcessing && !isComplete) return null;
+  return (
+    <motion.div
+      initial={{ opacity: 0, y: -10 }}
+      animate={{ opacity: 1, y: 0 }}
+      className="bg-white rounded-xl border border-slate-200 px-4 py-3"
+    >
+      <div className="flex items-center justify-between gap-2">
+        {steps.map((step, index) => {
+          const isActive = index + 1 === currentStep;
+          const isCompleted = index + 1 < currentStep || isComplete;
+          const Icon = step.icon;
+          return (
+            <React.Fragment key={step.id}>
+              <div className="flex items-center gap-2">
+                <motion.div
+                  initial={false}
+                  animate={{
+                    scale: (isActive && !isComplete) ? 1.05 : 1,
+                    backgroundColor: isCompleted
+                      ? "rgb(16 185 129)"
+                      : (isActive && !isComplete)
+                      ? "rgb(99 102 241)"
+                      : "rgb(241 245 249)",
+                  }}
+                  className={cn(
+                    "h-8 w-8 rounded-lg flex items-center justify-center transition-colors",
+                    (isCompleted || isActive) && "shadow-md"
+                  )}
+                  style={{
+                    boxShadow: (isActive && !isComplete)
+                      ? "0 4px 8px -2px rgba(99, 102, 241, 0.3)"
+                      : isCompleted
+                      ? "0 4px 8px -2px rgba(16, 185, 129, 0.3)"
+                      : "none",
+                  }}
+                >
+                  {(isActive && !isComplete) ? (
+                    <motion.div
+                      animate={{ rotate: 360 }}
+                      transition={{ duration: 1.5, repeat: Infinity, ease: "linear" }}
+                    >
+                      <Loader2 className="h-4 w-4 text-white" />
+                    </motion.div>
+                  ) : isCompleted ? (
+                    <CheckCircle2 className="h-4 w-4 text-white" />
+                  ) : (
+                    <Icon className={cn("h-4 w-4 text-slate-400")} />
+                  )}
+                </motion.div>
+                <span
+                  className={cn(
+                    "text-xs font-medium hidden sm:inline",
+                    isActive ? "text-indigo-600" : isCompleted ? "text-emerald-600" : "text-slate-400"
+                  )}
+                >
+                  {step.label}
+                </span>
+              </div>
+              {index < steps.length - 1 && (
+                <div className="flex-1 h-0.5 mx-1 relative overflow-hidden rounded-full bg-slate-100">
+                  <motion.div
+                    initial={{ width: 0 }}
+                    animate={{
+                      width: isCompleted ? "100%" : isActive ? "50%" : "0%",
+                    }}
+                    transition={{ duration: 0.5 }}
+                    className={cn(
+                      "absolute inset-y-0 left-0",
+                      isCompleted ? "bg-emerald-500" : "bg-indigo-500"
+                    )}
+                  />
+                </div>
+              )}
+            </React.Fragment>
+          );
+        })}
+      </div>
+    </motion.div>
+  );
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/components/ocr/UpgradeModal.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 import React from "react";
 import { motion } from "framer-motion";
 import { cn } from "@/lib/utils";
@@ -211,3 +212,218 @@ export default function UpgradeModal({ open, onClose }) {
     );
 }

+<<<<<<< HEAD
 import React from "react";
 import { motion } from "framer-motion";
 import { cn } from "@/lib/utils";
     );
 }
+=======
+import React from "react";
+import { motion } from "framer-motion";
+import { cn } from "@/lib/utils";
+import {
+    X,
+    Sparkles,
+    Zap,
+    Shield,
+    Cloud,
+    BarChart3,
+    Bot,
+    Globe,
+    Lock,
+    Rocket,
+    Users,
+    CheckCircle2,
+    ArrowRight
+} from "lucide-react";
+import { Button } from "@/components/ui/button";
+const features = [
+    {
+        icon: Zap,
+        title: "Production-Scale Processing",
+        description: "Remove trial limits and run live AP and operations workflows",
+        color: "amber",
+        cta: "Explore with a demo",
+        gradient: "from-amber-500 to-orange-500"
+    },
+    {
+        icon: Bot,
+        title: "Advanced Agentic Processing",
+        description: "You can customize your own agentic pipeline with your own data",
+        color: "indigo",
+        cta: "Talk to Sales",
+        gradient: "from-indigo-500 to-violet-500"
+    },
+    {
+        icon: Cloud,
+        title: "API Access",
+        description: "Integrate EZOFIS into your workflow with our REST API",
+        color: "blue",
+        cta: "Talk to a Techie!",
+        gradient: "from-blue-500 to-cyan-500"
+    }
+];
+export default function UpgradeModal({ open, onClose }) {
+    if (!open) return null;
+    return (
+        <div className="fixed inset-0 z-50 flex items-center justify-center">
+            {/* Backdrop */}
+            <motion.div
+                initial={{ opacity: 0 }}
+                animate={{ opacity: 1 }}
+                exit={{ opacity: 0 }}
+                className="absolute inset-0 bg-black/50 backdrop-blur-sm"
+                onClick={onClose}
+            />
+            {/* Modal */}
+            <motion.div
+                initial={{ opacity: 0, scale: 0.95, y: 20 }}
+                animate={{ opacity: 1, scale: 1, y: 0 }}
+                exit={{ opacity: 0, scale: 0.95, y: 20 }}
+                className="relative z-10 w-full max-w-6xl max-h-[90vh] mx-4 bg-white rounded-2xl shadow-2xl overflow-hidden flex flex-col"
+                onClick={(e) => e.stopPropagation()}
+            >
+                {/* Header */}
+                <div className="sticky top-0 bg-gradient-to-r from-indigo-600 via-violet-600 to-purple-600 text-white px-8 py-6 z-10">
+                    <button
+                        onClick={onClose}
+                        className="absolute right-6 top-6 h-8 w-8 rounded-lg bg-white/10 hover:bg-white/20 flex items-center justify-center transition-colors"
+                    >
+                        <X className="h-4 w-4" />
+                    </button>
+                    <motion.div
+                        initial={{ opacity: 0, y: 20 }}
+                        animate={{ opacity: 1, y: 0 }}
+                        className="text-center"
+                    >
+                        <div className="inline-flex items-center gap-2 px-4 py-1.5 rounded-full bg-white/10 backdrop-blur-sm mb-4">
+                            <Sparkles className="h-4 w-4" />
+                            <span className="text-sm font-medium">Trial Limit Reached</span>
+                        </div>
+                        <h2 className="text-3xl font-bold mb-2">You've processed 2 documents</h2>
+                        <p className="text-white/80 text-lg">Continue with production-ready document intelligence</p>
+                    </motion.div>
+                </div>
+                {/* Stats Bar */}
+                <div className="grid grid-cols-3 gap-6 px-8 py-6 bg-slate-50 border-b border-slate-200">
+                    {[
+                        { label: "Accuracy Rate", value: "99.8%", icon: CheckCircle2 },
+                        { label: "Processing Speed", value: "< 10s", icon: Zap },
+                        { label: "Operational Users", value: "10,000+", icon: Users }
+                    ].map((stat, i) => (
+                        <motion.div
+                            key={stat.label}
+                            initial={{ opacity: 0, y: 20 }}
+                            animate={{ opacity: 1, y: 0 }}
+                            transition={{ delay: i * 0.1 }}
+                            className="text-center"
+                        >
+                            <div className="flex items-center justify-center gap-2 mb-1">
+                                <stat.icon className="h-4 w-4 text-indigo-600" />
+                                <span className="text-2xl font-bold text-slate-900">{stat.value}</span>
+                            </div>
+                            <p className="text-sm text-slate-500">{stat.label}</p>
+                        </motion.div>
+                    ))}
+                </div>
+                {/* Features Grid - Scrollable */}
+                <div className="flex-1 overflow-auto px-8 py-8">
+                    <div className="text-center mb-8">
+                        <h3 className="text-2xl font-bold text-slate-900 mb-2">
+                        Continue to Production Use
+                        </h3>
+                    </div>
+                    <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
+                        {features.map((feature, index) => (
+                            <motion.div
+                                key={feature.title}
+                                initial={{ opacity: 0, y: 20 }}
+                                animate={{ opacity: 1, y: 0 }}
+                                transition={{ delay: 0.2 + index * 0.1 }}
+                                className="group relative bg-white rounded-2xl border border-slate-200 p-6 hover:shadow-xl hover:shadow-slate-200/50 transition-all duration-300 hover:-translate-y-1 overflow-hidden"
+                            >
+                                {/* Gradient Background on Hover */}
+                                <div className={`absolute inset-0 bg-gradient-to-br ${feature.gradient} opacity-0 group-hover:opacity-5 transition-opacity duration-300`} />
+                                <div className="relative">
+                                    <div className={cn(
+                                        "h-12 w-12 rounded-xl flex items-center justify-center mb-4 group-hover:scale-110 transition-transform duration-300",
+                                        feature.color === "amber" && "bg-amber-50",
+                                        feature.color === "indigo" && "bg-indigo-50",
+                                        feature.color === "blue" && "bg-blue-50",
+                                        feature.color === "emerald" && "bg-emerald-50",
+                                        feature.color === "slate" && "bg-slate-50",
+                                        feature.color === "purple" && "bg-purple-50"
+                                    )}>
+                                        <feature.icon className={cn(
+                                            "h-6 w-6",
+                                            feature.color === "amber" && "text-amber-600",
+                                            feature.color === "indigo" && "text-indigo-600",
+                                            feature.color === "blue" && "text-blue-600",
+                                            feature.color === "emerald" && "text-emerald-600",
+                                            feature.color === "slate" && "text-slate-600",
+                                            feature.color === "purple" && "text-purple-600"
+                                        )} />
+                                    </div>
+                                    <h4 className="font-semibold text-slate-900 mb-2">{feature.title}</h4>
+                                    <p className="text-sm text-slate-600 mb-4 leading-relaxed">{feature.description}</p>
+                                    <Button
+                                        variant="ghost"
+                                        size="sm"
+                                        className={cn(
+                                            "w-full h-9 border transition-all group-hover:shadow-md",
+                                            feature.color === "amber" && "text-amber-600 hover:bg-amber-50 border-amber-200 hover:border-amber-300",
+                                            feature.color === "indigo" && "text-indigo-600 hover:bg-indigo-50 border-indigo-200 hover:border-indigo-300",
+                                            feature.color === "blue" && "text-blue-600 hover:bg-blue-50 border-blue-200 hover:border-blue-300",
+                                            feature.color === "emerald" && "text-emerald-600 hover:bg-emerald-50 border-emerald-200 hover:border-emerald-300",
+                                            feature.color === "slate" && "text-slate-600 hover:bg-slate-50 border-slate-200 hover:border-slate-300",
+                                            feature.color === "purple" && "text-purple-600 hover:bg-purple-50 border-purple-200 hover:border-purple-300"
+                                        )}
+                                    >
+                                        {feature.cta}
+                                        <ArrowRight className="h-3.5 w-3.5 ml-2 group-hover:translate-x-1 transition-transform" />
+                                    </Button>
+                                </div>
+                            </motion.div>
+                        ))}
+                    </div>
+                </div>
+                {/* CTA Footer */}
+                <div className="sticky bottom-0 bg-white border-t border-slate-200 px-8 py-6">
+                    <div className="flex items-center justify-between gap-6">
+                        <div className="flex-1">
+                            <h4 className="font-semibold text-slate-900 mb-1">Ready to scale?</h4>
+                            <p className="text-sm text-slate-600">No commitment. We’ll tailor the demo to your documents and workflows.</p>
+                        </div>
+                        <div className="flex items-center gap-3">
+                            <Button
+                                variant="outline"
+                                size="lg"
+                                className="h-11 border-slate-300"
+                            >
+                                <Users className="h-4 w-4 mr-2" />
+                                Talk to Sales
+                            </Button>
+                            <Button
+                                size="lg"
+                                className="h-11 bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700 shadow-lg shadow-indigo-500/25 hover:shadow-xl hover:shadow-indigo-500/30"
+                            >
+                                <Rocket className="h-4 w-4 mr-2" />
+                                Start a production evaluation
+                                <Sparkles className="h-4 w-4 ml-2" />
+                            </Button>
+                        </div>
+                    </div>
+                </div>
+            </motion.div>
+        </div>
+    );
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/components/ocr/UploadZone.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 import React, { useState, useEffect } from "react";
 import { motion, AnimatePresence } from "framer-motion";
 import { Upload, FileText, Image, FileSpreadsheet, X, Sparkles, AlertCircle } from "lucide-react";
@@ -249,3 +250,256 @@ export default function UploadZone({ onFileSelect, selectedFile, onClear, keyFie
     </div>
   );
 }

+<<<<<<< HEAD
 import React, { useState, useEffect } from "react";
 import { motion, AnimatePresence } from "framer-motion";
 import { Upload, FileText, Image, FileSpreadsheet, X, Sparkles, AlertCircle } from "lucide-react";
     </div>
   );
 }
+=======
+import React, { useState, useEffect } from "react";
+import { motion, AnimatePresence } from "framer-motion";
+import { Upload, FileText, Image, FileSpreadsheet, X, Sparkles, AlertCircle } from "lucide-react";
+import { cn } from "@/lib/utils";
+import { Input } from "@/components/ui/input";
+// MIME types that validateFile accepts when matching against file.type
+const ALLOWED_TYPES = [
+  "application/pdf",
+  "image/png",
+  "image/jpeg",
+  "image/jpg", // NOTE(review): not a registered MIME type (JPEG is "image/jpeg"); presumably a defensive alias - confirm
+  "image/tiff",
+  "image/tif" // NOTE(review): likewise looks like an alias of "image/tiff" - confirm
+];
+// File-name extensions (lower-case, with leading dot) used as a fallback when file.type is not in ALLOWED_TYPES
+const ALLOWED_EXTENSIONS = [".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"];
+// Upload size cap; validateFile rejects files whose size is strictly greater than this
+const MAX_FILE_SIZE = 4 * 1024 * 1024; // 4 MB expressed in bytes
+/**
+ * Upload area with two states: a drag-and-drop / click-to-browse drop zone
+ * when no file is selected, and a file summary plus "Key Fields" input once
+ * a file has been chosen.
+ *
+ * @param {object} props
+ * @param {(file: File) => void} props.onFileSelect - Called with a file that passed validation.
+ * @param {File|null} props.selectedFile - Currently selected file; falsy shows the drop zone.
+ * @param {() => void} props.onClear - Called when the user removes the selected file.
+ * @param {string} [props.keyFields=""] - Current value of the key-fields text input.
+ * @param {(value: string) => void} [props.onKeyFieldsChange] - Called with the new key-fields text.
+ */
+export default function UploadZone({ onFileSelect, selectedFile, onClear, keyFields = "", onKeyFieldsChange = () => {} }) {
+  const [isDragging, setIsDragging] = useState(false);
+  const [error, setError] = useState(null);
+  // Returns true when the file passes the type and size checks; otherwise
+  // stores a user-facing message in `error` and returns false.
+  const validateFile = (file) => {
+    // Reset error
+    setError(null);
+    // Check file type: a listed MIME type OR a listed extension is enough
+    const fileExtension = "." + file.name.split(".").pop().toLowerCase();
+    const isValidType = ALLOWED_TYPES.includes(file.type) || ALLOWED_EXTENSIONS.includes(fileExtension);
+    if (!isValidType) {
+      setError("Only PDF, PNG, JPG, and TIFF files are allowed.");
+      return false;
+    }
+    // Check file size
+    if (file.size > MAX_FILE_SIZE) {
+      const fileSizeMB = (file.size / 1024 / 1024).toFixed(2);
+      setError(`File size exceeds 4 MB limit. Your file is ${fileSizeMB} MB.`);
+      return false;
+    }
+    return true;
+  };
+  // validateFile already clears any previous error, so no extra reset is needed here.
+  const handleFileSelect = (file) => {
+    if (validateFile(file)) {
+      onFileSelect(file);
+    }
+  };
+  const handleDragOver = (e) => {
+    e.preventDefault();
+    setIsDragging(true);
+  };
+  const handleDragLeave = (e) => {
+    // dragleave also fires when the pointer moves from the zone onto one of
+    // its children; ignore those so the highlight does not flicker.
+    if (e?.currentTarget?.contains(e.relatedTarget)) {
+      return;
+    }
+    setIsDragging(false);
+  };
+  const handleDrop = (e) => {
+    e.preventDefault();
+    setIsDragging(false);
+    // Only the first dropped file is used.
+    const file = e.dataTransfer.files[0];
+    if (file) {
+      handleFileSelect(file);
+    }
+  };
+  // Picks the icon component for the selected file based on its MIME type.
+  const getFileIcon = (type) => {
+    if (type?.includes("image")) return Image;
+    if (type?.includes("spreadsheet") || type?.includes("excel")) return FileSpreadsheet;
+    return FileText;
+  };
+  const FileIcon = selectedFile ? getFileIcon(selectedFile.type) : FileText;
+  // Clear error when file is cleared
+  useEffect(() => {
+    if (!selectedFile) {
+      setError(null);
+    }
+  }, [selectedFile]);
+  return (
+    <div className="w-full">
+      <AnimatePresence mode="wait">
+        {!selectedFile ? (
+          <motion.div
+            key="upload"
+            initial={{ opacity: 0, y: 10 }}
+            animate={{ opacity: 1, y: 0 }}
+            exit={{ opacity: 0, y: -10 }}
+            transition={{ duration: 0.2 }}
+            onDragOver={handleDragOver}
+            onDragLeave={handleDragLeave}
+            onDrop={handleDrop}
+            className={cn(
+              "relative group cursor-pointer",
+              "border-2 border-dashed rounded-2xl",
+              "transition-all duration-300 ease-out",
+              isDragging
+                ? "border-indigo-400 bg-indigo-50/50"
+                : "border-slate-200 hover:border-indigo-300 hover:bg-slate-50/50"
+            )}
+          >
+            <label className="flex flex-col items-center justify-center py-16 px-8 cursor-pointer">
+              <motion.div
+                animate={isDragging ? { scale: 1.1, y: -5 } : { scale: 1, y: 0 }}
+                className={cn(
+                  "h-16 w-16 rounded-2xl flex items-center justify-center mb-6 transition-colors duration-300",
+                  isDragging
+                    ? "bg-indigo-100"
+                    : "bg-gradient-to-br from-slate-100 to-slate-50 group-hover:from-indigo-100 group-hover:to-violet-50"
+                )}
+              >
+                <Upload
+                  className={cn(
+                    "h-7 w-7 transition-colors duration-300",
+                    isDragging ? "text-indigo-600" : "text-slate-400 group-hover:text-indigo-500"
+                  )}
+                />
+              </motion.div>
+              <div className="text-center">
+                <p className="text-lg font-semibold text-slate-700 mb-1">
+                  {isDragging ? "Drop your file here" : "Drop your file here, or browse"}
+                </p>
+                <p className="text-sm text-slate-400">
+                  Supports PDF, PNG, JPG, TIFF up to 4MB
+                </p>
+              </div>
+              <div className="flex items-center gap-2 mt-6">
+                <div className="flex -space-x-1">
+                  {[
+                    "bg-red-100 text-red-600",
+                    "bg-blue-100 text-blue-600",
+                    "bg-green-100 text-green-600",
+                    "bg-amber-100 text-amber-600",
+                  ].map((color, i) => (
+                    <div
+                      key={i}
+                      className={`h-8 w-8 rounded-lg ${color.split(" ")[0]} flex items-center justify-center border-2 border-white`}
+                    >
+                      <FileText className={`h-4 w-4 ${color.split(" ")[1]}`} />
+                    </div>
+                  ))}
+                </div>
+                <span className="text-xs text-slate-400 ml-2">Multiple formats supported</span>
+              </div>
+              <input
+                type="file"
+                className="hidden"
+                accept=".pdf,.png,.jpg,.jpeg,.tiff,.tif"
+                onChange={(e) => {
+                  const file = e.target.files[0];
+                  if (file) {
+                    handleFileSelect(file);
+                  }
+                  // Reset input so same file can be selected again after error
+                  e.target.value = "";
+                }}
+              />
+            </label>
+            {/* Decorative gradient border on hover */}
+            <div className="absolute inset-0 -z-10 rounded-2xl bg-gradient-to-r from-indigo-500 via-violet-500 to-purple-500 opacity-0 group-hover:opacity-10 blur-xl transition-opacity duration-500" />
+          </motion.div>
+        ) : (
+          <motion.div
+            key="selected"
+            initial={{ opacity: 0, scale: 0.95 }}
+            animate={{ opacity: 1, scale: 1 }}
+            exit={{ opacity: 0, scale: 0.95 }}
+            className="grid grid-cols-1 lg:grid-cols-2 gap-3"
+          >
+            {/* File Info Box */}
+            <div className="relative bg-gradient-to-br from-indigo-50 to-violet-50 rounded-xl p-3 border border-indigo-100">
+              <div className="flex items-center gap-3">
+                <div className="h-10 w-10 rounded-lg bg-white shadow-sm flex items-center justify-center flex-shrink-0">
+                  <FileIcon className="h-5 w-5 text-indigo-600" />
+                </div>
+                <div className="flex-1 min-w-0">
+                  <p className="font-medium text-slate-800 truncate text-sm">{selectedFile.name}</p>
+                  <div className="flex items-center gap-2 text-xs text-slate-500">
+                    <span>{(selectedFile.size / 1024 / 1024).toFixed(2)} MB</span>
+                    <span className="text-indigo-500">•</span>
+                    <span className="text-indigo-600 flex items-center gap-1">
+                      <Sparkles className="h-3 w-3" />
+                      Ready for extraction
+                    </span>
+                  </div>
+                </div>
+                {/* type="button" keeps this from submitting an enclosing form; aria-label names the icon-only control */}
+                <button
+                  type="button"
+                  aria-label="Remove selected file"
+                  onClick={onClear}
+                  className="h-8 w-8 rounded-lg bg-white hover:bg-red-50 border border-slate-200 hover:border-red-200 flex items-center justify-center text-slate-400 hover:text-red-500 transition-colors"
+                >
+                  <X className="h-4 w-4" />
+                </button>
+              </div>
+            </div>
+            {/* Key Fields Box */}
+            <div className="relative bg-white rounded-xl p-3 border border-slate-200">
+              <label className="block text-xs font-medium text-slate-600 mb-1.5">
+                <span className="font-bold">Key Fields</span> <span className="font-normal">(if required)</span>
+              </label>
+              <Input
+                type="text"
+                value={keyFields || ""}
+                onChange={(e) => {
+                  // Guard covers callers that pass null explicitly (the default only applies to undefined)
+                  if (onKeyFieldsChange) {
+                    onKeyFieldsChange(e.target.value);
+                  }
+                }}
+                placeholder="Invoice Number, Invoice Date, PO Number, Supplier Name, Total Amount, Payment terms, Additional Notes"
+                className="h-8 text-xs border-slate-200 focus:border-indigo-300 focus:ring-indigo-200"
+              />
+            </div>
+          </motion.div>
+        )}
+      </AnimatePresence>
+      {/* Error Message - wrapped in AnimatePresence so the exit animation runs when the error is cleared */}
+      <AnimatePresence>
+        {error && (
+          <motion.div
+            key="error"
+            role="alert"
+            initial={{ opacity: 0, y: -10 }}
+            animate={{ opacity: 1, y: 0 }}
+            exit={{ opacity: 0, y: -10 }}
+            className="mt-3 p-3 bg-red-50 border border-red-200 rounded-xl flex items-start gap-2"
+          >
+            <AlertCircle className="h-4 w-4 text-red-600 flex-shrink-0 mt-0.5" />
+            <p className="text-sm text-red-700 flex-1">{error}</p>
+            <button
+              type="button"
+              aria-label="Dismiss error"
+              onClick={() => setError(null)}
+              className="text-red-600 hover:text-red-800 transition-colors"
+            >
+              <X className="h-4 w-4" />
+            </button>
+          </motion.div>
+        )}
+      </AnimatePresence>
+    </div>
+  );
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/components/ui/separator.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 import React from "react";
 import { cn } from "@/lib/utils";
@@ -14,3 +15,21 @@ export function Separator({ className, orientation = "horizontal", ...props }) {
   );
 }

+<<<<<<< HEAD
 import React from "react";
 import { cn } from "@/lib/utils";
   );
 }
+=======
+import React from "react";
+import { cn } from "@/lib/utils";
+/**
+ * Thin divider line. Horizontal by default (1px tall, full width); any other
+ * `orientation` value renders a vertical rule (full height, 1px wide).
+ * Extra props are forwarded to the underlying <div>.
+ */
+export function Separator({ className, orientation = "horizontal", ...props }) {
+  const isHorizontal = orientation === "horizontal";
+  const sizeClasses = isHorizontal ? "h-px w-full" : "h-full w-px";
+  // Caller-supplied className goes last so it is merged after the defaults.
+  const mergedClassName = cn("shrink-0 bg-slate-200", sizeClasses, className);
+  return <div className={mergedClassName} {...props} />;
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/config/firebase.js CHANGED Viewed

@@ -1,3 +1,4 @@
 /**
  * Firebase configuration and initialization
  */
@@ -28,3 +29,35 @@ googleProvider.setCustomParameters({
 export default app;

+<<<<<<< HEAD
 /**
  * Firebase configuration and initialization
  */
 export default app;
+=======
+/**
+ * Firebase configuration and initialization
+ */
+import { initializeApp } from 'firebase/app';
+import { getAuth, GoogleAuthProvider } from 'firebase/auth';
+// Firebase configuration from environment variables
+const firebaseConfig = {
+  apiKey: import.meta.env.VITE_FIREBASE_API_KEY,
+  authDomain: import.meta.env.VITE_FIREBASE_AUTH_DOMAIN,
+  projectId: import.meta.env.VITE_FIREBASE_PROJECT_ID,
+  storageBucket: import.meta.env.VITE_FIREBASE_STORAGE_BUCKET,
+  messagingSenderId: import.meta.env.VITE_FIREBASE_MESSAGING_SENDER_ID,
+  appId: import.meta.env.VITE_FIREBASE_APP_ID,
+};
+// Initialize Firebase
+const app = initializeApp(firebaseConfig);
+// Initialize Firebase Authentication and get a reference to the service
+export const auth = getAuth(app);
+// Configure Google Auth Provider
+export const googleProvider = new GoogleAuthProvider();
+googleProvider.setCustomParameters({
+  prompt: 'select_account'
+});
+export default app;
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/contexts/AuthContext.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 import React, { createContext, useContext, useState, useEffect } from "react";
 import { signInWithPopup, signOut as firebaseSignOut } from "firebase/auth";
 import { auth, googleProvider } from "@/config/firebase";
@@ -113,3 +114,120 @@ export function useAuth() {
   return context;
 }

+<<<<<<< HEAD
 import React, { createContext, useContext, useState, useEffect } from "react";
 import { signInWithPopup, signOut as firebaseSignOut } from "firebase/auth";
 import { auth, googleProvider } from "@/config/firebase";
   return context;
 }
+=======
+import React, { createContext, useContext, useState, useEffect } from "react";
+import { signInWithPopup, signOut as firebaseSignOut } from "firebase/auth";
+import { auth, googleProvider } from "@/config/firebase";
+import { getCurrentUser, firebaseLogin, requestOTP, verifyOTP, logout as apiLogout } from "@/services/auth";
+const AuthContext = createContext(null);
+/**
+ * Provides authentication state and actions to the component tree.
+ *
+ * Context value:
+ *  - user / token / loading / isAuthenticated: current auth state
+ *  - firebaseLogin(): Google popup sign-in, then exchanges the Firebase ID token via the backend
+ *  - requestOTP(email) / verifyOTP(email, otp): e-mail one-time-password flow
+ *  - logout(): signs out of Firebase (if signed in) and the backend, then clears local state
+ *  - handleAuthCallback(token): stores a freshly issued token and reloads the user
+ *
+ * The token is persisted in localStorage under "auth_token".
+ */
+export function AuthProvider({ children }) {
+  const [user, setUser] = useState(null);
+  const [loading, setLoading] = useState(true);
+  // Lazy initializer: read localStorage once on mount rather than on every render.
+  const [token, setToken] = useState(() => localStorage.getItem("auth_token"));
+  useEffect(() => {
+    // Check if user is already authenticated
+    if (token) {
+      checkAuth();
+    } else {
+      setLoading(false);
+    }
+  }, [token]);
+  // Loads the current user; any failure is treated as an invalid token and clears the session.
+  const checkAuth = async () => {
+    try {
+      const userData = await getCurrentUser();
+      setUser(userData);
+    } catch (error) {
+      // Token is invalid, clear it
+      localStorage.removeItem("auth_token");
+      setToken(null);
+      setUser(null);
+    } finally {
+      setLoading(false);
+    }
+  };
+  const handleFirebaseLogin = async () => {
+    // Firebase error codes raised when the user dismisses the popup or a newer
+    // popup request supersedes this one. The real "closed" code is
+    // 'auth/popup-closed-by-user'; the previous check for 'auth/popup-closed'
+    // never matched, so closing the popup surfaced as a login failure.
+    const cancelledPopupCodes = ['auth/popup-closed-by-user', 'auth/cancelled-popup-request'];
+    try {
+      const result = await signInWithPopup(auth, googleProvider);
+      const idToken = await result.user.getIdToken();
+      const response = await firebaseLogin(idToken);
+      handleAuthCallback(response.token);
+    } catch (error) {
+      if (cancelledPopupCodes.includes(error?.code)) {
+        // User closed popup or cancelled - don't show error
+        return;
+      }
+      console.error("Firebase login error:", error);
+      // Keep the original error reachable for callers that want the code or stack.
+      throw new Error(error?.message || "Firebase authentication failed", { cause: error });
+    }
+  };
+  const handleOTPRequest = async (email) => {
+    try {
+      await requestOTP(email);
+    } catch (error) {
+      console.error("OTP request error:", error);
+      throw error;
+    }
+  };
+  const handleOTPVerify = async (email, otp) => {
+    try {
+      const response = await verifyOTP(email, otp);
+      handleAuthCallback(response.token);
+    } catch (error) {
+      console.error("OTP verify error:", error);
+      throw error;
+    }
+  };
+  const handleLogout = async () => {
+    try {
+      // Sign out from Firebase if user was using Firebase auth
+      if (auth.currentUser) {
+        await firebaseSignOut(auth);
+      }
+      await apiLogout();
+    } catch (error) {
+      // Logout is best-effort: remote failures are logged, local state is still cleared below.
+      console.error("Logout error:", error);
+    } finally {
+      localStorage.removeItem("auth_token");
+      setToken(null);
+      setUser(null);
+    }
+  };
+  const handleAuthCallback = (newToken) => {
+    localStorage.setItem("auth_token", newToken);
+    setToken(newToken);
+    // NOTE(review): the [token] effect above also calls checkAuth once the new
+    // token is committed, so this explicit call looks redundant - confirm before removing.
+    checkAuth();
+  };
+  const value = {
+    user,
+    token,
+    loading,
+    firebaseLogin: handleFirebaseLogin,
+    requestOTP: handleOTPRequest,
+    verifyOTP: handleOTPVerify,
+    logout: handleLogout,
+    handleAuthCallback,
+    isAuthenticated: !!user,
+  };
+  return <AuthContext.Provider value={value}>{children}</AuthContext.Provider>;
+}
+/**
+ * Hook returning the value supplied by the nearest AuthProvider.
+ * Throws when called outside of an AuthProvider (context value is falsy).
+ */
+export function useAuth() {
+  const authContext = useContext(AuthContext);
+  if (authContext) {
+    return authContext;
+  }
+  throw new Error("useAuth must be used within an AuthProvider");
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/pages/Dashboard.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 // frontend/src/pages/Dashboard.jsx
 import React, { useState, useEffect } from "react";
@@ -474,3 +475,481 @@ export default function Dashboard() {
     </div>
   );
 }

+<<<<<<< HEAD
 // frontend/src/pages/Dashboard.jsx
 import React, { useState, useEffect } from "react";
     </div>
   );
 }
+=======
+// frontend/src/pages/Dashboard.jsx
+import React, { useState, useEffect } from "react";
+import { useSearchParams } from "react-router-dom";
+import { motion } from "framer-motion";
+import { Sparkles, Zap, FileText, TrendingUp, Clock, AlertCircle } from "lucide-react";
+import { Button } from "@/components/ui/button";
+import UploadZone from "@/components/ocr/UploadZone";
+import DocumentPreview from "@/components/ocr/DocumentPreview";
+import ExtractionOutput from "@/components/ocr/ExtractionOutput";
+import ExportButtons from "@/components/ExportButtons";
+import ProcessingStatus from "@/components/ocr/ProcessingStatus";
+import UpgradeModal from "@/components/ocr/UpgradeModal";
+import { extractDocument, getHistory, getExtractionById } from "@/services/api";
+export default function Dashboard() {
+  const [searchParams, setSearchParams] = useSearchParams();
+  const [selectedFile, setSelectedFile] = useState(null);
+  const [keyFields, setKeyFields] = useState("");
+  const [isProcessing, setIsProcessing] = useState(false);
+  const [isComplete, setIsComplete] = useState(false);
+  const [extractionResult, setExtractionResult] = useState(null);
+  const [error, setError] = useState(null);
+  const [processingStage, setProcessingStage] = useState("received"); // received, analysis, extraction, done
+  const [stats, setStats] = useState({ totalExtracted: 0, averageAccuracy: 0 });
+  const [isLoadingFromHistory, setIsLoadingFromHistory] = useState(false);
+  const [showUpgradeModal, setShowUpgradeModal] = useState(false);
+  const TRIAL_LIMIT = 2; // Maximum number of extractions allowed in trial
+  const handleFileSelect = (file) => {
+    // Check if user has reached trial limit
+    if (stats.totalExtracted >= TRIAL_LIMIT) {
+      setShowUpgradeModal(true);
+      return;
+    }
+    setSelectedFile(file);
+    setIsComplete(false);
+    setExtractionResult(null);
+    setError(null);
+  };
+  const handleClear = () => {
+    setSelectedFile(null);
+    setKeyFields("");
+    setIsProcessing(false);
+    setIsComplete(false);
+    setExtractionResult(null);
+    setError(null);
+    setProcessingStage("received");
+  };
+  // Load extraction from history if extractionId is in URL
+  useEffect(() => {
+    const extractionId = searchParams.get("extractionId");
+    console.log("Dashboard useEffect - extractionId:", extractionId, "isLoadingFromHistory:", isLoadingFromHistory, "extractionResult:", extractionResult);
+    if (extractionId && !isLoadingFromHistory) {
+      // Only load if we don't already have this extraction loaded
+      const currentExtractionId = extractionResult?.id;
+      if (currentExtractionId && currentExtractionId === parseInt(extractionId)) {
+        console.log("Extraction already loaded, skipping");
+        return;
+      }
+      const loadExtractionFromHistory = async () => {
+        setIsLoadingFromHistory(true);
+        setError(null);
+        try {
+          console.log("Loading extraction from history, ID:", extractionId);
+          const extraction = await getExtractionById(parseInt(extractionId));
+          console.log("Extraction loaded:", extraction);
+          console.log("Extraction fields:", extraction.fields);
+          console.log("Fields type:", typeof extraction.fields);
+          console.log("Fields keys:", extraction.fields ? Object.keys(extraction.fields) : "none");
+          if (!extraction) {
+            throw new Error("No extraction data received");
+          }
+          // Ensure fields is an object, not a string
+          let fieldsData = extraction.fields || {};
+          if (typeof fieldsData === 'string') {
+            try {
+              fieldsData = JSON.parse(fieldsData);
+            } catch (e) {
+              console.error("Failed to parse fields as JSON:", e);
+              fieldsData = {};
+            }
+          }
+          console.log("Processed fields:", fieldsData);
+          // Create file object from base64 if available, otherwise create empty file
+          let fileForPreview;
+          if (extraction.fileBase64) {
+            // Convert base64 to binary
+            const binaryString = atob(extraction.fileBase64);
+            const bytes = new Uint8Array(binaryString.length);
+            for (let i = 0; i < binaryString.length; i++) {
+              bytes[i] = binaryString.charCodeAt(i);
+            }
+            const fileBlob = new Blob([bytes], { type: extraction.fileType || "application/pdf" });
+            fileForPreview = new File(
+              [fileBlob],
+              extraction.fileName || "document.pdf",
+              { type: extraction.fileType || "application/pdf" }
+            );
+            console.log("Created file from base64:", fileForPreview.name, fileForPreview.size, "bytes");
+          } else {
+            // Fallback: create empty file if base64 not available
+            const fileBlob = new Blob([], { type: extraction.fileType || "application/pdf" });
+            fileForPreview = new File(
+              [fileBlob],
+              extraction.fileName || "document.pdf",
+              { type: extraction.fileType || "application/pdf" }
+            );
+            console.log("No base64 available, created empty file");
+          }
+          // Set the extraction result - match the structure from extractDocument
+          const result = {
+            id: extraction.id,
+            fields: fieldsData,
+            confidence: extraction.confidence || 0,
+            fieldsExtracted: extraction.fieldsExtracted || 0,
+            totalTime: extraction.totalTime || 0,
+            fileName: extraction.fileName,
+            fileType: extraction.fileType,
+            fileSize: extraction.fileSize,
+          };
+          console.log("Setting extraction result:", result);
+          setExtractionResult(result);
+          setSelectedFile(fileForPreview);
+          setIsComplete(true);
+          setIsProcessing(false);
+          setProcessingStage("done");
+          // Remove the extractionId from URL
+          setSearchParams({});
+        } catch (err) {
+          console.error("Failed to load extraction from history:", err);
+          const errorMessage = err.message || "Failed to load extraction from history";
+          setError(errorMessage);
+          // Don't clear the URL params on error so user can see what went wrong
+        } finally {
+          setIsLoadingFromHistory(false);
+        }
+      };
+      loadExtractionFromHistory();
+    }
+  }, [searchParams, isLoadingFromHistory, setSearchParams]);
+  // Fetch and calculate stats from history
+  useEffect(() => {
+    const fetchStats = async () => {
+      try {
+        const history = await getHistory();
+        // Calculate total extracted (only completed extractions)
+        const completedExtractions = history.filter(item => item.status === "completed");
+        const totalExtracted = completedExtractions.length;
+        // Calculate average accuracy from completed extractions
+        const accuracies = completedExtractions
+          .map(item => item.confidence || 0)
+          .filter(acc => acc > 0);
+        const averageAccuracy = accuracies.length > 0
+          ? accuracies.reduce((sum, acc) => sum + acc, 0) / accuracies.length
+          : 0;
+        setStats({
+          totalExtracted,
+          averageAccuracy: Math.round(averageAccuracy * 10) / 10 // Round to 1 decimal place
+        });
+      } catch (err) {
+        console.error("Failed to fetch stats:", err);
+        // Keep default values on error
+      }
+    };
+    // Fetch stats on mount and when extraction completes
+    fetchStats();
+  }, [isComplete]);
+  const handleExtract = async () => {
+    if (!selectedFile) return;
+    // Check if user has reached trial limit before processing
+    if (stats.totalExtracted >= TRIAL_LIMIT) {
+      setShowUpgradeModal(true);
+      return;
+    }
+    setIsProcessing(true);
+    setIsComplete(false);
+    setError(null);
+    setExtractionResult(null);
+    setProcessingStage("received");
+    // Move to Analysis stage immediately after starting
+    setTimeout(() => {
+      setProcessingStage("analysis");
+    }, 100);
+    // Move to Extraction stage after analysis phase (2.5 seconds)
+    let extractionTimer = setTimeout(() => {
+      setProcessingStage("extraction");
+    }, 2500);
+    try {
+      const result = await extractDocument(selectedFile, keyFields);
+      // Clear the extraction timer
+      clearTimeout(extractionTimer);
+      // Move to extraction stage if not already there, then to done
+      setProcessingStage("extraction");
+      // Small delay to show extraction stage, then move to done when results are rendered
+      setTimeout(() => {
+        setProcessingStage("done");
+        setExtractionResult(result);
+        setIsComplete(true);
+        setIsProcessing(false);
+      }, 500); // Give time to see extraction stage
+    } catch (err) {
+      clearTimeout(extractionTimer);
+      console.error("Extraction error:", err);
+      setError(err.message || "Failed to extract document. Please try again.");
+      setIsComplete(false);
+      setProcessingStage("received");
+      setIsProcessing(false);
+    }
+  };
+  return (
+    <div className="min-h-screen bg-[#FAFAFA]">
+      {/* Header */}
+      <header className="bg-white border-b border-slate-200/80 sticky top-0 z-40 h-16">
+        <div className="px-8 h-full flex items-center justify-between">
+          <div>
+            <h1 className="text-xl font-bold text-slate-900 tracking-tight leading-tight">
+              Multi-Lingual Document Extraction
+            </h1>
+            <p className="text-sm text-slate-500 leading-tight">
+              Upload any document and extract structured data with VRP (No LLM)
+            </p>
+          </div>
+          <div className="flex items-center gap-3">
+            {/* Stats Pills */}
+            <div className="hidden lg:flex items-center gap-2">
+              <div className="flex items-center gap-2 px-3 py-1.5 bg-slate-100 rounded-lg">
+                <FileText className="h-4 w-4 text-slate-500" />
+                <span className="text-sm font-medium text-slate-700">
+                  {stats.totalExtracted}/{TRIAL_LIMIT} Used
+                </span>
+              </div>
+              <div className="flex items-center gap-2 px-3 py-1.5 bg-emerald-50 rounded-lg">
+                <TrendingUp className="h-4 w-4 text-emerald-600" />
+                <span className="text-sm font-medium text-emerald-700">
+                  {stats.averageAccuracy > 0 ? `${stats.averageAccuracy}%` : "0%"} Accuracy
+                </span>
+              </div>
+            </div>
+            <ExportButtons isComplete={isComplete} extractionResult={extractionResult} />
+          </div>
+        </div>
+      </header>
+      {/* Main Content */}
+      <div className="p-8">
+        {/* Upload Section */}
+        <motion.div
+          initial={{ opacity: 0, y: 20 }}
+          animate={{ opacity: 1, y: 0 }}
+          className="max-w-3xl mx-auto mb-4"
+        >
+          <UploadZone
+            onFileSelect={handleFileSelect}
+            selectedFile={selectedFile}
+            onClear={handleClear}
+            keyFields={keyFields}
+            onKeyFieldsChange={setKeyFields}
+          />
+          {/* Extract Button */}
+          {selectedFile && !isProcessing && !isComplete && (
+            <motion.div
+              initial={{ opacity: 0, y: 10 }}
+              animate={{ opacity: 1, y: 0 }}
+              className="mt-4 flex justify-center"
+            >
+              <Button
+                onClick={handleExtract}
+                size="lg"
+                className="h-14 px-8 rounded-2xl font-semibold text-base bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700 shadow-xl shadow-indigo-500/25 hover:shadow-2xl hover:shadow-indigo-500/30 transition-all duration-300 hover:-translate-y-0.5"
+              >
+                <Sparkles className="h-5 w-5 mr-2" />
+                Start Extraction
+                <Zap className="h-4 w-4 ml-2 opacity-70" />
+              </Button>
+            </motion.div>
+          )}
+        </motion.div>
+        {/* Error Message */}
+        {error && (
+          <motion.div
+            initial={{ opacity: 0, y: -10 }}
+            animate={{ opacity: 1, y: 0 }}
+            className="max-w-3xl mx-auto mb-6"
+          >
+            <div className="bg-red-50 border border-red-200 rounded-2xl p-4 flex items-start gap-3">
+              <AlertCircle className="h-5 w-5 text-red-600 flex-shrink-0 mt-0.5" />
+              <div className="flex-1">
+                <h3 className="font-semibold text-red-900 mb-1">Extraction Failed</h3>
+                <p className="text-sm text-red-700">{error}</p>
+              </div>
+              <button
+                onClick={() => setError(null)}
+                className="text-red-400 hover:text-red-600 transition-colors"
+              >
+                ×
+              </button>
+            </div>
+          </motion.div>
+        )}
+        {/* Loading from History */}
+        {isLoadingFromHistory && (
+          <motion.div
+            initial={{ opacity: 0, y: -10 }}
+            animate={{ opacity: 1, y: 0 }}
+            className="max-w-3xl mx-auto mb-6"
+          >
+            <div className="bg-blue-50 border border-blue-200 rounded-2xl p-4 flex items-center gap-3">
+              <Clock className="h-5 w-5 text-blue-600 animate-spin" />
+              <div className="flex-1">
+                <h3 className="font-semibold text-blue-900 mb-1">Loading extraction...</h3>
+                <p className="text-sm text-blue-700">Retrieving extraction data from history</p>
+              </div>
+            </div>
+          </motion.div>
+        )}
+        {/* Processing Status */}
+        {(isProcessing || isComplete) && !isLoadingFromHistory && (
+          <div className="max-w-3xl mx-auto mb-4">
+            <ProcessingStatus
+              isProcessing={isProcessing}
+              isComplete={isComplete}
+              currentStage={processingStage}
+            />
+          </div>
+        )}
+        {/* Split View */}
+        {selectedFile && (
+          <motion.div
+            initial={{ opacity: 0, y: 20 }}
+            animate={{ opacity: 1, y: 0 }}
+            transition={{ delay: 0.2 }}
+            className="grid grid-cols-1 lg:grid-cols-2 gap-4"
+            style={{ height: "calc(100vh - 320px)", minHeight: "450px" }}
+          >
+            <DocumentPreview
+              file={selectedFile}
+              isProcessing={isProcessing}
+              isFromHistory={!!extractionResult?.id}
+            />
+            <ExtractionOutput
+              hasFile={!!selectedFile}
+              isProcessing={isProcessing}
+              isComplete={isComplete}
+              extractionResult={extractionResult}
+              onNewUpload={handleClear}
+            />
+          </motion.div>
+        )}
+        {/* Empty State Features */}
+        {!selectedFile && (
+          <motion.div
+            initial={{ opacity: 0 }}
+            animate={{ opacity: 1 }}
+            transition={{ delay: 0.3 }}
+            className="max-w-5xl mx-auto mt-12"
+          >
+            <div className="text-center mb-10">
+              <h2 className="text-2xl font-bold text-slate-900 mb-2">
+                Pure Agentic Document Intelligence
+              </h2>
+              <p className="text-slate-500">
+                Extract structured data from any document without LLM using VRP (Visual Resoning Processor)
+              </p>
+            </div>
+            <div className="grid grid-cols-1 md:grid-cols-3 gap-6">
+              {[
+                {
+                  icon: Zap,
+                  title: "Lightning Fast",
+                  description:
+                    "Process documents faster with our agentic pipeline",
+                  color: "amber",
+                },
+                {
+                  icon: Sparkles,
+                  title: `${stats.averageAccuracy > 0 ? stats.averageAccuracy : "99.8"}% Accuracy`,
+                  description:
+                    "Industry-leading extraction accuracy",
+                  color: "indigo",
+                },
+                {
+                  icon: Clock,
+                  title: "Any Format",
+                  description:
+                    "Support for PDF, images, and scanned documents",
+                  color: "emerald",
+                },
+              ].map((feature, index) => (
+                <motion.div
+                  key={feature.title}
+                  initial={{ opacity: 0, y: 20 }}
+                  animate={{ opacity: 1, y: 0 }}
+                  transition={{ delay: 0.4 + index * 0.1 }}
+                  className="group bg-white rounded-2xl border border-slate-200 p-6 hover:shadow-xl hover:shadow-slate-200/50 transition-all duration-300 hover:-translate-y-1"
+                >
+                  <div
+                    className={`h-12 w-12 rounded-xl bg-${feature.color}-50 flex items-center justify-center mb-4 group-hover:scale-110 transition-transform duration-300`}
+                  >
+                    <feature.icon
+                      className={`h-6 w-6 text-${feature.color}-600`}
+                    />
+                  </div>
+                  <h3 className="font-semibold text-slate-900 mb-2">
+                    {feature.title}
+                  </h3>
+                  <p className="text-sm text-slate-500 leading-relaxed">
+                    {feature.description}
+                  </p>
+                </motion.div>
+              ))}
+            </div>
+            {/* Supported Formats */}
+            <div className="mt-12 text-center">
+              <p className="text-xs text-slate-400 uppercase tracking-wider mb-4 font-medium">
+                Supported Formats
+              </p>
+              <div className="flex items-center justify-center gap-6 flex-wrap">
+                {["PDF", "PNG", "JPG", "TIFF", "JPEG"].map((format) => (
+                  <div
+                    key={format}
+                    className="flex items-center gap-2 text-slate-400"
+                  >
+                    <FileText className="h-4 w-4" />
+                    <span className="text-sm font-medium">{format}</span>
+                  </div>
+                ))}
+              </div>
+            </div>
+          </motion.div>
+        )}
+      </div>
+      {/* Upgrade Modal */}
+      <UpgradeModal open={showUpgradeModal} onClose={() => setShowUpgradeModal(false)} />
+    </div>
+  );
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/pages/History.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 // frontend/src/pages/History.jsx
 import React, { useState, useEffect } from "react";
@@ -857,3 +858,864 @@ export default function History() {
     </div>
   );
 }

+<<<<<<< HEAD
 // frontend/src/pages/History.jsx
 import React, { useState, useEffect } from "react";
     </div>
   );
 }
+=======
+// frontend/src/pages/History.jsx
+import React, { useState, useEffect } from "react";
+import { useNavigate, useSearchParams } from "react-router-dom";
+import { motion, AnimatePresence } from "framer-motion";
+import {
+  FileText,
+  Clock,
+  CheckCircle2,
+  ChevronRight,
+  Download,
+  Eye,
+  Trash2,
+  Search,
+  Filter,
+  Calendar,
+  Upload,
+  Cpu,
+  TableProperties,
+  MonitorPlay,
+  TrendingUp,
+  TrendingDown,
+  Minus,
+  AlertCircle,
+  X,
+  FileSpreadsheet,
+  Table2,
+} from "lucide-react";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Badge } from "@/components/ui/badge";
+import {
+  Select,
+  SelectContent,
+  SelectItem,
+  SelectTrigger,
+  SelectValue,
+} from "@/components/ui/select";
+import {
+  DropdownMenu,
+  DropdownMenuContent,
+  DropdownMenuItem,
+  DropdownMenuSeparator,
+  DropdownMenuTrigger,
+} from "@/components/ui/dropdown-menu";
+import { cn } from "@/lib/utils";
+import { getHistory } from "@/services/api";
+// minimal "toast"
+const toastSuccess = (msg) => {
+  console.log(msg);
+};
+const stageConfig = {
+  uploading: { label: "Uploading", icon: Upload, color: "blue" },
+  aiAnalysis: { label: "AI Analysis", icon: Cpu, color: "violet" },
+  dataExtraction: { label: "Data Extraction", icon: TableProperties, color: "emerald" },
+  outputRendering: { label: "Output Rendering", icon: MonitorPlay, color: "amber" },
+};
+const variationConfig = {
+  fast: { icon: TrendingDown, color: "text-emerald-500", label: "Faster than avg" },
+  normal: { icon: Minus, color: "text-slate-400", label: "Normal" },
+  slow: { icon: TrendingUp, color: "text-amber-500", label: "Slower than avg" },
+  error: { icon: AlertCircle, color: "text-red-500", label: "Error" },
+  skipped: { icon: Minus, color: "text-slate-300", label: "Skipped" },
+};
+export default function History() {
+  const navigate = useNavigate();
+  const [searchParams, setSearchParams] = useSearchParams();
+  const [searchQuery, setSearchQuery] = useState("");
+  const [selectedStatus, setSelectedStatus] = useState("all");
+  const [expandedReport, setExpandedReport] = useState(null);
+  const [isExporting, setIsExporting] = useState(false);
+  const [history, setHistory] = useState([]);
+  const [isLoading, setIsLoading] = useState(true);
+  const [error, setError] = useState(null);
+  // Fetch history on component mount
+  useEffect(() => {
+    const fetchHistory = async () => {
+      setIsLoading(true);
+      setError(null);
+      try {
+        const data = await getHistory();
+        setHistory(data);
+        // Check if there's an extractionId in URL (from share link)
+        const extractionId = searchParams.get("extractionId");
+        if (extractionId) {
+          // Clear the query param and navigate to dashboard
+          setSearchParams({});
+          // Small delay to ensure history is loaded
+          setTimeout(() => {
+            navigate(`/?extractionId=${extractionId}`);
+          }, 100);
+        }
+      } catch (err) {
+        console.error("Failed to fetch history:", err);
+        setError(err.message || "Failed to load history");
+        setHistory([]); // Fallback to empty array
+      } finally {
+        setIsLoading(false);
+      }
+    };
+    fetchHistory();
+  }, [searchParams, setSearchParams, navigate]);
+  const filteredHistory = history.filter((item) => {
+    const matchesSearch = item.fileName?.toLowerCase().includes(searchQuery.toLowerCase()) ?? false;
+    const matchesStatus = selectedStatus === "all" || item.status === selectedStatus;
+    return matchesSearch && matchesStatus;
+  });
+  const formatTime = (ms) => {
+    if (ms >= 1000) {
+      return `${(ms / 1000).toFixed(2)}s`;
+    }
+    return `${ms}ms`;
+  };
+  const formatTimeForExport = (ms) => {
+    return ms >= 1000 ? `${(ms / 1000).toFixed(2)}s` : `${ms}ms`;
+  };
+  const formatDate = (dateString) => {
+    const date = new Date(dateString);
+    return date.toLocaleDateString("en-US", {
+      month: "short",
+      day: "numeric",
+      hour: "2-digit",
+      minute: "2-digit",
+    });
+  };
+  const formatDateForExport = (dateString) => {
+    const date = new Date(dateString);
+    return date.toISOString().replace("T", " ").slice(0, 19);
+  };
+  const generateCSV = (data) => {
+    const headers = [
+      "File Name",
+      "File Type",
+      "File Size",
+      "Extracted At",
+      "Status",
+      "Confidence (%)",
+      "Fields Extracted",
+      "Total Time (ms)",
+      "Upload Time (ms)",
+      "Upload Status",
+      "Upload Variation",
+      "AI Analysis Time (ms)",
+      "AI Analysis Status",
+      "AI Analysis Variation",
+      "Data Extraction Time (ms)",
+      "Data Extraction Status",
+      "Data Extraction Variation",
+      "Output Rendering Time (ms)",
+      "Output Rendering Status",
+      "Output Rendering Variation",
+      "Error Message",
+    ];
+    const rows = data.map((item) => [
+      item.fileName,
+      item.fileType,
+      item.fileSize,
+      formatDateForExport(item.extractedAt),
+      item.status,
+      item.confidence,
+      item.fieldsExtracted,
+      item.totalTime,
+      item.stages.uploading.time,
+      item.stages.uploading.status,
+      item.stages.uploading.variation,
+      item.stages.aiAnalysis.time,
+      item.stages.aiAnalysis.status,
+      item.stages.aiAnalysis.variation,
+      item.stages.dataExtraction.time,
+      item.stages.dataExtraction.status,
+      item.stages.dataExtraction.variation,
+      item.stages.outputRendering.time,
+      item.stages.outputRendering.status,
+      item.stages.outputRendering.variation,
+      item.errorMessage || "",
+    ]);
+    const csvContent = [
+      headers.join(","),
+      ...rows.map((row) => row.map((cell) => `"${cell}"`).join(",")),
+    ].join("\n");
+    return csvContent;
+  };
+  const downloadFile = (content, fileName, mimeType) => {
+    const blob = new Blob([content], { type: mimeType });
+    const url = URL.createObjectURL(blob);
+    const link = document.createElement("a");
+    link.href = url;
+    link.download = fileName;
+    document.body.appendChild(link);
+    link.click();
+    document.body.removeChild(link);
+    URL.revokeObjectURL(url);
+  };
+  const handleExportCSV = () => {
+    setIsExporting(true);
+    setTimeout(() => {
+      const csvContent = generateCSV(filteredHistory);
+      downloadFile(
+        csvContent,
+        `extraction_history_${new Date().toISOString().slice(0, 10)}.csv`,
+        "text/csv;charset=utf-8;"
+      );
+      toastSuccess("CSV exported successfully");
+      setIsExporting(false);
+    }, 500);
+  };
+  const generateExcelXML = (data) => {
+    const headers = [
+      "File Name",
+      "File Type",
+      "File Size",
+      "Extracted At",
+      "Status",
+      "Confidence (%)",
+      "Fields Extracted",
+      "Total Time (ms)",
+      "Upload Time (ms)",
+      "Upload Status",
+      "Upload Variation",
+      "AI Analysis Time (ms)",
+      "AI Analysis Status",
+      "AI Analysis Variation",
+      "Data Extraction Time (ms)",
+      "Data Extraction Status",
+      "Data Extraction Variation",
+      "Output Rendering Time (ms)",
+      "Output Rendering Status",
+      "Output Rendering Variation",
+      "Error Message",
+    ];
+    const rows = data.map((item) => [
+      item.fileName,
+      item.fileType,
+      item.fileSize,
+      formatDateForExport(item.extractedAt),
+      item.status,
+      item.confidence,
+      item.fieldsExtracted,
+      item.totalTime,
+      item.stages.uploading.time,
+      item.stages.uploading.status,
+      item.stages.uploading.variation,
+      item.stages.aiAnalysis.time,
+      item.stages.aiAnalysis.status,
+      item.stages.aiAnalysis.variation,
+      item.stages.dataExtraction.time,
+      item.stages.dataExtraction.status,
+      item.stages.dataExtraction.variation,
+      item.stages.outputRendering.time,
+      item.stages.outputRendering.status,
+      item.stages.outputRendering.variation,
+      item.errorMessage || "",
+    ]);
+    let xml = `<?xml version="1.0" encoding="UTF-8"?>
+<?mso-application progid="Excel.Sheet"?>
+<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"
+ xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet">
+<Worksheet ss:Name="Extraction History">
+<Table>
+<Row>`;
+    headers.forEach((header) => {
+      xml += `<Cell><Data ss:Type="String">${header}</Data></Cell>`;
+    });
+    xml += `</Row>`;
+    rows.forEach((row) => {
+      xml += `<Row>`;
+      row.forEach((cell) => {
+        const type = typeof cell === "number" ? "Number" : "String";
+        xml += `<Cell><Data ss:Type="${type}">${cell}</Data></Cell>`;
+      });
+      xml += `</Row>`;
+    });
+    xml += `</Table></Worksheet></Workbook>`;
+    return xml;
+  };
+  const handleExportExcel = () => {
+    setIsExporting(true);
+    setTimeout(() => {
+      const excelContent = generateExcelXML(filteredHistory);
+      downloadFile(
+        excelContent,
+        `extraction_history_${new Date().toISOString().slice(0, 10)}.xls`,
+        "application/vnd.ms-excel"
+      );
+      toastSuccess("Excel file exported successfully");
+      setIsExporting(false);
+    }, 500);
+  };
+  const handleExportSingleReport = (item, format) => {
+    if (format === "csv") {
+      const csvContent = generateCSV([item]);
+      downloadFile(
+        csvContent,
+        `${item.fileName.replace(/\.[^/.]+$/, "")}_report.csv`,
+        "text/csv;charset=utf-8;"
+      );
+      toastSuccess("Report exported as CSV");
+    } else {
+      const excelContent = generateExcelXML([item]);
+      downloadFile(
+        excelContent,
+        `${item.fileName.replace(/\.[^/.]+$/, "")}_report.xls`,
+        "application/vnd.ms-excel"
+      );
+      toastSuccess("Report exported as Excel");
+    }
+  };
+  return (
+    <div className="min-h-screen bg-[#FAFAFA]">
+      {/* Header */}
+      <header className="bg-white border-b border-slate-200/80 sticky top-0 z-40 h-16">
+        <div className="px-8 h-full flex items-center">
+          <div>
+            <h1 className="text-xl font-bold text-slate-900 tracking-tight leading-tight">
+              Extraction History
+            </h1>
+            <p className="text-sm text-slate-500 leading-tight">
+              View detailed reports and performance metrics for all extractions
+            </p>
+          </div>
+        </div>
+      </header>
+      {/* Content */}
+      <div className="p-8">
+        {/* Filters */}
+        <div className="flex items-center gap-4 mb-6">
+          <div className="relative flex-1 max-w-md">
+            <Search className="absolute left-3 top-1/2 -translate-y-1/2 h-4 w-4 text-slate-400" />
+            <Input
+              placeholder="Search by file name..."
+              value={searchQuery}
+              onChange={(e) => setSearchQuery(e.target.value)}
+              className="pl-10 h-11 rounded-xl border-slate-200"
+            />
+          </div>
+          <Select
+            value={selectedStatus}
+            onValueChange={(value) => setSelectedStatus(value)}
+          >
+            <SelectTrigger className="w-40 h-11 rounded-xl border-slate-200">
+              <Filter className="h-4 w-4 mr-2 text-slate-400" />
+              <SelectValue placeholder="Status" />
+            </SelectTrigger>
+            <SelectContent>
+              <SelectItem value="all">All Status</SelectItem>
+              <SelectItem value="completed">Completed</SelectItem>
+              <SelectItem value="failed">Failed</SelectItem>
+            </SelectContent>
+          </Select>
+          {/* Export All Button */}
+          <DropdownMenu>
+            <DropdownMenuTrigger asChild>
+              <Button
+                className="h-11 px-4 rounded-xl bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700 shadow-lg shadow-indigo-500/25"
+                disabled={isExporting || filteredHistory.length === 0}
+              >
+                {isExporting ? (
+                  <motion.div
+                    animate={{ rotate: 360 }}
+                    transition={{
+                      duration: 1,
+                      repeat: Infinity,
+                      ease: "linear",
+                    }}
+                    className="mr-2"
+                  >
+                    <Download className="h-4 w-4" />
+                  </motion.div>
+                ) : (
+                  <Download className="h-4 w-4 mr-2" />
+                )}
+                Export All
+              </Button>
+            </DropdownMenuTrigger>
+            <DropdownMenuContent
+              align="end"
+              className="w-48 rounded-xl p-2"
+            >
+              <DropdownMenuItem
+                className="rounded-lg cursor-pointer"
+                onClick={handleExportCSV}
+              >
+                <Table2 className="h-4 w-4 mr-2 text-emerald-600" />
+                Export as CSV
+              </DropdownMenuItem>
+              <DropdownMenuItem
+                className="rounded-lg cursor-pointer"
+                onClick={handleExportExcel}
+              >
+                <FileSpreadsheet className="h-4 w-4 mr-2 text-green-600" />
+                Export as Excel
+              </DropdownMenuItem>
+              <DropdownMenuSeparator />
+              <div className="px-2 py-1.5 text-xs text-slate-500">
+                {filteredHistory.length} records will be exported
+              </div>
+            </DropdownMenuContent>
+          </DropdownMenu>
+        </div>
+        {/* Stats Overview */}
+        <div className="grid grid-cols-1 md:grid-cols-4 gap-4 mb-8">
+          {(() => {
+            const total = history.length;
+            const completed = history.filter((h) => h.status === "completed").length;
+            const successRate = total > 0 ? ((completed / total) * 100).toFixed(1) : 0;
+            const avgTime = history.length > 0
+              ? history.reduce((sum, h) => sum + (h.totalTime || 0), 0) / history.length
+              : 0;
+            const totalFields = history.reduce((sum, h) => sum + (h.fieldsExtracted || 0), 0);
+            return [
+              {
+                label: "Total Extractions",
+                value: total.toString(),
+                change: "",
+                color: "indigo",
+              },
+              {
+                label: "Success Rate",
+                value: `${successRate}%`,
+                change: total > 0 ? `${completed}/${total} successful` : "No data",
+                color: "emerald",
+              },
+              {
+                label: "Avg. Processing Time",
+                value: avgTime >= 1000 ? `${(avgTime / 1000).toFixed(1)}s` : `${Math.round(avgTime)}ms`,
+                change: "",
+                color: "violet",
+              },
+              {
+                label: "Fields Extracted",
+                value: totalFields.toLocaleString(),
+                change: "",
+                color: "amber",
+              },
+            ].map((stat, index) => (
+            <motion.div
+              key={stat.label}
+              initial={{ opacity: 0, y: 20 }}
+              animate={{ opacity: 1, y: 0 }}
+              transition={{ delay: index * 0.1 }}
+              className="bg-white rounded-2xl border border-slate-200 p-5"
+            >
+              <p className="text-sm text-slate-500 mb-1">{stat.label}</p>
+              <p className="text-2xl font-bold text-slate-900">{stat.value}</p>
+              <p className={`text-xs text-${stat.color}-600 mt-1`}>
+                {stat.change}
+              </p>
+            </motion.div>
+            ));
+          })()}
+        </div>
+        {/* Loading State */}
+        {isLoading && (
+          <div className="text-center py-16">
+            <motion.div
+              animate={{ rotate: 360 }}
+              transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
+              className="h-16 w-16 mx-auto rounded-2xl bg-indigo-100 flex items-center justify-center mb-4"
+            >
+              <Cpu className="h-8 w-8 text-indigo-600" />
+            </motion.div>
+            <p className="text-slate-500">Loading extraction history...</p>
+          </div>
+        )}
+        {/* History List */}
+        {!isLoading && (
+          <div className="space-y-4">
+          {filteredHistory.map((item, index) => (
+            <motion.div
+              key={item.id}
+              initial={{ opacity: 0, y: 20 }}
+              animate={{ opacity: 1, y: 0 }}
+              transition={{ delay: index * 0.05 }}
+              className="bg-white rounded-2xl border border-slate-200 overflow-hidden"
+            >
+              {/* Main Row */}
+              <div
+                className="p-5 cursor-pointer hover:bg-slate-50/50 transition-colors"
+                onClick={() =>
+                  setExpandedReport(
+                    expandedReport === item.id ? null : item.id
+                  )
+                }
+              >
+                <div className="flex items-center gap-4">
+                  {/* File Icon */}
+                  <div
+                    className={cn(
+                      "h-12 w-12 rounded-xl flex items-center justify-center",
+                      item.status === "completed" ? "bg-indigo-50" : "bg-red-50"
+                    )}
+                  >
+                    <FileText
+                      className={cn(
+                        "h-6 w-6",
+                        item.status === "completed"
+                          ? "text-indigo-600"
+                          : "text-red-500"
+                      )}
+                    />
+                  </div>
+                  {/* File Info */}
+                  <div className="flex-1 min-w-0">
+                    <div className="flex items-center gap-2">
+                      <h3 className="font-semibold text-slate-900 truncate">
+                        {item.fileName}
+                      </h3>
+                      <Badge variant="secondary" className="text-xs">
+                        {item.fileType}
+                      </Badge>
+                    </div>
+                    <div className="flex items-center gap-4 mt-1 text-sm text-slate-500">
+                      <span>{item.fileSize}</span>
+                      <span className="flex items-center gap-1">
+                        <Calendar className="h-3 w-3" />
+                        {formatDate(item.extractedAt)}
+                      </span>
+                    </div>
+                  </div>
+                  {/* Stats */}
+                  <div className="hidden md:flex items-center gap-6">
+                    <div className="text-center">
+                      <p className="text-xs text-slate-400">Time</p>
+                      <p className="font-semibold text-slate-700">
+                        {formatTime(item.totalTime)}
+                      </p>
+                    </div>
+                    <div className="text-center">
+                      <p className="text-xs text-slate-400">Fields</p>
+                      <p className="font-semibold text-slate-700">
+                        {item.fieldsExtracted}
+                      </p>
+                    </div>
+                    <div className="text-center">
+                      <p className="text-xs text-slate-400">Confidence</p>
+                      <p
+                        className={cn(
+                          "font-semibold",
+                          item.confidence >= 95
+                            ? "text-emerald-600"
+                            : item.confidence >= 90
+                            ? "text-amber-600"
+                            : "text-red-600"
+                        )}
+                      >
+                        {item.confidence > 0 ? `${item.confidence}%` : "-"}
+                      </p>
+                    </div>
+                  </div>
+                  {/* Status & Actions */}
+                  <div className="flex items-center gap-3">
+                    <Badge
+                      className={cn(
+                        "capitalize",
+                        item.status === "completed"
+                          ? "bg-emerald-50 text-emerald-700 border-emerald-200"
+                          : "bg-red-50 text-red-700 border-red-200"
+                      )}
+                    >
+                      {item.status === "completed" ? (
+                        <CheckCircle2 className="h-3 w-3 mr-1" />
+                      ) : (
+                        <AlertCircle className="h-3 w-3 mr-1" />
+                      )}
+                      {item.status}
+                    </Badge>
+                    <ChevronRight
+                      className={cn(
+                        "h-5 w-5 text-slate-400 transition-transform",
+                        expandedReport === item.id && "rotate-90"
+                      )}
+                    />
+                  </div>
+                </div>
+              </div>
+              {/* Expanded Report */}
+              <AnimatePresence>
+                {expandedReport === item.id && (
+                  <motion.div
+                    initial={{ height: 0, opacity: 0 }}
+                    animate={{ height: "auto", opacity: 1 }}
+                    exit={{ height: 0, opacity: 0 }}
+                    transition={{ duration: 0.2 }}
+                    className="overflow-hidden"
+                  >
+                    <div className="px-5 pb-5 pt-2 border-t border-slate-100">
+                      {/* Error Message */}
+                      {item.errorMessage && (
+                        <div className="mb-4 p-4 bg-red-50 border border-red-100 rounded-xl">
+                          <div className="flex items-center gap-2 text-red-700">
+                            <AlertCircle className="h-4 w-4" />
+                            <span className="font-medium">Error Details</span>
+                          </div>
+                          <p className="text-sm text-red-600 mt-1">
+                            {item.errorMessage}
+                          </p>
+                        </div>
+                      )}
+                      {/* Performance Report Header */}
+                      <div className="flex items-center justify-between mb-4">
+                        <h4 className="font-semibold text-slate-800">
+                          Performance Report
+                        </h4>
+                        <div className="flex items-center gap-2">
+                          <Button
+                            variant="ghost"
+                            size="sm"
+                            className="h-8 text-xs"
+                            onClick={(e) => {
+                              e.stopPropagation();
+                              navigate(`/?extractionId=${item.id}`);
+                            }}
+                          >
+                            <Eye className="h-3 w-3 mr-1" />
+                            View Output
+                          </Button>
+                          <DropdownMenu>
+                            <DropdownMenuTrigger asChild>
+                              <Button
+                                variant="outline"
+                                size="sm"
+                                className="h-8 text-xs"
+                              >
+                                <Download className="h-3 w-3 mr-1" />
+                                Export Report
+                              </Button>
+                            </DropdownMenuTrigger>
+                            <DropdownMenuContent
+                              align="end"
+                              className="w-44 rounded-xl p-2"
+                            >
+                              <DropdownMenuItem
+                                className="rounded-lg cursor-pointer text-xs"
+                                onClick={(e) => {
+                                  e.stopPropagation();
+                                  handleExportSingleReport(item, "csv");
+                                }}
+                              >
+                                <Table2 className="h-3 w-3 mr-2 text-emerald-600" />
+                                Download CSV
+                              </DropdownMenuItem>
+                              <DropdownMenuItem
+                                className="rounded-lg cursor-pointer text-xs"
+                                onClick={(e) => {
+                                  e.stopPropagation();
+                                  handleExportSingleReport(item, "excel");
+                                }}
+                              >
+                                <FileSpreadsheet className="h-3 w-3 mr-2 text-green-600" />
+                                Download Excel
+                              </DropdownMenuItem>
+                            </DropdownMenuContent>
+                          </DropdownMenu>
+                        </div>
+                      </div>
+                      {/* Stage Timing Cards */}
+                      <div className="grid grid-cols-1 md:grid-cols-4 gap-4">
+                        {Object.entries(item.stages).map(
+                          ([stageKey, stageData]) => {
+                            const config = stageConfig[stageKey];
+                            const variationInfo =
+                              variationConfig[stageData.variation];
+                            const Icon = config.icon;
+                            const VariationIcon = variationInfo.icon;
+                            return (
+                              <div
+                                key={stageKey}
+                                className={cn(
+                                  "relative p-4 rounded-xl border",
+                                  stageData.status === "completed"
+                                    ? "bg-slate-50 border-slate-200"
+                                    : stageData.status === "failed"
+                                    ? "bg-red-50 border-red-200"
+                                    : "bg-slate-50/50 border-slate-100"
+                                )}
+                              >
+                                <div className="flex items-center gap-2 mb-3">
+                                  <div
+                                    className={cn(
+                                      "h-8 w-8 rounded-lg flex items-center justify-center",
+                                      `bg-${config.color}-100`
+                                    )}
+                                  >
+                                    <Icon
+                                      className={cn(
+                                        "h-4 w-4",
+                                        `text-${config.color}-600`
+                                      )}
+                                    />
+                                  </div>
+                                  <span className="text-sm font-medium text-slate-700">
+                                    {config.label}
+                                  </span>
+                                </div>
+                                <div className="flex items-end justify-between">
+                                  <div>
+                                    <p
+                                      className={cn(
+                                        "text-2xl font-bold",
+                                        stageData.status === "skipped"
+                                          ? "text-slate-300"
+                                          : stageData.status === "failed"
+                                          ? "text-red-600"
+                                          : "text-slate-900"
+                                      )}
+                                    >
+                                      {stageData.status === "skipped"
+                                        ? "-"
+                                        : formatTime(stageData.time)}
+                                    </p>
+                                    {stageData.status !== "skipped" && (
+                                      <div className="flex items-center gap-1 mt-1">
+                                        <VariationIcon
+                                          className={cn(
+                                            "h-3 w-3",
+                                            variationInfo.color
+                                          )}
+                                        />
+                                        <span
+                                          className={cn(
+                                            "text-xs",
+                                            variationInfo.color
+                                          )}
+                                        >
+                                          {variationInfo.label}
+                                        </span>
+                                      </div>
+                                    )}
+                                  </div>
+                                  {stageData.status === "completed" && (
+                                    <CheckCircle2 className="h-5 w-5 text-emerald-500" />
+                                  )}
+                                  {stageData.status === "failed" && (
+                                    <X className="h-5 w-5 text-red-500" />
+                                  )}
+                                </div>
+                                {/* Progress bar */}
+                                <div className="mt-3 h-1.5 bg-slate-200 rounded-full overflow-hidden">
+                                  <motion.div
+                                    initial={{ width: 0 }}
+                                    animate={{
+                                      width:
+                                        stageData.status === "completed"
+                                          ? "100%"
+                                          : stageData.status === "failed"
+                                          ? "60%"
+                                          : "0%",
+                                    }}
+                                    transition={{ duration: 0.5, delay: 0.2 }}
+                                    className={cn(
+                                      "h-full rounded-full",
+                                      stageData.status === "failed"
+                                        ? "bg-red-500"
+                                        : `bg-${config.color}-500`
+                                    )}
+                                  />
+                                </div>
+                              </div>
+                            );
+                          }
+                        )}
+                      </div>
+                      {/* Total Time Summary */}
+                      <div className="mt-4 flex items-center justify-between p-4 bg-gradient-to-r from-indigo-50 to-violet-50 rounded-xl border border-indigo-100">
+                        <div className="flex items-center gap-3">
+                          <Clock className="h-5 w-5 text-indigo-600" />
+                          <div>
+                            <p className="text-sm font-medium text-slate-700">
+                              Total Processing Time
+                            </p>
+                            <p className="text-xs text-slate-500">
+                              From upload to output ready
+                            </p>
+                          </div>
+                        </div>
+                        <div className="text-right">
+                          <p className="text-2xl font-bold text-indigo-600">
+                            {formatTime(item.totalTime)}
+                          </p>
+                          <p className="text-xs text-slate-500">
+                            {item.status === "completed"
+                              ? "Completed successfully"
+                              : "Process failed"}
+                          </p>
+                        </div>
+                      </div>
+                    </div>
+                  </motion.div>
+                )}
+              </AnimatePresence>
+            </motion.div>
+          ))}
+          {filteredHistory.length === 0 && !error && (
+            <div className="text-center py-16">
+              <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
+                <FileText className="h-10 w-10 text-slate-300" />
+              </div>
+              <p className="text-slate-500 mb-2">
+                {history.length === 0
+                  ? "No extraction history yet"
+                  : "No extractions match your filters"}
+              </p>
+              {history.length === 0 && (
+                <p className="text-sm text-slate-400">
+                  Upload a document to get started
+                </p>
+              )}
+            </div>
+          )}
+        </div>
+        )}
+      </div>
+    </div>
+  );
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/pages/ShareHandler.jsx CHANGED Viewed

@@ -1,3 +1,4 @@
 import React, { useEffect, useState } from "react";
 import { useParams, useNavigate } from "react-router-dom";
 import { useAuth } from "@/contexts/AuthContext";
@@ -93,3 +94,100 @@ export default function ShareHandler() {
   return null;
 }

+<<<<<<< HEAD
 import React, { useEffect, useState } from "react";
 import { useParams, useNavigate } from "react-router-dom";
 import { useAuth } from "@/contexts/AuthContext";
   return null;
 }
+=======
+import React, { useEffect, useState } from "react";
+import { useParams, useNavigate } from "react-router-dom";
+import { useAuth } from "@/contexts/AuthContext";
+import { accessSharedExtraction } from "@/services/api";
+import LoginForm from "@/components/auth/LoginForm";
+export default function ShareHandler() {
+  const { token } = useParams();
+  const navigate = useNavigate();
+  const { isAuthenticated, loading } = useAuth();
+  const [isProcessing, setIsProcessing] = useState(false);
+  const [error, setError] = useState(null);
+  useEffect(() => {
+    const processShare = async () => {
+      if (loading) return; // Wait for auth to load
+      if (!isAuthenticated) {
+        // User not logged in - they'll be shown login form
+        // After login, AuthContext will trigger a re-render and this will run again
+        return;
+      }
+      // User is authenticated, process the share
+      if (isProcessing) return; // Prevent duplicate calls
+      setIsProcessing(true);
+      setError(null);
+      try {
+        const result = await accessSharedExtraction(token);
+        if (result.success && result.extraction_id) {
+          // Redirect to history page with the extraction ID
+          navigate(`/history?extractionId=${result.extraction_id}`);
+        } else {
+          setError("Failed to access shared extraction");
+        }
+      } catch (err) {
+        console.error("Share access error:", err);
+        setError(err.message || "Failed to access shared extraction");
+        // Still redirect to history after 3 seconds
+        setTimeout(() => {
+          navigate("/history");
+        }, 3000);
+      } finally {
+        setIsProcessing(false);
+      }
+    };
+    processShare();
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [token, isAuthenticated, loading]);
+  // Show login form if not authenticated
+  if (!isAuthenticated && !loading) {
+    return <LoginForm />;
+  }
+  // Show loading state while processing
+  if (isProcessing || loading) {
+    return (
+      <div className="min-h-screen flex items-center justify-center bg-[#FAFAFA]">
+        <div className="text-center">
+          <div className="h-16 w-16 mx-auto rounded-2xl bg-indigo-100 flex items-center justify-center mb-4 animate-pulse">
+            <div className="h-8 w-8 rounded-lg bg-indigo-600"></div>
+          </div>
+          <p className="text-slate-600">Loading shared extraction...</p>
+        </div>
+      </div>
+    );
+  }
+  // Show error state
+  if (error) {
+    return (
+      <div className="min-h-screen flex items-center justify-center bg-[#FAFAFA]">
+        <div className="text-center max-w-md mx-4">
+          <div className="h-16 w-16 mx-auto rounded-2xl bg-red-100 flex items-center justify-center mb-4">
+            <div className="h-8 w-8 rounded-lg bg-red-600"></div>
+          </div>
+          <h2 className="text-xl font-semibold text-slate-900 mb-2">Error</h2>
+          <p className="text-slate-600 mb-4">{error}</p>
+          <button
+            onClick={() => navigate("/history")}
+            className="px-4 py-2 bg-indigo-600 text-white rounded-lg hover:bg-indigo-700"
+          >
+            Go to History
+          </button>
+        </div>
+      </div>
+    );
+  }
+  return null;
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/services/api.js CHANGED Viewed

@@ -1,3 +1,4 @@
 /**
  * API service for communicating with the FastAPI backend
  */
@@ -171,3 +172,178 @@ export async function ping() {
   return await response.json();
 }

+<<<<<<< HEAD
 /**
  * API service for communicating with the FastAPI backend
  */
   return await response.json();
 }
+=======
+/**
+ * API service for communicating with the FastAPI backend
+ */
+const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || "";
+/**
+ * Get authorization headers with token
+ */
+function getAuthHeaders() {
+  const token = localStorage.getItem("auth_token");
+  return token ? { Authorization: `Bearer ${token}` } : {};
+}
+/**
+ * Extract data from a document
+ * @param {File} file - The file to extract data from
+ * @param {string} keyFields - Optional comma-separated list of fields to extract
+ * @returns {Promise<Object>} Extraction result with fields, confidence, etc.
+ */
+export async function extractDocument(file, keyFields = "") {
+  const formData = new FormData();
+  formData.append("file", file);
+  if (keyFields && keyFields.trim()) {
+    formData.append("key_fields", keyFields.trim());
+  }
+  const response = await fetch(`${API_BASE_URL}/api/extract`, {
+    method: "POST",
+    headers: getAuthHeaders(),
+    body: formData,
+  });
+  if (!response.ok) {
+    const errorData = await response.json().catch(() => ({
+      error: `HTTP ${response.status}: ${response.statusText}`,
+    }));
+    throw new Error(errorData.error || errorData.detail || "Extraction failed");
+  }
+  return await response.json();
+}
+/**
+ * Get extraction history
+ * @returns {Promise<Array>} Array of extraction records
+ */
+export async function getHistory() {
+  const response = await fetch(`${API_BASE_URL}/api/history`, {
+    headers: getAuthHeaders(),
+  });
+  if (!response.ok) {
+    const errorData = await response.json().catch(() => ({
+      error: `HTTP ${response.status}: ${response.statusText}`,
+    }));
+    throw new Error(errorData.error || errorData.detail || "Failed to fetch history");
+  }
+  return await response.json();
+}
+/**
+ * Get a specific extraction by ID with full fields data
+ * @param {number} extractionId - The extraction ID
+ * @returns {Promise<Object>} Extraction result with fields
+ */
+export async function getExtractionById(extractionId) {
+  const response = await fetch(`${API_BASE_URL}/api/extraction/${extractionId}`, {
+    headers: getAuthHeaders(),
+  });
+  if (!response.ok) {
+    const errorData = await response.json().catch(() => ({
+      error: `HTTP ${response.status}: ${response.statusText}`,
+    }));
+    throw new Error(errorData.error || errorData.detail || "Failed to fetch extraction");
+  }
+  return await response.json();
+}
+/**
+ * Create a shareable link for an extraction
+ * @param {number} extractionId - The extraction ID to share
+ * @returns {Promise<Object>} Share link result with share_link
+ */
+export async function createShareLink(extractionId) {
+  const response = await fetch(`${API_BASE_URL}/api/share/link`, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      ...getAuthHeaders(),
+    },
+    body: JSON.stringify({
+      extraction_id: extractionId,
+    }),
+  });
+  if (!response.ok) {
+    const errorData = await response.json().catch(() => ({
+      error: `HTTP ${response.status}: ${response.statusText}`,
+    }));
+    throw new Error(errorData.error || errorData.detail || "Failed to create share link");
+  }
+  return await response.json();
+}
+/**
+ * Share an extraction with another user(s)
+ * @param {number} extractionId - The extraction ID to share
+ * @param {string|string[]} recipientEmails - Recipient email address(es) - can be a single email or array of emails
+ * @returns {Promise<Object>} Share result
+ */
+export async function shareExtraction(extractionId, recipientEmails) {
+  // Ensure recipient_emails is always an array
+  const emailsArray = Array.isArray(recipientEmails) ? recipientEmails : [recipientEmails];
+  const response = await fetch(`${API_BASE_URL}/api/share`, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      ...getAuthHeaders(),
+    },
+    body: JSON.stringify({
+      extraction_id: extractionId,
+      recipient_emails: emailsArray,
+    }),
+  });
+  if (!response.ok) {
+    const errorData = await response.json().catch(() => ({
+      error: `HTTP ${response.status}: ${response.statusText}`,
+    }));
+    throw new Error(errorData.error || errorData.detail || "Failed to share extraction");
+  }
+  return await response.json();
+}
+/**
+ * Access a shared extraction by token
+ * @param {string} token - Share token
+ * @returns {Promise<Object>} Share access result with extraction_id
+ */
+export async function accessSharedExtraction(token) {
+  const response = await fetch(`${API_BASE_URL}/api/share/${token}`, {
+    headers: getAuthHeaders(),
+  });
+  if (!response.ok) {
+    const errorData = await response.json().catch(() => ({
+      error: `HTTP ${response.status}: ${response.statusText}`,
+    }));
+    throw new Error(errorData.error || errorData.detail || "Failed to access shared extraction");
+  }
+  return await response.json();
+}
+/**
+ * Health check endpoint
+ * @returns {Promise<Object>} Status object
+ */
+export async function ping() {
+  const response = await fetch(`${API_BASE_URL}/ping`);
+  if (!response.ok) {
+    throw new Error("Backend is not available");
+  }
+  return await response.json();
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

frontend/src/services/auth.js CHANGED Viewed

@@ -1,3 +1,4 @@
 /**
  * Authentication service for Firebase and OTP authentication
  */
@@ -109,3 +110,116 @@ export async function logout() {
   return Promise.resolve();
 }

+<<<<<<< HEAD
 /**
  * Authentication service for Firebase and OTP authentication
  */
   return Promise.resolve();
 }
+=======
+/**
+ * Authentication service for Firebase and OTP authentication
+ */
+const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || "";
+/**
+ * Get the current authenticated user
+ * @returns {Promise<Object>} User object
+ */
+export async function getCurrentUser() {
+  const token = localStorage.getItem("auth_token");
+  if (!token) {
+    throw new Error("No token found");
+  }
+  const response = await fetch(`${API_BASE_URL}/api/auth/me`, {
+    method: "GET",
+    headers: {
+      Authorization: `Bearer ${token}`,
+    },
+  });
+  if (!response.ok) {
+    if (response.status === 401) {
+      localStorage.removeItem("auth_token");
+    }
+    const errorData = await response.json().catch(() => ({}));
+    throw new Error(errorData.detail || "Failed to get user");
+  }
+  return await response.json();
+}
+/**
+ * Login with Firebase ID token
+ * @param {string} idToken - Firebase ID token
+ * @returns {Promise<Object>} Response with token and user
+ */
+export async function firebaseLogin(idToken) {
+  const response = await fetch(`${API_BASE_URL}/api/auth/firebase/login`, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({ id_token: idToken }),
+  });
+  if (!response.ok) {
+    const errorData = await response.json().catch(() => ({}));
+    throw new Error(errorData.detail || "Firebase login failed");
+  }
+  return await response.json();
+}
+/**
+ * Request OTP for email login
+ * @param {string} email - Email address
+ * @returns {Promise<Object>} Response with success message
+ */
+export async function requestOTP(email) {
+  const response = await fetch(`${API_BASE_URL}/api/auth/otp/request`, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({ email }),
+  });
+  if (!response.ok) {
+    const errorData = await response.json().catch(() => ({}));
+    throw new Error(errorData.detail || "Failed to send OTP");
+  }
+  return await response.json();
+}
+/**
+ * Verify OTP and login
+ * @param {string} email - Email address
+ * @param {string} otp - OTP code
+ * @returns {Promise<Object>} Response with token and user
+ */
+export async function verifyOTP(email, otp) {
+  const response = await fetch(`${API_BASE_URL}/api/auth/otp/verify`, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({ email, otp }),
+  });
+  if (!response.ok) {
+    const errorData = await response.json().catch(() => ({}));
+    throw new Error(errorData.detail || "OTP verification failed");
+  }
+  return await response.json();
+}
+/**
+ * Logout the current user
+ * @returns {Promise<void>}
+ */
+export async function logout() {
+  // For JWT tokens, logout is handled client-side by removing the token
+  // No server-side logout needed
+  return Promise.resolve();
+}
+>>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d