Seth committed on
Commit
8e8c6a4
·
1 Parent(s): a3239f4
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. .gitattributes +35 -35
  3. API_KEY_QUICK_START.md +140 -0
  4. Dockerfile +83 -50
  5. EXTERNAL_API_DOCUMENTATION.md +458 -0
  6. FIREBASE_OTP_SETUP.md +296 -0
  7. GOOGLE_OAUTH_SETUP.md +79 -0
  8. HUGGINGFACE_SPACES_SETUP.md +186 -0
  9. IMPLEMENTATION_COMPLETE.md +257 -0
  10. README.md +10 -10
  11. backend/app/api_key_auth.py +100 -0
  12. backend/app/apollo_service.py +444 -0
  13. backend/app/auth.py +92 -0
  14. backend/app/auth_routes.py +347 -0
  15. backend/app/brevo_service.py +486 -0
  16. backend/app/db.py +18 -18
  17. backend/app/email_validator.py +61 -0
  18. backend/app/firebase_auth.py +92 -0
  19. backend/app/main.py +786 -292
  20. backend/app/models.py +136 -32
  21. backend/app/monday_service.py +391 -0
  22. backend/app/openrouter_client.py +862 -627
  23. backend/app/otp_service.py +197 -0
  24. backend/app/schemas.py +26 -26
  25. backend/requirements.txt +15 -11
  26. frontend/build-env.sh +22 -0
  27. frontend/index.html +13 -12
  28. frontend/package.json +26 -25
  29. frontend/postcss.config.cjs +6 -6
  30. frontend/src/App.jsx +106 -30
  31. frontend/src/Layout.jsx +179 -143
  32. frontend/src/components/ErrorBoundary.jsx +72 -72
  33. frontend/src/components/ExportButtons.jsx +692 -320
  34. frontend/src/components/ShareLinkModal.jsx +141 -0
  35. frontend/src/components/ShareModal.jsx +197 -0
  36. frontend/src/components/auth/LoginForm.jsx +512 -0
  37. frontend/src/components/ocr/DocumentPreview.jsx +229 -236
  38. frontend/src/components/ocr/ExtractionOutput.jsx +1201 -639
  39. frontend/src/components/ocr/ProcessingStatus.jsx +118 -111
  40. frontend/src/components/ocr/UpgradeModal.jsx +213 -0
  41. frontend/src/components/ocr/UploadZone.jsx +251 -147
  42. frontend/src/components/ui/badge.jsx +24 -24
  43. frontend/src/components/ui/button.jsx +38 -38
  44. frontend/src/components/ui/dropdown-menu.jsx +113 -113
  45. frontend/src/components/ui/input.jsx +14 -14
  46. frontend/src/components/ui/select.jsx +116 -116
  47. frontend/src/components/ui/separator.jsx +16 -0
  48. frontend/src/components/ui/tabs.jsx +45 -45
  49. frontend/src/config/firebase.js +30 -0
  50. frontend/src/contexts/AuthContext.jsx +115 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
API_KEY_QUICK_START.md ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # API Key Authentication - Quick Start Guide
2
+
3
+ ## Summary
4
+
5
+ API key authentication has been successfully implemented for external applications. The `/api/extract` endpoint now supports both JWT Bearer tokens and API keys.
6
+
7
+ ## Quick Steps to Use from External Applications
8
+
9
+ ### 1. Get an API Key
10
+
11
+ **Option A: Via Web UI (if available)**
12
+ - Log in to your account
13
+ - Navigate to API Keys section
14
+ - Create a new API key
15
+ - Copy and store it securely
16
+
17
+ **Option B: Via API**
18
+
19
+ ```bash
20
+ # Step 1: Authenticate and get JWT token
21
+ curl -X POST https://your-api-url/api/auth/otp/request \
22
+ -H "Content-Type: application/json" \
23
+ -d '{"email": "your-email@company.com"}'
24
+
25
+ # Step 2: Verify OTP
26
+ curl -X POST https://your-api-url/api/auth/otp/verify \
27
+ -H "Content-Type: application/json" \
28
+ -d '{"email": "your-email@company.com", "otp": "123456"}'
29
+
30
+ # Step 3: Create API key (use token from step 2)
31
+ curl -X POST https://your-api-url/api/auth/api-key/create \
32
+ -H "Authorization: Bearer YOUR_JWT_TOKEN" \
33
+ -H "Content-Type: application/json" \
34
+ -d '{"name": "My App"}'
35
+ ```
36
+
37
+ **Response:**
38
+ ```jsonc
39
+ {
40
+ "success": true,
41
+ "api_key": "sk_live_abc123...", // ⚠️ SAVE THIS!
42
+ "key_prefix": "sk_live_abc...",
43
+ "message": "API key created successfully. Store this key securely - it will not be shown again!"
44
+ }
45
+ ```
46
+
47
+ ### 2. Use API Key to Extract Documents
48
+
49
+ ```bash
50
+ curl -X POST https://your-api-url/api/extract \
51
+ -H "X-API-Key: sk_live_abc123..." \
52
+ -F "file=@document.pdf" \
53
+ -F "key_fields=Invoice Number,Invoice Date,Total Amount"
54
+ ```
55
+
56
+ ## Authentication Methods
57
+
58
+ The `/api/extract` endpoint accepts **either**:
59
+
60
+ 1. **API Key**: `X-API-Key: sk_live_...` header
61
+ 2. **JWT Token**: `Authorization: Bearer <token>` header
62
+
63
+ ## New Endpoints
64
+
65
+ - `POST /api/auth/api-key/create` - Create new API key (requires JWT)
66
+ - `GET /api/auth/api-keys` - List your API keys (requires JWT)
67
+ - `DELETE /api/auth/api-key/{key_id}` - Deactivate API key (requires JWT)
68
+
69
+ ## Security Features
70
+
71
+ - ✅ API keys are hashed (SHA-256) before storage
72
+ - ✅ Only key prefix shown when listing keys
73
+ - ✅ Usage tracking (`last_used_at` timestamp)
74
+ - ✅ Soft delete (deactivation) support
75
+ - ✅ Each key is tied to a single user account
76
+
77
+ ## Example Code
78
+
79
+ ### Python
80
+ ```python
81
+ import requests
82
+
83
+ API_KEY = "sk_live_abc123..."
84
+ url = "https://your-api-url/api/extract"
85
+
86
+ with open("document.pdf", "rb") as f:
87
+ response = requests.post(
88
+ url,
89
+ headers={"X-API-Key": API_KEY},
90
+ files={"file": f},
91
+ data={"key_fields": "Invoice Number,Invoice Date"}
92
+ )
93
+ print(response.json())
94
+ ```
95
+
96
+ ### JavaScript
97
+ ```javascript
98
+ const FormData = require('form-data');
99
+ const fs = require('fs');
100
+ const axios = require('axios');
101
+
102
+ const form = new FormData();
103
+ form.append('file', fs.createReadStream('document.pdf'));
104
+ form.append('key_fields', 'Invoice Number,Invoice Date');
105
+
106
+ axios.post('https://your-api-url/api/extract', form, {
107
+ headers: {
108
+ 'X-API-Key': 'sk_live_abc123...',
109
+ ...form.getHeaders()
110
+ }
111
+ }).then(response => console.log(response.data));
112
+ ```
113
+
114
+ ## Full Documentation
115
+
116
+ See `EXTERNAL_API_DOCUMENTATION.md` for complete documentation with:
117
+ - Detailed API reference
118
+ - Error handling
119
+ - Response formats
120
+ - Multiple language examples (Python, JavaScript, PHP)
121
+ - Best practices
122
+
123
+ ## Database Migration
124
+
125
+ The new `api_keys` table will be created automatically when you restart the application (SQLAlchemy's `create_all` handles this).
126
+
127
+ ## Testing
128
+
129
+ 1. Start your backend server
130
+ 2. Create an API key using the steps above
131
+ 3. Test the extraction endpoint with the API key
132
+ 4. Verify the response contains extracted data
133
+
134
+ ## Notes
135
+
136
+ - API keys are shown **only once** when created - store them securely!
137
+ - Business email required for account creation
138
+ - Max file size: 4 MB
139
+ - Supported formats: PDF, PNG, JPEG, TIFF
140
+
Dockerfile CHANGED
@@ -1,50 +1,83 @@
1
- # ---------- 1) Build frontend (React + Vite) ----------
2
- FROM node:20-alpine AS frontend-build
3
- WORKDIR /frontend
4
-
5
- # Install frontend dependencies
6
- COPY frontend/package*.json ./
7
- RUN npm install
8
-
9
- # Copy rest of frontend and build
10
- COPY frontend/ .
11
- RUN npm run build
12
- # Vite will output to /frontend/dist by default
13
-
14
- # ---------- 2) Backend (FastAPI + Python) ----------
15
- FROM python:3.11-slim
16
-
17
- ENV PYTHONDONTWRITEBYTECODE=1
18
- ENV PYTHONUNBUFFERED=1
19
-
20
- WORKDIR /app
21
-
22
- # System deps (optional but useful for some libs)
23
- RUN apt-get update && apt-get install -y --no-install-recommends \
24
- build-essential \
25
- && rm -rf /var/lib/apt/lists/*
26
-
27
- # Install backend dependencies
28
- COPY backend/requirements.txt ./backend/requirements.txt
29
- RUN pip install --no-cache-dir -r backend/requirements.txt
30
-
31
- # Copy backend code
32
- COPY backend ./backend
33
-
34
- # Copy built frontend into backend/frontend_dist
35
- # FastAPI will serve from this folder later
36
- RUN mkdir -p backend/frontend_dist
37
- COPY --from=frontend-build /frontend/dist ./backend/frontend_dist
38
-
39
- # Create data directory for SQLite
40
- RUN mkdir -p data
41
-
42
- # Env vars used in backend/db.py etc.
43
- ENV DB_PATH=/app/data/app.db
44
- ENV PORT=7860
45
- ENV PYTHONPATH=/app
46
-
47
- EXPOSE 7860
48
-
49
- # Launch FastAPI app (we'll use backend.app.main:app)
50
- CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---------- 1) Build frontend (React + Vite) ----------
2
+ FROM node:20-alpine AS frontend-build
3
+ WORKDIR /frontend
4
+
5
+ # Accept build arguments for Vite environment variables
6
+ ARG VITE_FIREBASE_API_KEY
7
+ ARG VITE_FIREBASE_AUTH_DOMAIN
8
+ ARG VITE_FIREBASE_PROJECT_ID
9
+ ARG VITE_FIREBASE_STORAGE_BUCKET
10
+ ARG VITE_FIREBASE_MESSAGING_SENDER_ID
11
+ ARG VITE_FIREBASE_APP_ID
12
+ ARG VITE_API_BASE_URL
13
+
14
+ # Set as environment variables so they're available to the build script
15
+ ENV VITE_FIREBASE_API_KEY=$VITE_FIREBASE_API_KEY
16
+ ENV VITE_FIREBASE_AUTH_DOMAIN=$VITE_FIREBASE_AUTH_DOMAIN
17
+ ENV VITE_FIREBASE_PROJECT_ID=$VITE_FIREBASE_PROJECT_ID
18
+ ENV VITE_FIREBASE_STORAGE_BUCKET=$VITE_FIREBASE_STORAGE_BUCKET
19
+ ENV VITE_FIREBASE_MESSAGING_SENDER_ID=$VITE_FIREBASE_MESSAGING_SENDER_ID
20
+ ENV VITE_FIREBASE_APP_ID=$VITE_FIREBASE_APP_ID
21
+ ENV VITE_API_BASE_URL=$VITE_API_BASE_URL
22
+
23
+ # Install frontend dependencies
24
+ COPY frontend/package*.json ./
25
+ RUN npm install
26
+
27
+ # Copy rest of frontend
28
+ COPY frontend/ .
29
+
30
+ # Create .env file from environment variables and build
31
+ # Inline the script to avoid permission issues
32
+ RUN echo "Checking environment variables..." && \
33
+ [ -z "$VITE_FIREBASE_API_KEY" ] && echo "WARNING: VITE_FIREBASE_API_KEY is not set" || echo "✓ VITE_FIREBASE_API_KEY is set" && \
34
+ [ -z "$VITE_FIREBASE_AUTH_DOMAIN" ] && echo "WARNING: VITE_FIREBASE_AUTH_DOMAIN is not set" || echo "✓ VITE_FIREBASE_AUTH_DOMAIN is set" && \
35
+ [ -z "$VITE_FIREBASE_PROJECT_ID" ] && echo "WARNING: VITE_FIREBASE_PROJECT_ID is not set" || echo "✓ VITE_FIREBASE_PROJECT_ID is set" && \
36
+ echo "VITE_FIREBASE_API_KEY=${VITE_FIREBASE_API_KEY:-}" > .env && \
37
+ echo "VITE_FIREBASE_AUTH_DOMAIN=${VITE_FIREBASE_AUTH_DOMAIN:-}" >> .env && \
38
+ echo "VITE_FIREBASE_PROJECT_ID=${VITE_FIREBASE_PROJECT_ID:-}" >> .env && \
39
+ echo "VITE_FIREBASE_STORAGE_BUCKET=${VITE_FIREBASE_STORAGE_BUCKET:-}" >> .env && \
40
+ echo "VITE_FIREBASE_MESSAGING_SENDER_ID=${VITE_FIREBASE_MESSAGING_SENDER_ID:-}" >> .env && \
41
+ echo "VITE_FIREBASE_APP_ID=${VITE_FIREBASE_APP_ID:-}" >> .env && \
42
+ echo "VITE_API_BASE_URL=${VITE_API_BASE_URL:-}" >> .env && \
43
+ echo "Created .env file with environment variables" && \
44
+ npm run build
45
+ # Vite will output to /frontend/dist by default
46
+
47
+ # ---------- 2) Backend (FastAPI + Python) ----------
48
+ FROM python:3.11-slim
49
+
50
+ ENV PYTHONDONTWRITEBYTECODE=1
51
+ ENV PYTHONUNBUFFERED=1
52
+
53
+ WORKDIR /app
54
+
55
+ # System deps (optional but useful for some libs)
56
+ RUN apt-get update && apt-get install -y --no-install-recommends \
57
+ build-essential \
58
+ && rm -rf /var/lib/apt/lists/*
59
+
60
+ # Install backend dependencies
61
+ COPY backend/requirements.txt ./backend/requirements.txt
62
+ RUN pip install --no-cache-dir -r backend/requirements.txt
63
+
64
+ # Copy backend code
65
+ COPY backend ./backend
66
+
67
+ # Copy built frontend into backend/frontend_dist
68
+ # FastAPI will serve from this folder later
69
+ RUN mkdir -p backend/frontend_dist
70
+ COPY --from=frontend-build /frontend/dist ./backend/frontend_dist
71
+
72
+ # Create data directory for SQLite
73
+ RUN mkdir -p data
74
+
75
+ # Env vars used in backend/db.py etc.
76
+ ENV DB_PATH=/app/data/app.db
77
+ ENV PORT=7860
78
+ ENV PYTHONPATH=/app
79
+
80
+ EXPOSE 7860
81
+
82
+ # Launch FastAPI app (we'll use backend.app.main:app)
83
+ CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "7860"]
EXTERNAL_API_DOCUMENTATION.md ADDED
@@ -0,0 +1,458 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # External API Documentation
2
+
3
+ This document explains how to use the Document Parsing API from external applications using API key authentication.
4
+
5
+ ## Table of Contents
6
+ 1. [Overview](#overview)
7
+ 2. [Authentication](#authentication)
8
+ 3. [API Endpoints](#api-endpoints)
9
+ 4. [Usage Examples](#usage-examples)
10
+ 5. [Response Format](#response-format)
11
+ 6. [Error Handling](#error-handling)
12
+
13
+ ## Overview
14
+
15
+ The Document Parsing API allows external applications to extract text and structured data from PDF and image files. The API supports:
16
+
17
+ - **File Types**: PDF, PNG, JPEG, TIFF
18
+ - **Max File Size**: 4 MB
19
+ - **Authentication**: API Key (via `X-API-Key` header) or JWT Bearer token
20
+ - **Response Format**: JSON
21
+
22
+ ## Authentication
23
+
24
+ ### Step 1: Create an Account
25
+
26
+ First, you need to create an account using one of these methods:
27
+
28
+ 1. **Firebase Authentication** (via web UI)
29
+ 2. **OTP Authentication** (via API)
30
+
31
+ #### OTP Authentication Flow
32
+
33
+ ```bash
34
+ # 1. Request OTP
35
+ curl -X POST https://your-api-url/api/auth/otp/request \
36
+ -H "Content-Type: application/json" \
37
+ -d '{
38
+ "email": "your-business-email@company.com"
39
+ }'
40
+
41
+ # Response:
42
+ # {
43
+ # "success": true,
44
+ # "message": "OTP sent to your email"
45
+ # }
46
+
47
+ # 2. Verify OTP and get JWT token
48
+ curl -X POST https://your-api-url/api/auth/otp/verify \
49
+ -H "Content-Type: application/json" \
50
+ -d '{
51
+ "email": "your-business-email@company.com",
52
+ "otp": "123456"
53
+ }'
54
+
55
+ # Response:
56
+ # {
57
+ # "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
58
+ # "user": { ... }
59
+ # }
60
+ ```
61
+
62
+ **Note**: Only business email addresses are allowed (no Gmail, Yahoo, etc.)
63
+
64
+ ### Step 2: Create an API Key
65
+
66
+ Once authenticated, create an API key for your external application:
67
+
68
+ ```bash
69
+ # Create API key (requires JWT token from Step 1)
70
+ curl -X POST https://your-api-url/api/auth/api-key/create \
71
+ -H "Authorization: Bearer YOUR_JWT_TOKEN" \
72
+ -H "Content-Type: application/json" \
73
+ -d '{
74
+ "name": "My External App"
75
+ }'
76
+
77
+ # Response:
78
+ # {
79
+ # "success": true,
80
+ # "api_key": "sk_live_abc123...", # ⚠️ SAVE THIS - shown only once!
81
+ # "key_id": 1,
82
+ # "key_prefix": "sk_live_abc...",
83
+ # "name": "My External App",
84
+ # "created_at": "2024-01-15T10:30:00",
85
+ # "message": "API key created successfully. Store this key securely - it will not be shown again!"
86
+ # }
87
+ ```
88
+
89
+ **⚠️ IMPORTANT**: The full API key is only shown once when created. Store it securely in your application's environment variables or secret management system.
90
+
91
+ ### Step 3: Use API Key for Authentication
92
+
93
+ Use the API key in the `X-API-Key` header for all subsequent API calls:
94
+
95
+ ```bash
96
+ curl -X POST https://your-api-url/api/extract \
97
+ -H "X-API-Key: sk_live_abc123..." \
98
+ -F "file=@document.pdf" \
99
+ -F "key_fields=Invoice Number,Invoice Date,Total Amount"
100
+ ```
101
+
102
+ ## API Endpoints
103
+
104
+ ### 1. Document Extraction
105
+
106
+ **Endpoint**: `POST /api/extract`
107
+
108
+ **Authentication**:
109
+ - API Key: `X-API-Key: <your-api-key>`
110
+ - OR JWT: `Authorization: Bearer <jwt-token>`
111
+
112
+ **Parameters**:
113
+ - `file` (required): The document file (PDF, PNG, JPEG, TIFF)
114
+ - `key_fields` (optional): Comma-separated list of specific fields to extract
115
+
116
+ **Example Request**:
117
+
118
+ ```bash
119
+ curl -X POST https://your-api-url/api/extract \
120
+ -H "X-API-Key: sk_live_abc123..." \
121
+ -F "file=@invoice.pdf" \
122
+ -F "key_fields=Invoice Number,Invoice Date,Total Amount,PO Number"
123
+ ```
124
+
125
+ **Example without `key_fields` (full extraction only)**:
126
+
127
+ ```bash
128
+ curl -X POST https://your-api-url/api/extract \
129
+ -H "X-API-Key: sk_live_abc123..." \
130
+ -F "file=@/path/to/document.pdf"
131
+ ```
132
+
133
+ ### 2. List API Keys
134
+
135
+ **Endpoint**: `GET /api/auth/api-keys`
136
+
137
+ **Authentication**: JWT Bearer token (required)
138
+
139
+ **Example**:
140
+
141
+ ```bash
142
+ curl -X GET https://your-api-url/api/auth/api-keys \
143
+ -H "Authorization: Bearer YOUR_JWT_TOKEN"
144
+ ```
145
+
146
+ **Response**:
147
+
148
+ ```json
149
+ {
150
+ "success": true,
151
+ "api_keys": [
152
+ {
153
+ "id": 1,
154
+ "name": "My External App",
155
+ "key_prefix": "sk_live_abc...",
156
+ "is_active": true,
157
+ "last_used_at": "2024-01-15T14:30:00",
158
+ "created_at": "2024-01-15T10:30:00"
159
+ }
160
+ ]
161
+ }
162
+ ```
163
+
164
+ ### 3. Delete (Deactivate) API Key
165
+
166
+ **Endpoint**: `DELETE /api/auth/api-key/{key_id}`
167
+
168
+ **Authentication**: JWT Bearer token (required)
169
+
170
+ **Example**:
171
+
172
+ ```bash
173
+ curl -X DELETE https://your-api-url/api/auth/api-key/1 \
174
+ -H "Authorization: Bearer YOUR_JWT_TOKEN"
175
+ ```
176
+
177
+ ## Usage Examples
178
+
179
+ ### Python Example
180
+
181
+ ```python
182
+ import requests
183
+
184
+ # API Configuration
185
+ API_BASE_URL = "https://your-api-url"
186
+ API_KEY = "sk_live_abc123..." # Your API key
187
+
188
+ # Extract document
189
+ def extract_document(file_path, key_fields=None):
190
+ url = f"{API_BASE_URL}/api/extract"
191
+ headers = {
192
+ "X-API-Key": API_KEY
193
+ }
194
+
195
+ with open(file_path, 'rb') as f:
196
+ files = {'file': f}
197
+ data = {}
198
+ if key_fields:
199
+ data['key_fields'] = key_fields
200
+
201
+ response = requests.post(url, headers=headers, files=files, data=data)
202
+ response.raise_for_status()
203
+ return response.json()
204
+
205
+ # Usage
206
+ result = extract_document("invoice.pdf", key_fields="Invoice Number,Invoice Date,Total Amount")
207
+ print(result)
208
+ ```
209
+
210
+ ### JavaScript/Node.js Example
211
+
212
+ ```javascript
213
+ const FormData = require('form-data');
214
+ const fs = require('fs');
215
+ const axios = require('axios');
216
+
217
+ // API Configuration
218
+ const API_BASE_URL = 'https://your-api-url';
219
+ const API_KEY = 'sk_live_abc123...'; // Your API key
220
+
221
+ // Extract document
222
+ async function extractDocument(filePath, keyFields = null) {
223
+ const form = new FormData();
224
+ form.append('file', fs.createReadStream(filePath));
225
+ if (keyFields) {
226
+ form.append('key_fields', keyFields);
227
+ }
228
+
229
+ try {
230
+ const response = await axios.post(`${API_BASE_URL}/api/extract`, form, {
231
+ headers: {
232
+ 'X-API-Key': API_KEY,
233
+ ...form.getHeaders()
234
+ }
235
+ });
236
+ return response.data;
237
+ } catch (error) {
238
+ console.error('Error:', error.response?.data || error.message);
239
+ throw error;
240
+ }
241
+ }
242
+
243
+ // Usage
244
+ extractDocument('invoice.pdf', 'Invoice Number,Invoice Date,Total Amount')
245
+ .then(result => console.log(result))
246
+ .catch(error => console.error(error));
247
+ ```
248
+
249
+ ### PHP Example
250
+
251
+ ```php
252
+ <?php
253
+
254
+ $apiBaseUrl = "https://your-api-url";
255
+ $apiKey = "sk_live_abc123..."; // Your API key
256
+
257
+ function extractDocument($filePath, $keyFields = null) {
258
+ global $apiBaseUrl, $apiKey;
259
+
260
+ $url = $apiBaseUrl . "/api/extract";
261
+
262
+ $curl = curl_init();
263
+
264
+ $postData = [
265
+ 'file' => new CURLFile($filePath)
266
+ ];
267
+
268
+ if ($keyFields) {
269
+ $postData['key_fields'] = $keyFields;
270
+ }
271
+
272
+ curl_setopt_array($curl, [
273
+ CURLOPT_URL => $url,
274
+ CURLOPT_RETURNTRANSFER => true,
275
+ CURLOPT_POST => true,
276
+ CURLOPT_POSTFIELDS => $postData,
277
+ CURLOPT_HTTPHEADER => [
278
+ "X-API-Key: " . $apiKey
279
+ ]
280
+ ]);
281
+
282
+ $response = curl_exec($curl);
283
+ $httpCode = curl_getinfo($curl, CURLINFO_HTTP_CODE);
284
+ curl_close($curl);
285
+
286
+ if ($httpCode !== 200) {
287
+ throw new Exception("API request failed: " . $response);
288
+ }
289
+
290
+ return json_decode($response, true);
291
+ }
292
+
293
+ // Usage
294
+ try {
295
+ $result = extractDocument("invoice.pdf", "Invoice Number,Invoice Date,Total Amount");
296
+ print_r($result);
297
+ } catch (Exception $e) {
298
+ echo "Error: " . $e->getMessage();
299
+ }
300
+ ?>
301
+ ```
302
+
303
+ ## Response Format
304
+
305
+ ### Success Response
306
+
307
+ ```json
308
+ {
309
+ "id": 123,
310
+ "fileName": "invoice.pdf",
311
+ "fileType": "application/pdf",
312
+ "fileSize": "2.5 MB",
313
+ "status": "completed",
314
+ "confidence": 92.5,
315
+ "fieldsExtracted": 15,
316
+ "totalTime": 3500,
317
+ "fields": {
318
+ "page_1": {
319
+ "text": "Extracted text from page 1...",
320
+ "table": {
321
+ "row_1": {
322
+ "column_1": "value1",
323
+ "column_2": "value2"
324
+ }
325
+ },
326
+ "footer_notes": ["Note 1", "Note 2"]
327
+ }
328
+ },
329
+ "full_text": "Complete extracted text from all pages...",
330
+ "Fields": {
331
+ "Invoice Number": "INV-001",
332
+ "Invoice Date": "2024-01-15",
333
+ "Total Amount": "$1,234.56"
334
+ },
335
+ "stages": {
336
+ "uploading": {
337
+ "time": 525,
338
+ "status": "completed",
339
+ "variation": "normal"
340
+ },
341
+ "aiAnalysis": {
342
+ "time": 1925,
343
+ "status": "completed",
344
+ "variation": "normal"
345
+ },
346
+ "dataExtraction": {
347
+ "time": 700,
348
+ "status": "completed",
349
+ "variation": "fast"
350
+ },
351
+ "outputRendering": {
352
+ "time": 350,
353
+ "status": "completed",
354
+ "variation": "normal"
355
+ }
356
+ },
357
+ "errorMessage": null
358
+ }
359
+ ```
360
+
361
+ ### Response Fields
362
+
363
+ - `id`: Extraction record ID
364
+ - `fileName`: Original filename
365
+ - `fileType`: MIME type of the file
366
+ - `fileSize`: File size as a human-readable string with unit (e.g. `"2.5 MB"`)
367
+ - `status`: "completed" or "failed"
368
+ - `confidence`: Extraction confidence (0-100)
369
+ - `fieldsExtracted`: Number of fields extracted
370
+ - `totalTime`: Total processing time in milliseconds
371
+ - `fields`: Structured data with page-wise extraction (tables, text, metadata)
372
+ - `full_text`: Complete extracted text from all pages
373
+ - `Fields`: User-specified fields extracted (if `key_fields` parameter was provided)
374
+ - `stages`: Processing stage timings
375
+ - `errorMessage`: Error message if extraction failed
376
+
377
+ ## Error Handling
378
+
379
+ ### Authentication Errors
380
+
381
+ **401 Unauthorized** - Invalid or missing API key:
382
+
383
+ ```json
384
+ {
385
+ "detail": "Invalid API key"
386
+ }
387
+ ```
388
+
389
+ **401 Unauthorized** - No authentication provided:
390
+
391
+ ```json
392
+ {
393
+ "detail": "Authentication required. Provide either a Bearer token or X-API-Key header."
394
+ }
395
+ ```
396
+
397
+ ### Validation Errors
398
+
399
+ **400 Bad Request** - File too large:
400
+
401
+ ```json
402
+ {
403
+ "detail": "File size exceeds 4 MB limit. Your file is 5.2 MB."
404
+ }
405
+ ```
406
+
407
+ **400 Bad Request** - Invalid file type:
408
+
409
+ ```json
410
+ {
411
+ "detail": "Only PDF, PNG, JPG, and TIFF files are allowed."
412
+ }
413
+ ```
414
+
415
+ ### Processing Errors
416
+
417
+ **500 Internal Server Error** - Extraction failed:
418
+
419
+ ```json
420
+ {
421
+ "id": 123,
422
+ "status": "failed",
423
+ "confidence": 0.0,
424
+ "fieldsExtracted": 0,
425
+ "errorMessage": "OCR processing failed: ..."
426
+ }
427
+ ```
428
+
429
+ ## Best Practices
430
+
431
+ 1. **Store API Keys Securely**: Never commit API keys to version control. Use environment variables or secret management systems.
432
+
433
+ 2. **Handle Errors Gracefully**: Always check the `status` field in the response. If `status` is "failed", check `errorMessage` for details.
434
+
435
+ 3. **Respect Rate Limits**: If rate limiting is implemented, handle 429 responses appropriately with exponential backoff.
436
+
437
+ 4. **Validate File Types**: Check file type and size before uploading to avoid unnecessary API calls.
438
+
439
+ 5. **Use Specific Fields**: When you know what fields to extract, use the `key_fields` parameter for better accuracy and faster processing.
440
+
441
+ 6. **Monitor API Key Usage**: Regularly check your API keys via the `/api/auth/api-keys` endpoint to monitor usage and detect unauthorized access.
442
+
443
+ ## Security Notes
444
+
445
+ - API keys are hashed before storage in the database
446
+ - Only the key prefix is shown when listing API keys
447
+ - API keys can be deactivated (soft deleted) but not permanently deleted
448
+ - Each API key is tied to a specific user account
449
+ - API key usage is tracked with `last_used_at` timestamp
450
+
451
+ ## Support
452
+
453
+ For issues or questions:
454
+ 1. Check the error message in the API response
455
+ 2. Verify your API key is active and correct
456
+ 3. Ensure your file meets the requirements (type, size)
457
+ 4. Check the API status endpoint: `GET /ping`
458
+
FIREBASE_OTP_SETUP.md ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Firebase Authentication + OTP Setup Guide
2
+
3
+ This application uses Firebase Authentication for Google sign-in and Brevo for OTP email delivery. Only business email addresses are allowed.
4
+
5
+ ## Prerequisites
6
+
7
+ 1. Firebase project
8
+ 2. Brevo account (for sending OTP emails)
9
+ 3. Business email domain verification
10
+
11
+ ---
12
+
13
+ ## Step 1: Firebase Setup
14
+
15
+ ### 1.1 Create Firebase Project
16
+
17
+ 1. Go to [Firebase Console](https://console.firebase.google.com/)
18
+ 2. Click "Add project" or select an existing project
19
+ 3. Follow the setup wizard
20
+
21
+ ### 1.2 Enable Google Authentication
22
+
23
+ 1. In Firebase Console, go to **Authentication** → **Sign-in method**
24
+ 2. Click on **Google** provider
25
+ 3. Enable it and set your project support email
26
+ 4. Save the changes
27
+
28
+ ### 1.3 Get Firebase Web App Configuration
29
+
30
+ 1. In Firebase Console, go to **Project Settings** (gear icon)
31
+ 2. Scroll down to "Your apps" section
32
+ 3. Click the **Web** icon (`</>`) to add a web app
33
+ 4. Register your app (you can skip Firebase Hosting for now)
34
+ 5. Copy the Firebase configuration object
35
+
36
+ ### 1.4 Get Firebase Service Account Key
37
+
38
+ 1. In Firebase Console, go to **Project Settings** → **Service accounts**
39
+ 2. Click **Generate new private key**
40
+ 3. Download the JSON file (keep it secure!)
41
+
42
+ ### 1.5 Set Frontend Environment Variables
43
+
44
+ Create or update `frontend/.env`:
45
+
46
+ ```bash
47
+ VITE_FIREBASE_API_KEY=your-api-key
48
+ VITE_FIREBASE_AUTH_DOMAIN=your-project.firebaseapp.com
49
+ VITE_FIREBASE_PROJECT_ID=your-project-id
50
+ VITE_FIREBASE_STORAGE_BUCKET=your-project.appspot.com
51
+ VITE_FIREBASE_MESSAGING_SENDER_ID=your-sender-id
52
+ VITE_FIREBASE_APP_ID=your-app-id
53
+ ```
54
+
55
+ ### 1.6 Set Backend Environment Variables
56
+
57
+ You have two options for Firebase Admin SDK:
58
+
59
+ **Option A: Service Account JSON File**
60
+ ```bash
61
+ FIREBASE_SERVICE_ACCOUNT_KEY=/path/to/service-account-key.json
62
+ ```
63
+
64
+ **Option B: Service Account JSON String (Recommended for Docker/Cloud/Hugging Face Spaces)**
65
+ ```bash
66
+ FIREBASE_SERVICE_ACCOUNT_JSON='{"type":"service_account","project_id":"...","private_key_id":"...","private_key":"...","client_email":"...","client_id":"...","auth_uri":"...","token_uri":"...","auth_provider_x509_cert_url":"...","client_x509_cert_url":"..."}'
67
+ ```
68
+
69
+ **For Hugging Face Spaces:**
70
+ - Use **Option B** (JSON String) as a **Secret** (Private)
71
+ - Copy the entire contents of your service account JSON file
72
+ - Paste it as the value for `FIREBASE_SERVICE_ACCOUNT_JSON`
73
+ - If you set the value via the command line, keep the single quotes around the JSON; in the Spaces UI, paste the raw JSON without surrounding quotes.
74
+
75
+ ---
76
+
77
+ ## Step 2: Brevo Setup
78
+
79
+ ### 2.1 Create Brevo Account
80
+
81
+ 1. Go to [Brevo](https://www.brevo.com/) (formerly Sendinblue)
82
+ 2. Sign up for a free account (300 emails/day free tier)
83
+ 3. Verify your email address
84
+
85
+ ### 2.2 Get API Key
86
+
87
+ 1. Log in to Brevo
88
+ 2. Go to **Settings** → **API Keys**
89
+ 3. Click **Generate a new API key**
90
+ 4. Copy the API key (starts with `xkeysib-...`)
91
+
92
+ ### 2.3 Verify Sender Email
93
+
94
+ 1. Go to **Senders & IP** → **Senders**
95
+ 2. Click **Add a sender**
96
+ 3. Enter your sender email (e.g., `noreply@yourdomain.com`)
97
+ 4. Verify the email address (check your inbox for verification email)
98
+ 5. Once verified, you can use it to send emails
99
+
100
+ ### 2.4 Set Backend Environment Variables
101
+
102
+ ```bash
103
+ BREVO_API_KEY=xkeysib-your-api-key-here
104
+ BREVO_SENDER_EMAIL=noreply@yourdomain.com
105
+ BREVO_SENDER_NAME=EZOFIS AI
106
+ ```
107
+
108
+ ---
109
+
110
+ ## Step 3: JWT Secret Key
111
+
112
+ Generate a strong random string for JWT token signing:
113
+
114
+ ```bash
115
+ # Generate a random secret (Linux/Mac)
116
+ openssl rand -hex 32
117
+
118
+ # Or use Python
119
+ python -c "import secrets; print(secrets.token_hex(32))"
120
+ ```
121
+
122
+ Set the environment variable:
123
+
124
+ ```bash
125
+ JWT_SECRET_KEY=your-generated-secret-key-here
126
+ ```
127
+
128
+ ---
129
+
130
+ ## Step 4: Frontend URL
131
+
132
+ Set the frontend URL for OAuth redirects:
133
+
134
+ ```bash
135
+ FRONTEND_URL=http://localhost:5173 # Development
136
+ # OR
137
+ FRONTEND_URL=https://your-domain.com # Production
138
+ ```
139
+
140
+ ---
141
+
142
+ ## Step 5: Install Dependencies
143
+
144
+ ### Backend
145
+
146
+ ```bash
147
+ cd backend
148
+ pip install -r requirements.txt
149
+ ```
150
+
151
+ ### Frontend
152
+
153
+ ```bash
154
+ cd frontend
155
+ npm install
156
+ ```
157
+
158
+ ---
159
+
160
+ ## Step 6: Database Migration
161
+
162
+ The database will automatically create the new schema when you start the application. However, if you have existing data:
163
+
164
+ **Option 1: Fresh Start (Recommended for Development)**
165
+ - Delete the existing database file: `data/app.db`
166
+ - Restart the application (tables will be recreated)
167
+
168
+ **Option 2: Manual Migration (For Production)**
169
+ - The new `users` table will be created automatically
170
+ - The existing `extractions` table needs a `user_id` column added manually
171
+ - You'll need to assign existing records to a default user or migrate them
172
+
173
+ ---
174
+
175
+ ## Step 7: Test the Setup
176
+
177
+ ### 7.1 Test Firebase Authentication
178
+
179
+ 1. Start the backend server
180
+ 2. Start the frontend development server
181
+ 3. Navigate to the application
182
+ 4. Click "Google Sign In"
183
+ 5. Sign in with a business Google account
184
+ 6. Verify you're redirected to the dashboard
185
+
186
+ ### 7.2 Test OTP Authentication
187
+
188
+ 1. Click on "Email / OTP" tab
189
+ 2. Enter a business email address
190
+ 3. Click "Send OTP"
191
+ 4. Check your email for the OTP code
192
+ 5. Enter the OTP and verify
193
+ 6. Verify you're redirected to the dashboard
194
+
195
+ ### 7.3 Test Business Email Validation
196
+
197
+ 1. Try to sign in with a personal Gmail account
198
+ 2. Verify you get an error message
199
+ 3. Try OTP with a personal email
200
+ 4. Verify it's blocked
201
+
202
+ ---
203
+
204
+ ## Environment Variables Summary
205
+
206
+ ### Backend (.env or environment)
207
+
208
+ ```bash
209
+ # Firebase
210
+ FIREBASE_SERVICE_ACCOUNT_JSON='{...}' # OR
211
+ FIREBASE_SERVICE_ACCOUNT_KEY=/path/to/key.json
212
+
213
+ # Brevo
214
+ BREVO_API_KEY=xkeysib-...
215
+ BREVO_SENDER_EMAIL=noreply@yourdomain.com
216
+ BREVO_SENDER_NAME=EZOFIS AI
217
+
218
+ # JWT
219
+ JWT_SECRET_KEY=your-secret-key
220
+
221
+ # Frontend URL
222
+ FRONTEND_URL=http://localhost:5173 # For local development
223
+ # For Hugging Face Spaces: https://your-username-ezofisocr.hf.space
224
+ ```
225
+
226
+ **For Hugging Face Spaces:**
227
+ - Set `FIREBASE_SERVICE_ACCOUNT_JSON`, `BREVO_API_KEY`, and `JWT_SECRET_KEY` as **Secrets (Private)**
228
+ - Set `BREVO_SENDER_EMAIL`, `BREVO_SENDER_NAME`, and `FRONTEND_URL` as **Variables (Public)**
229
+ - See `HUGGINGFACE_SPACES_SETUP.md` for detailed instructions
230
+
231
+ ### Frontend (.env)
232
+
233
+ ```bash
234
+ VITE_FIREBASE_API_KEY=...
235
+ VITE_FIREBASE_AUTH_DOMAIN=...
236
+ VITE_FIREBASE_PROJECT_ID=...
237
+ VITE_FIREBASE_STORAGE_BUCKET=...
238
+ VITE_FIREBASE_MESSAGING_SENDER_ID=...
239
+ VITE_FIREBASE_APP_ID=...
240
+ VITE_API_BASE_URL=http://localhost:7860
241
+ ```
242
+
243
+ ---
244
+
245
+ ## Troubleshooting
246
+
247
+ ### Firebase Issues
248
+
249
+ - **"Firebase not configured"**: Check that `FIREBASE_SERVICE_ACCOUNT_JSON` or `FIREBASE_SERVICE_ACCOUNT_KEY` is set correctly
250
+ - **"Invalid Firebase token"**: Ensure Firebase Web SDK is properly configured in frontend
251
+ - **"Email not found"**: Make sure Google sign-in is enabled in Firebase Console
252
+
253
+ ### Brevo Issues
254
+
255
+ - **"Failed to send email"**:
256
+ - Verify your API key is correct
257
+ - Check that sender email is verified in Brevo
258
+ - Ensure you haven't exceeded the free tier limit (300 emails/day)
259
+ - **"API key not set"**: Check that `BREVO_API_KEY` environment variable is set
260
+
261
+ ### Business Email Validation
262
+
263
+ - Personal emails (Gmail, Yahoo, etc.) are automatically blocked
264
+ - Only business/corporate email domains are allowed
265
+ - The validation happens on both frontend and backend
266
+
267
+ ---
268
+
269
+ ## Security Notes
270
+
271
+ 1. **Never commit** Firebase service account keys or API keys to version control
272
+ 2. Use environment variables or secure secret management
273
+ 3. JWT tokens expire after 7 days
274
+ 4. OTP codes expire after 10 minutes
275
+ 5. Maximum 5 OTP verification attempts per email
276
+ 6. All extraction records are filtered by user_id for data isolation
277
+
278
+ ---
279
+
280
+ ## Production Deployment
281
+
282
+ 1. Set all environment variables in your hosting platform
283
+ 2. Use HTTPS for both frontend and backend
284
+ 3. Update `FRONTEND_URL` to your production domain
285
+ 4. Verify sender email in Brevo with your production domain
286
+ 5. Consider using Redis for OTP storage instead of in-memory storage (for scalability across multiple server instances)
287
+ 6. Set up proper error monitoring and logging
288
+
289
+ ---
290
+
291
+ ## Support
292
+
293
+ For issues:
294
+ - Firebase: [Firebase Documentation](https://firebase.google.com/docs)
295
+ - Brevo: [Brevo API Documentation](https://developers.brevo.com/)
296
+
GOOGLE_OAUTH_SETUP.md ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Google OAuth Setup Guide
2
+
3
+ This application uses Google OAuth for user authentication. Follow these steps to set it up:
4
+
5
+ ## 1. Create Google OAuth Credentials
6
+
7
+ 1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
8
+ 2. Create a new project or select an existing one
9
+ 3. Enable the Google+ API
10
+ 4. Go to "Credentials" → "Create Credentials" → "OAuth client ID"
11
+ 5. Choose "Web application"
12
+ 6. Add authorized redirect URIs:
13
+ - For development: `http://localhost:7860/api/auth/callback`
14
+ - For production: `https://your-domain.com/api/auth/callback`
15
+ 7. Copy the Client ID and Client Secret
16
+
17
+ ## 2. Set Environment Variables
18
+
19
+ Set the following environment variables:
20
+
21
+ ```bash
22
+ # Google OAuth
23
+ GOOGLE_CLIENT_ID=your-client-id-here
24
+ GOOGLE_CLIENT_SECRET=your-client-secret-here
25
+
26
+ # JWT Secret (use a strong random string)
27
+ JWT_SECRET_KEY=your-secret-key-here
28
+
29
+ # Frontend URL (for OAuth redirect)
30
+ FRONTEND_URL=http://localhost:5173 # or your production URL
31
+ ```
32
+
33
+ ## 3. Database Migration
34
+
35
+ On a fresh database, the application automatically creates the new `users` table and the `extractions` table (including its `user_id` column) when you start it. An existing `extractions` table is not altered automatically.
36
+
37
+ **Note:** If you have an existing database with extraction records, you'll need to:
38
+ 1. Back up your data
39
+ 2. Delete the old database file
40
+ 3. Restart the application to recreate tables with the new schema
41
+
42
+ Or manually migrate:
43
+ - Add `user_id` column to `extractions` table (you may need to set a default user_id for existing records)
44
+
45
+ ## 4. Install Dependencies
46
+
47
+ Make sure to install the new Python dependencies:
48
+
49
+ ```bash
50
+ pip install -r backend/requirements.txt
51
+ ```
52
+
53
+ New dependencies added:
54
+ - `authlib` - OAuth library
55
+ - `pyjwt` - JWT token handling
56
+ - `python-jose[cryptography]` - JWT verification
57
+
58
+ ## 5. Start the Application
59
+
60
+ 1. Start the backend server
61
+ 2. Start the frontend development server
62
+ 3. Users will be prompted to sign in with Google when they try to access the application
63
+
64
+ ## How It Works
65
+
66
+ 1. User clicks "Sign in with Google" → redirected to Google login
67
+ 2. After authentication, Google redirects to `/api/auth/callback`
68
+ 3. Backend creates/updates user in database and generates JWT token
69
+ 4. Frontend receives token and stores it in localStorage
70
+ 5. All API requests include the JWT token in the Authorization header
71
+ 6. Backend verifies token and filters data by user_id
72
+
73
+ ## Security Notes
74
+
75
+ - JWT tokens expire after 7 days
76
+ - Tokens are stored in localStorage (consider httpOnly cookies for production)
77
+ - All extraction records are filtered by user_id
78
+ - Users can only see their own data and history
79
+
HUGGINGFACE_SPACES_SETUP.md ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Spaces Setup Guide
2
+
3
+ This guide provides specific instructions for deploying the EZOFIS OCR application to Hugging Face Spaces.
4
+
5
+ ## Prerequisites
6
+
7
+ 1. ✅ Firebase project configured
8
+ 2. ✅ Brevo account set up
9
+ 3. ✅ Hugging Face account with a Space created
10
+
11
+ ---
12
+
13
+ ## Step 1: Frontend Environment Variables
14
+
15
+ Set these in **Hugging Face Spaces → Settings → Variables and secrets**:
16
+
17
+ ### Variables (Public unless marked Secret):
18
+ - `VITE_FIREBASE_API_KEY` → Set as **Secret (Private)** ✅
19
+ - `VITE_FIREBASE_AUTH_DOMAIN` → Set as **Variable (Public)**
20
+ - `VITE_FIREBASE_PROJECT_ID` → Set as **Variable (Public)**
21
+ - `VITE_FIREBASE_STORAGE_BUCKET` → Set as **Variable (Public)**
22
+ - `VITE_FIREBASE_MESSAGING_SENDER_ID` → Set as **Variable (Public)**
23
+ - `VITE_FIREBASE_APP_ID` → Set as **Variable (Public)**
24
+ - `VITE_API_BASE_URL` → Set as **Variable (Public)** (e.g., `https://your-username-ezofisocr.hf.space`)
25
+
26
+ **Note:** These variables are used during the Docker build process. The Dockerfile has been updated to accept them as build arguments.
27
+
28
+ ---
29
+
30
+ ## Step 2: Backend Environment Variables
31
+
32
+ Set these in **Hugging Face Spaces → Settings → Variables and secrets**:
33
+
34
+ ### Secrets (Private):
35
+ 1. **`FIREBASE_SERVICE_ACCOUNT_JSON`**
36
+ - Get your Firebase service account JSON file (from Firebase Console → Project Settings → Service accounts)
37
+ - Copy the **entire JSON content** (either on one line or formatted across multiple lines)
38
+ - Paste it as the value for this secret
39
+ - Example format:
40
+ ```json
41
+ {"type":"service_account","project_id":"your-project","private_key_id":"...","private_key":"-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n","client_email":"...","client_id":"...","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_x509_cert_url":"..."}
42
+ ```
43
+
44
+ 2. **`BREVO_API_KEY`**
45
+ - Get from Brevo → Settings → API Keys
46
+ - Format: `xkeysib-...`
47
+
48
+ 3. **`JWT_SECRET_KEY`**
49
+ - Generate a secure random key:
50
+ ```bash
51
+ openssl rand -hex 32
52
+ ```
53
+ - Or use Python:
54
+ ```bash
55
+ python -c "import secrets; print(secrets.token_hex(32))"
56
+ ```
57
+
58
+ ### Variables (Public):
59
+ 1. **`BREVO_SENDER_EMAIL`**
60
+ - Your verified sender email in Brevo
61
+ - Example: `noreply@yourdomain.com`
62
+
63
+ 2. **`BREVO_SENDER_NAME`**
64
+ - Display name for emails
65
+ - Example: `EZOFIS AI`
66
+
67
+ 3. **`FRONTEND_URL`**
68
+ - Your Hugging Face Space URL
69
+ - Format: `https://your-username-ezofisocr.hf.space`
70
+ - Replace `your-username` with your actual Hugging Face username
71
+
72
+ ---
73
+
74
+ ## Step 3: Verify Dockerfile
75
+
76
+ The Dockerfile has been updated to accept frontend environment variables as build arguments. Make sure your `Dockerfile` includes:
77
+
78
+ ```dockerfile
79
+ # Accept build arguments for Vite environment variables
80
+ ARG VITE_FIREBASE_API_KEY
81
+ ARG VITE_FIREBASE_AUTH_DOMAIN
82
+ ARG VITE_FIREBASE_PROJECT_ID
83
+ ARG VITE_FIREBASE_STORAGE_BUCKET
84
+ ARG VITE_FIREBASE_MESSAGING_SENDER_ID
85
+ ARG VITE_FIREBASE_APP_ID
86
+ ARG VITE_API_BASE_URL
87
+
88
+ # Set as environment variables so Vite can access them during build
89
+ ENV VITE_FIREBASE_API_KEY=$VITE_FIREBASE_API_KEY
90
+ ENV VITE_FIREBASE_AUTH_DOMAIN=$VITE_FIREBASE_AUTH_DOMAIN
91
+ ENV VITE_FIREBASE_PROJECT_ID=$VITE_FIREBASE_PROJECT_ID
92
+ ENV VITE_FIREBASE_STORAGE_BUCKET=$VITE_FIREBASE_STORAGE_BUCKET
93
+ ENV VITE_FIREBASE_MESSAGING_SENDER_ID=$VITE_FIREBASE_MESSAGING_SENDER_ID
94
+ ENV VITE_FIREBASE_APP_ID=$VITE_FIREBASE_APP_ID
95
+ ENV VITE_API_BASE_URL=$VITE_API_BASE_URL
96
+ ```
97
+
98
+ ---
99
+
100
+ ## Step 4: Deploy
101
+
102
+ 1. **Commit and push** your code to the Hugging Face Space repository
103
+ - Make sure `frontend/build-env.sh` is included in your commit
104
+ 2. **Wait for the build** to complete (check the "Logs" tab)
105
+ - Look for "Checking environment variables..." messages in the build logs
106
+ - Verify all variables show "✓ ... is set" (not "WARNING: ... is not set")
107
+ 3. **Test the deployment**:
108
+ - Open your Space URL
109
+ - Try Firebase login
110
+ - Try OTP authentication
111
+
112
+ **Important:** After setting or updating environment variables in Hugging Face Spaces, you need to **rebuild** the Space for the changes to take effect. The frontend is built during the Docker build process, so environment variable changes require a rebuild.
113
+
114
+ ---
115
+
116
+ ## Troubleshooting
117
+
118
+ ### Build Fails with "VITE_* variables not found"
119
+ - **Solution:** Make sure all `VITE_*` variables are set in Spaces → Variables and secrets
120
+ - Hugging Face Spaces automatically makes environment variables available during build
121
+ - The Dockerfile uses a build script to create a `.env` file from these variables
122
+
123
+ ### Firebase Authentication Not Working - "auth/invalid-api-key" Error
124
+ - **Check:** `VITE_FIREBASE_API_KEY` is set correctly (as a Secret) and contains the full API key
125
+ - **Check:** All other `VITE_FIREBASE_*` variables are set with correct values
126
+ - **Check:** After updating variables, rebuild the Space (the frontend needs to be rebuilt)
127
+ - **Check:** Firebase Console → Authentication → Sign-in method → Google is enabled
128
+ - **Check:** The API key matches the one in Firebase Console → Project Settings → Your apps
129
+ - **Solution:** If the error persists, check the build logs to see if the `.env` file is being created correctly
130
+
131
+ ### OTP Emails Not Sending
132
+ - **Check:** `BREVO_API_KEY` is set correctly (as a Secret)
133
+ - **Check:** `BREVO_SENDER_EMAIL` is verified in Brevo
134
+ - **Check:** `BREVO_SENDER_NAME` is set
135
+ - **Check:** You haven't exceeded Brevo free tier (300 emails/day)
136
+
137
+ ### Backend Errors
138
+ - **Check:** `FIREBASE_SERVICE_ACCOUNT_JSON` contains the full JSON (all fields)
139
+ - **Check:** `JWT_SECRET_KEY` is set
140
+ - **Check:** `FRONTEND_URL` matches your Space URL exactly
141
+
142
+ ---
143
+
144
+ ## Environment Variables Checklist
145
+
146
+ ### Frontend (Build-time):
147
+ - [ ] `VITE_FIREBASE_API_KEY` (Secret)
148
+ - [ ] `VITE_FIREBASE_AUTH_DOMAIN` (Variable)
149
+ - [ ] `VITE_FIREBASE_PROJECT_ID` (Variable)
150
+ - [ ] `VITE_FIREBASE_STORAGE_BUCKET` (Variable)
151
+ - [ ] `VITE_FIREBASE_MESSAGING_SENDER_ID` (Variable)
152
+ - [ ] `VITE_FIREBASE_APP_ID` (Variable)
153
+ - [ ] `VITE_API_BASE_URL` (Variable)
154
+
155
+ ### Backend (Runtime):
156
+ - [ ] `FIREBASE_SERVICE_ACCOUNT_JSON` (Secret)
157
+ - [ ] `BREVO_API_KEY` (Secret)
158
+ - [ ] `JWT_SECRET_KEY` (Secret)
159
+ - [ ] `BREVO_SENDER_EMAIL` (Variable)
160
+ - [ ] `BREVO_SENDER_NAME` (Variable)
161
+ - [ ] `FRONTEND_URL` (Variable)
162
+
163
+ ---
164
+
165
+ ## Notes
166
+
167
+ 1. **Build vs Runtime:** Frontend variables (`VITE_*`) are used during Docker build, backend variables are used at runtime.
168
+
169
+ 2. **Secrets vs Variables:**
170
+ - Use **Secrets** for sensitive data (API keys, private keys, JWT secrets)
171
+ - Use **Variables** for non-sensitive configuration (URLs, display names)
172
+
173
+ 3. **Firebase Service Account JSON:** When copying the JSON, make sure to include the entire content, including the `private_key` field with newlines preserved (they should be `\n` in the JSON string).
174
+
175
+ 4. **Space URL:** Your Space URL format is `https://{username}-{space-name}.hf.space`. Make sure `FRONTEND_URL` and `VITE_API_BASE_URL` match this exactly.
176
+
177
+ ---
178
+
179
+ ## Support
180
+
181
+ If you encounter issues:
182
+ 1. Check the build logs in Hugging Face Spaces
183
+ 2. Verify all environment variables are set correctly
184
+ 3. Ensure Firebase and Brevo are properly configured
185
+ 4. Review the main setup guide: `FIREBASE_OTP_SETUP.md`
186
+
IMPLEMENTATION_COMPLETE.md ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ✅ Firebase + OTP Authentication Implementation Complete
2
+
3
+ All code changes have been applied successfully! Here are the next steps you need to follow:
4
+
5
+ ## 📋 Implementation Summary
6
+
7
+ ### ✅ Backend Changes (Completed)
8
+ - ✅ Updated `requirements.txt` with Firebase Admin SDK
9
+ - ✅ Updated `models.py` - User model now supports Firebase and OTP auth methods
10
+ - ✅ Created `email_validator.py` - Business email validation
11
+ - ✅ Created `firebase_auth.py` - Firebase token verification
12
+ - ✅ Created `brevo_service.py` - Brevo email service for OTP
13
+ - ✅ Created `otp_service.py` - OTP generation and verification
14
+ - ✅ Updated `auth_routes.py` - New endpoints for Firebase and OTP login
15
+
16
+ ### ✅ Frontend Changes (Completed)
17
+ - ✅ Updated `package.json` with Firebase SDK
18
+ - ✅ Created `config/firebase.js` - Firebase configuration
19
+ - ✅ Updated `services/auth.js` - Firebase and OTP auth functions
20
+ - ✅ Updated `contexts/AuthContext.jsx` - Firebase and OTP support
21
+ - ✅ Created `components/auth/LoginForm.jsx` - Login UI with both options
22
+ - ✅ Updated `App.jsx` - Integrated LoginForm component
23
+
24
+ ---
25
+
26
+ ## 🚀 Next Steps (YOU NEED TO DO THESE)
27
+
28
+ ### Step 1: Install Dependencies
29
+
30
+ **Backend:**
31
+ ```bash
32
+ cd backend
33
+ pip install -r requirements.txt
34
+ ```
35
+
36
+ **Frontend:**
37
+ ```bash
38
+ cd frontend
39
+ npm install
40
+ ```
41
+
42
+ ---
43
+
44
+ ### Step 2: Set Up Firebase
45
+
46
+ 1. **Create Firebase Project:**
47
+ - Go to https://console.firebase.google.com/
48
+ - Create a new project or use an existing one
49
+
50
+ 2. **Enable Google Authentication:**
51
+ - In Firebase Console → Authentication → Sign-in method
52
+ - Enable "Google" provider
53
+ - Set project support email
54
+
55
+ 3. **Get Web App Config:**
56
+ - Project Settings → Your apps → Add Web app
57
+ - Copy the config values
58
+
59
+ 4. **Get Service Account Key:**
60
+ - Project Settings → Service accounts
61
+ - Click "Generate new private key"
62
+ - Download the JSON file
63
+
64
+ 5. **Set Frontend Environment Variables:**
65
+ Create `frontend/.env`:
66
+ ```bash
67
+ VITE_FIREBASE_API_KEY=your-api-key-here
68
+ VITE_FIREBASE_AUTH_DOMAIN=your-project.firebaseapp.com
69
+ VITE_FIREBASE_PROJECT_ID=your-project-id
70
+ VITE_FIREBASE_STORAGE_BUCKET=your-project.appspot.com
71
+ VITE_FIREBASE_MESSAGING_SENDER_ID=your-sender-id
72
+ VITE_FIREBASE_APP_ID=your-app-id
73
+ VITE_API_BASE_URL=http://localhost:7860
74
+ ```
75
+
76
+ 6. **Set Backend Environment Variables:**
77
+ Option A (JSON file path):
78
+ ```bash
79
+ FIREBASE_SERVICE_ACCOUNT_KEY=/path/to/service-account-key.json
80
+ ```
81
+
82
+ Option B (JSON string - recommended for Docker):
83
+ ```bash
84
+ FIREBASE_SERVICE_ACCOUNT_JSON='{"type":"service_account","project_id":"...","private_key":"...","client_email":"..."}'
85
+ ```
86
+ (Copy the entire JSON content from the downloaded file)
87
+
88
+ ---
89
+
90
+ ### Step 3: Set Up Brevo
91
+
92
+ 1. **Create Brevo Account:**
93
+ - Go to https://www.brevo.com/
94
+ - Sign up (free tier: 300 emails/day)
95
+
96
+ 2. **Get API Key:**
97
+ - Settings → API Keys
98
+ - Generate new API key
99
+ - Copy the key (starts with `xkeysib-`)
100
+
101
+ 3. **Verify Sender Email:**
102
+ - Senders & IP → Senders
103
+ - Add sender email (e.g., `noreply@yourdomain.com`)
104
+ - Verify via email
105
+
106
+ 4. **Set Backend Environment Variables:**
107
+ ```bash
108
+ BREVO_API_KEY=xkeysib-your-api-key-here
109
+ BREVO_SENDER_EMAIL=noreply@yourdomain.com
110
+ BREVO_SENDER_NAME=EZOFIS AI
111
+ ```
112
+
113
+ ---
114
+
115
+ ### Step 4: Set JWT Secret
116
+
117
+ Generate a secure random key:
118
+ ```bash
119
+ # Linux/Mac
120
+ openssl rand -hex 32
121
+
122
+ # Or Python
123
+ python -c "import secrets; print(secrets.token_hex(32))"
124
+ ```
125
+
126
+ Set environment variable:
127
+ ```bash
128
+ JWT_SECRET_KEY=your-generated-secret-key-here
129
+ ```
130
+
131
+ ---
132
+
133
+ ### Step 5: Set Frontend URL
134
+
135
+ ```bash
136
+ FRONTEND_URL=http://localhost:5173 # Development
137
+ # OR
138
+ FRONTEND_URL=https://your-domain.com # Production
139
+ ```
140
+
141
+ ---
142
+
143
+ ### Step 6: Database Migration
144
+
145
+ **If you have existing data:**
146
+ - The new schema will be created automatically
147
+ - Existing `extractions` table needs `user_id` column
148
+ - You may need to assign existing records to a default user
149
+
150
+ **For fresh start (recommended for development):**
151
+ - Delete `data/app.db` (if exists)
152
+ - Restart application - tables will be recreated
153
+
154
+ ---
155
+
156
+ ### Step 7: Test the Implementation
157
+
158
+ 1. **Start Backend:**
159
+ ```bash
160
+ cd backend
161
+ uvicorn app.main:app --reload --port 7860
162
+ ```
163
+
164
+ 2. **Start Frontend:**
165
+ ```bash
166
+ cd frontend
167
+ npm run dev
168
+ ```
169
+
170
+ 3. **Test Firebase Login:**
171
+ - Navigate to http://localhost:5173
172
+ - Click "Google Sign In" tab
173
+ - Sign in with business Google account
174
+ - Should redirect to dashboard
175
+
176
+ 4. **Test OTP Login:**
177
+ - Click "Email / OTP" tab
178
+ - Enter business email
179
+ - Click "Send OTP"
180
+ - Check email for OTP code
181
+ - Enter OTP and verify
182
+ - Should redirect to dashboard
183
+
184
+ 5. **Test Business Email Validation:**
185
+ - Try personal Gmail account → Should be blocked
186
+ - Try OTP with personal email → Should be blocked
187
+
188
+ ---
189
+
190
+ ## 📝 Environment Variables Checklist
191
+
192
+ ### Backend (.env or system environment)
193
+ - [ ] `FIREBASE_SERVICE_ACCOUNT_JSON` or `FIREBASE_SERVICE_ACCOUNT_KEY`
194
+ - [ ] `BREVO_API_KEY`
195
+ - [ ] `BREVO_SENDER_EMAIL`
196
+ - [ ] `BREVO_SENDER_NAME`
197
+ - [ ] `JWT_SECRET_KEY`
198
+ - [ ] `FRONTEND_URL`
199
+
200
+ ### Frontend (.env)
201
+ - [ ] `VITE_FIREBASE_API_KEY`
202
+ - [ ] `VITE_FIREBASE_AUTH_DOMAIN`
203
+ - [ ] `VITE_FIREBASE_PROJECT_ID`
204
+ - [ ] `VITE_FIREBASE_STORAGE_BUCKET`
205
+ - [ ] `VITE_FIREBASE_MESSAGING_SENDER_ID`
206
+ - [ ] `VITE_FIREBASE_APP_ID`
207
+ - [ ] `VITE_API_BASE_URL`
208
+
209
+ ---
210
+
211
+ ## 🔒 Security Reminders
212
+
213
+ 1. ✅ Never commit API keys or secrets to git
214
+ 2. ✅ Use `.env` files (add to `.gitignore`)
215
+ 3. ✅ Business email validation is enforced on both frontend and backend
216
+ 4. ✅ JWT tokens expire after 7 days
217
+ 5. ✅ OTP codes expire after 10 minutes
218
+ 6. ✅ Maximum 5 OTP verification attempts
219
+
220
+ ---
221
+
222
+ ## 📚 Documentation
223
+
224
+ - **Firebase Setup:** See `FIREBASE_OTP_SETUP.md` for detailed instructions
225
+ - **Brevo API:** https://developers.brevo.com/reference/sendtransacemail
226
+
227
+ ---
228
+
229
+ ## ⚠️ Important Notes
230
+
231
+ 1. **Database Schema Change:**
232
+ - User model changed from `google_id` (required) to `firebase_uid` (optional)
233
+ - If you have existing users, you'll need to migrate the data
234
+ - For development, deleting `data/app.db` is the easiest option
235
+
236
+ 2. **Business Email Validation:**
237
+ - Personal email domains are blocked (Gmail, Yahoo, Outlook, etc.)
238
+ - Validation happens on both frontend and backend
239
+ - Users must use their work/corporate email addresses
240
+
241
+ 3. **OTP Storage:**
242
+ - Currently stored in memory (works only when the backend runs as a single server process)
243
+ - For production with multiple servers, consider using Redis
244
+
245
+ ---
246
+
247
+ ## 🎉 You're All Set!
248
+
249
+ Once you complete the setup steps above, your application will have:
250
+ - ✅ Firebase Google Sign-in (no OAuth credentials needed!)
251
+ - ✅ Email/OTP authentication via Brevo
252
+ - ✅ Business email validation
253
+ - ✅ User-specific data isolation
254
+ - ✅ Secure JWT token authentication
255
+
256
+ Good luck! 🚀
257
+
README.md CHANGED
@@ -1,10 +1,10 @@
1
- ---
2
- title: EZOFISAIOCR
3
- emoji: 🌍
4
- colorFrom: blue
5
- colorTo: purple
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: EZOFISAIOCR
3
+ emoji: 🌍
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
backend/app/api_key_auth.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import secrets
3
+ import hashlib
4
+ from datetime import datetime
5
+ from typing import Optional
6
+ from fastapi import Depends, HTTPException, status, Header
7
+ from sqlalchemy.orm import Session
8
+ from .db import SessionLocal
9
+ from .models import APIKey, User
10
+
11
+
12
+ def get_db():
13
+ """Database dependency."""
14
+ db = SessionLocal()
15
+ try:
16
+ yield db
17
+ finally:
18
+ db.close()
19
+
20
+
21
+ def generate_api_key() -> str:
22
+ """
23
+ Generate a secure API key.
24
+ Format: sk_live_<random_64_char_hex>
25
+ """
26
+ random_bytes = secrets.token_bytes(32)
27
+ random_hex = random_bytes.hex()
28
+ return f"sk_live_{random_hex}"
29
+
30
+
31
+ def hash_api_key(key: str) -> str:
32
+ """Hash an API key using SHA-256."""
33
+ return hashlib.sha256(key.encode()).hexdigest()
34
+
35
+
36
+ def verify_api_key(key: str, key_hash: str) -> bool:
37
+ """Verify an API key against its hash."""
38
+ return hash_api_key(key) == key_hash
39
+
40
+
41
+ def get_api_key_prefix(key: str) -> str:
42
+ """Get the prefix of an API key for display purposes."""
43
+ return key[:12] + "..." if len(key) > 12 else key
44
+
45
+
46
+ async def get_user_from_api_key(
47
+ api_key: Optional[str] = Header(None, alias="X-API-Key"),
48
+ db: Session = Depends(get_db)
49
+ ) -> Optional[User]:
50
+ """
51
+ Authenticate user from API key header.
52
+ Returns User if valid, None if no API key provided.
53
+ Raises HTTPException if API key is invalid.
54
+ """
55
+ if not api_key:
56
+ return None
57
+
58
+ # Hash the provided key
59
+ key_hash = hash_api_key(api_key)
60
+
61
+ # Find the API key in database
62
+ api_key_record = (
63
+ db.query(APIKey)
64
+ .filter(APIKey.key_hash == key_hash)
65
+ .filter(APIKey.is_active == True)
66
+ .first()
67
+ )
68
+
69
+ if not api_key_record:
70
+ raise HTTPException(
71
+ status_code=status.HTTP_401_UNAUTHORIZED,
72
+ detail="Invalid API key",
73
+ headers={"WWW-Authenticate": "Bearer"},
74
+ )
75
+
76
+ # Update last used timestamp
77
+ api_key_record.last_used_at = datetime.utcnow()
78
+ db.commit()
79
+
80
+ # Get the user
81
+ user = db.query(User).filter(User.id == api_key_record.user_id).first()
82
+ if not user:
83
+ raise HTTPException(
84
+ status_code=status.HTTP_401_UNAUTHORIZED,
85
+ detail="User not found for API key",
86
+ )
87
+
88
+ return user
89
+
90
+
91
+ async def get_current_user_or_api_key(
92
+ api_key_user: Optional[User] = Depends(get_user_from_api_key),
93
+ # JWT auth will be handled separately in main.py
94
+ ) -> Optional[User]:
95
+ """
96
+ Dependency that returns user from API key if provided, otherwise None.
97
+ This allows endpoints to support both JWT and API key authentication.
98
+ """
99
+ return api_key_user
100
+
backend/app/apollo_service.py ADDED
@@ -0,0 +1,444 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Apollo.io API service for creating contacts, enriching contact data, and adding them to sequences.
3
+ Reference:
4
+ - Create contact: https://docs.apollo.io/reference/create-a-contact
5
+ - Add to sequence: https://docs.apollo.io/reference/add-contacts-to-sequence
6
+ - Enrich person: https://docs.apollo.io/reference/enrich-people-data
7
+ """
8
+ import os
9
+ import httpx
10
+ from typing import Optional, Dict, Any
11
+
12
# Credentials and endpoint for the Apollo.io REST API.
APOLLO_API_KEY = os.getenv("APOLLO_API_KEY", "")
APOLLO_API_URL = "https://api.apollo.io/api/v1"
APOLLO_TRIAL_LIST_NAME = "VPR TRIAL LEADS"
# The list ID may be supplied directly through the environment, which is more
# reliable than looking it up by name.
APOLLO_TRIAL_LIST_ID = os.getenv("APOLLO_TRIAL_LIST_ID")
# Sequence used for enrolling contacts into email sequences (preferred over lists).
APOLLO_TRIAL_SEQUENCE_ID = os.getenv("APOLLO_TRIAL_SEQUENCE_ID")
19
+
20
+
21
async def get_list_id(list_name: Optional[str] = None) -> Optional[str]:
    """
    Resolve the Apollo list ID to use for new contacts.

    The APOLLO_TRIAL_LIST_ID setting wins when it is non-blank; otherwise the
    list is looked up by name through the Apollo ``/lists`` endpoint.

    Args:
        list_name: Name of the list to look up when no ID is configured.

    Returns:
        The list ID as a string, or None when it cannot be determined.
    """
    if APOLLO_TRIAL_LIST_ID:
        # IDs are treated as opaque strings; only surrounding whitespace is removed.
        configured_id = str(APOLLO_TRIAL_LIST_ID).strip()
        if configured_id:
            print(f"[INFO] Using Apollo list ID from environment variable: {configured_id}")
            return configured_id
        print("[WARNING] APOLLO_TRIAL_LIST_ID is empty")

    # A name lookup needs both a name and an API key.
    if not (list_name and APOLLO_API_KEY):
        return None

    request_headers = {
        "Content-Type": "application/json",
        "Cache-Control": "no-cache",
        "X-Api-Key": APOLLO_API_KEY
    }

    # Note: the /lists endpoint may not be available in all Apollo API versions.
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{APOLLO_API_URL}/lists",
                headers=request_headers,
                timeout=10.0
            )
            if response.status_code != 200:
                print(f"[WARNING] Apollo lists endpoint returned {response.status_code}, cannot lookup list by name")
            else:
                available = response.json().get("lists", [])
                match = next(
                    (entry for entry in available if entry.get("name") == list_name),
                    None,
                )
                if match is not None:
                    found_id = match.get("id")
                    print(f"[INFO] Found Apollo list '{list_name}' with ID: {found_id}")
                    # Returned as a string, or None when the entry has no usable ID.
                    return str(found_id) if found_id else None
                print(f"[WARNING] Apollo list '{list_name}' not found in available lists")
    except Exception as e:
        print(f"[WARNING] Failed to fetch Apollo list ID: {str(e)}")

    return None
76
+
77
+
78
async def add_contact_to_sequence(contact_id: str, sequence_id: str) -> bool:
    """
    Enroll an existing Apollo contact in an email sequence.

    Args:
        contact_id: The Apollo contact ID
        sequence_id: The Apollo sequence ID

    Returns:
        True when Apollo answers 200 or 201; False when the API key is
        missing, the request fails, or any other status is returned.
    """
    if not APOLLO_API_KEY:
        print("[WARNING] APOLLO_API_KEY not set, skipping sequence enrollment")
        return False

    request_headers = {
        "Content-Type": "application/json",
        "Cache-Control": "no-cache",
        "X-Api-Key": APOLLO_API_KEY
    }
    payload = {
        "sequence_id": sequence_id,
        "contact_id": contact_id
    }

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{APOLLO_API_URL}/sequence_contacts",
                headers=request_headers,
                json=payload,
                timeout=10.0
            )

            if response.status_code not in (200, 201):
                print(f"[ERROR] Failed to add contact to sequence: {response.status_code} - {response.text}")
                return False

            print(f"[INFO] Successfully added contact {contact_id} to sequence {sequence_id}")
            return True
    except httpx.HTTPStatusError as e:
        print(f"[ERROR] Apollo API HTTP error adding to sequence: {e.response.status_code} - {e.response.text}")
        return False
    except Exception as e:
        print(f"[ERROR] Failed to add contact to sequence: {str(e)}")
        return False
122
+
123
+
124
async def create_apollo_contact(
    email: str,
    first_name: Optional[str] = None,
    last_name: Optional[str] = None,
    organization_name: Optional[str] = None,
    title: Optional[str] = None,
    list_name: Optional[str] = None,
    sequence_id: Optional[str] = None
) -> bool:
    """
    Create a contact in Apollo.io and optionally add it to a sequence or list.

    Args:
        email: Contact email address (required)
        first_name: Contact first name
        last_name: Contact last name
        organization_name: Organization name
        title: Job title
        list_name: Name of the list to add contact to (defaults to APOLLO_TRIAL_LIST_NAME)
        sequence_id: ID of the sequence to add contact to (preferred over list;
            falls back to APOLLO_TRIAL_SEQUENCE_ID)

    Returns:
        True if the contact was created, False otherwise. A missing API key,
        a non-2xx response, an unparseable success response, or any request
        error all yield False; this function does not raise.
    """
    if not APOLLO_API_KEY:
        print("[WARNING] APOLLO_API_KEY not set, skipping Apollo contact creation")
        return False

    # Use default list name if not provided
    if list_name is None:
        list_name = APOLLO_TRIAL_LIST_NAME

    # With no name supplied, derive one from the local part of the email
    # ("jane.doe@..." -> "Jane" / "Doe"; "jane@..." -> "Jane").
    if not first_name and not last_name:
        email_prefix = email.split('@')[0]
        if '.' in email_prefix:
            parts = email_prefix.split('.')
            first_name = parts[0].capitalize()
            last_name = parts[1].capitalize()
        else:
            first_name = email_prefix.capitalize()

    # Extract organization domain from email
    organization_domain = email.split('@')[1] if '@' in email else None

    contact_data: Dict[str, Any] = {
        "email": email.lower(),
        "run_dedupe": True  # Prevent duplicate contacts
    }
    optional_fields = {
        "first_name": first_name,
        "last_name": last_name,
        "organization_name": organization_name,
        "organization_domain": organization_domain,
        "title": title,
    }
    # Only send fields that actually carry a value.
    contact_data.update({key: value for key, value in optional_fields.items() if value})

    try:
        async with httpx.AsyncClient() as client:
            # Resolve the target list (if any) and attach it to the payload.
            target_list_id = None
            if list_name:
                target_list_id = await get_list_id(list_name)
                if target_list_id:
                    # The list ID is sent as a one-element array of strings.
                    contact_data["list_ids"] = [str(target_list_id)]
                    print(f"[INFO] Adding contact to list ID: {target_list_id}")
                else:
                    print(f"[WARNING] Could not find list '{list_name}'. Set APOLLO_TRIAL_LIST_ID environment variable with the list ID, or create contact without list assignment")

            # Log the payload being sent (for debugging)
            print(f"[DEBUG] Creating Apollo contact with payload: {contact_data}")

            response = await client.post(
                f"{APOLLO_API_URL}/contacts",
                headers={
                    "Content-Type": "application/json",
                    "Cache-Control": "no-cache",
                    "X-Api-Key": APOLLO_API_KEY
                },
                json=contact_data,
                timeout=10.0
            )

            # Parse the body once; a non-JSON body is logged as text instead.
            print(f"[DEBUG] Apollo API response status: {response.status_code}")
            response_json = None
            try:
                response_json = response.json()
                print(f"[DEBUG] Apollo API response (full): {response_json}")
            except ValueError:
                # JSON decode errors are ValueError subclasses; nothing else is swallowed.
                print(f"[DEBUG] Apollo API response body (text): {response.text[:1000]}")  # First 1000 chars

            if response.status_code not in (200, 201):
                print(f"[ERROR] Failed to create Apollo contact: {response.status_code} - {response.text}")
                return False

            if not isinstance(response_json, dict):
                print("[ERROR] Failed to create Apollo contact: success status but response body is not a JSON object")
                return False

            # "contact" may be absent or null; treat both as "no ID available".
            contact = response_json.get("contact") or {}
            contact_id = contact.get("id")
            print(f"[INFO] Successfully created Apollo contact: {email} (ID: {contact_id})")

            # Priority: add to a sequence when one is configured.
            target_sequence_id = sequence_id or APOLLO_TRIAL_SEQUENCE_ID
            if contact_id and target_sequence_id:
                print(f"[INFO] Adding contact to sequence: {target_sequence_id}")
                if await add_contact_to_sequence(contact_id, target_sequence_id):
                    print(f"[INFO] ✓ Contact successfully enrolled in sequence")
                else:
                    print(f"[WARNING] Failed to add contact to sequence, but contact was created")

            # Fallback: list membership was requested via list_ids but cannot be verified here.
            if target_list_id and contact_id and not target_sequence_id:
                print(f"[INFO] Contact created with list_ids parameter: {contact_data['list_ids']}")
                print(f"[INFO] ⚠️ Apollo.io API Limitation: The API does not return list_ids in responses,")
                print(f"[INFO] so we cannot verify if the contact was added to the list via API.")
                print(f"[INFO] Please verify manually in Apollo.io that contact '{email}' is in list '{list_name or target_list_id}'")
                print(f"[INFO] Consider using sequences instead (APOLLO_TRIAL_SEQUENCE_ID) for better API support.")

            return True

    except httpx.HTTPStatusError as e:
        print(f"[ERROR] Apollo API HTTP error: {e.response.status_code} - {e.response.text}")
        return False
    except Exception as e:
        print(f"[ERROR] Failed to create Apollo contact: {str(e)}")
        return False
267
+
268
+
269
def _apollo_headers() -> Dict[str, str]:
    """Build the request headers shared by the Apollo enrichment calls."""
    return {
        "Content-Type": "application/json",
        "Cache-Control": "no-cache",
        "X-Api-Key": APOLLO_API_KEY
    }


def _extract_enriched_data(person: Dict[str, Any]) -> Dict[str, Any]:
    """Map an Apollo person record onto the flat enrichment dictionary."""
    organization = person.get("organization") or {}
    phone_numbers = person.get("phone_numbers") or []

    # The address is assembled from whichever organization fields are present.
    address_fields = ("street_address", "city", "state", "postal_code", "country")
    address_parts = [organization.get(field) for field in address_fields if organization.get(field)]

    return {
        "first_name": person.get("first_name"),
        "last_name": person.get("last_name"),
        "title": person.get("title"),
        "phone_number": phone_numbers[0].get("raw_number") if phone_numbers else None,
        "linkedin_url": person.get("linkedin_url"),
        "headline": person.get("headline"),
        "organization_name": organization.get("name"),
        "organization_website": organization.get("website_url"),
        "organization_address": ", ".join(address_parts) if address_parts else None,
    }


def _log_response_body(response, json_prefix: str, text_prefix: str, text_limit: Optional[int] = None) -> None:
    """Print a response body as parsed JSON, or as (optionally truncated) text."""
    try:
        body = response.json()
    except ValueError:
        # JSON decode errors are ValueError subclasses; nothing else is swallowed.
        text = response.text if text_limit is None else response.text[:text_limit]
        print(f"{text_prefix}{text}")
    else:
        print(f"{json_prefix}{body}")


async def enrich_contact_by_email(email: str) -> Optional[Dict[str, Any]]:
    """
    Enrich contact data from Apollo.io using email address.

    Tries the ``/people/match`` endpoint first and, when that yields no
    person, falls back to ``/mixed_people/api_search``.

    Args:
        email: Contact email address

    Returns:
        Dictionary with the keys first_name, last_name, title, phone_number,
        linkedin_url, headline, organization_name, organization_website and
        organization_address (values may be None), or None when no person
        was found, the API key is missing, or a request failed.
    """
    if not APOLLO_API_KEY:
        print("[WARNING] APOLLO_API_KEY not set, skipping Apollo enrichment")
        return None

    try:
        async with httpx.AsyncClient() as client:
            # Try people/match endpoint first (for exact email match)
            print(f"[DEBUG] Attempting Apollo.io enrichment for {email} via /people/match endpoint")
            response = await client.post(
                f"{APOLLO_API_URL}/people/match",
                headers=_apollo_headers(),
                json={
                    "email": email.lower()
                    # Note: reveal_phone_number requires webhook_url, so we skip it for now
                },
                timeout=10.0
            )

            print(f"[DEBUG] Apollo.io /people/match response status: {response.status_code}")

            if response.status_code == 200:
                data = response.json()
                print(f"[DEBUG] Apollo.io /people/match response data keys: {list(data.keys())}")
                person = data.get("person", {})
                if person:
                    print(f"[DEBUG] Found person data in Apollo.io response")
                    enriched_data = _extract_enriched_data(person)
                    print(f"[INFO] Successfully enriched contact data for {email} from Apollo.io")
                    return enriched_data
                print(f"[DEBUG] Apollo.io /people/match returned 200 but no person data found")
            elif response.status_code == 404:
                print(f"[DEBUG] Apollo.io /people/match returned 404 - contact not found in database")
            elif response.status_code == 401:
                print(f"[ERROR] Apollo.io API authentication failed - check your API key")
                _log_response_body(
                    response,
                    "[ERROR] Apollo.io error details: ",
                    "[ERROR] Apollo.io error response: ",
                )
            else:
                print(f"[DEBUG] Apollo.io /people/match returned status {response.status_code}")
                _log_response_body(
                    response,
                    "[DEBUG] Apollo.io response: ",
                    "[DEBUG] Apollo.io response text: ",
                    text_limit=500,
                )

            # If match fails, try the search endpoint (api_search)
            print(f"[DEBUG] Attempting Apollo.io enrichment for {email} via /mixed_people/api_search endpoint")
            search_response = await client.post(
                f"{APOLLO_API_URL}/mixed_people/api_search",
                headers=_apollo_headers(),
                json={
                    "email": email.lower(),
                    "per_page": 1
                },
                timeout=10.0
            )

            print(f"[DEBUG] Apollo.io /mixed_people/api_search response status: {search_response.status_code}")

            if search_response.status_code == 200:
                search_data = search_response.json()
                print(f"[DEBUG] Apollo.io /mixed_people/api_search response data keys: {list(search_data.keys())}")
                people = search_data.get("people", [])
                print(f"[DEBUG] Found {len(people)} people in search results")
                if people:
                    # Only the first search hit is used.
                    enriched_data = _extract_enriched_data(people[0])
                    print(f"[INFO] Successfully enriched contact data for {email} from Apollo.io (via search)")
                    return enriched_data
                print(f"[DEBUG] Apollo.io /mixed_people/api_search returned 200 but no people in results")
            elif search_response.status_code == 404:
                print(f"[DEBUG] Apollo.io /mixed_people/api_search returned 404 - contact not found")
            elif search_response.status_code == 401:
                print(f"[ERROR] Apollo.io API authentication failed on search - check your API key")
                _log_response_body(
                    search_response,
                    "[ERROR] Apollo.io search error details: ",
                    "[ERROR] Apollo.io search error response: ",
                )
            else:
                print(f"[DEBUG] Apollo.io /mixed_people/api_search returned status {search_response.status_code}")
                _log_response_body(
                    search_response,
                    "[DEBUG] Apollo.io search response: ",
                    "[DEBUG] Apollo.io search response text: ",
                    text_limit=500,
                )

            print(f"[INFO] No contact data found in Apollo.io for {email} - contact may not exist in Apollo's database")
            return None

    except httpx.HTTPStatusError as e:
        print(f"[ERROR] Apollo API HTTP error during enrichment: {e.response.status_code} - {e.response.text}")
        return None
    except Exception as e:
        print(f"[ERROR] Failed to enrich contact from Apollo.io: {str(e)}")
        return None
444
+
backend/app/auth.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import jwt
3
+ from datetime import datetime, timedelta
4
+ from typing import Optional
5
+ from fastapi import Depends, HTTPException, status
6
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
7
+ from sqlalchemy.orm import Session
8
+ from .db import SessionLocal
9
+ from .models import User
10
+
11
# JWT Configuration
# Placeholder used when JWT_SECRET_KEY is not provided. It is public, so tokens
# signed with it can be forged by anyone; it exists only for local development.
_DEFAULT_SECRET_KEY = "your-secret-key-change-in-production"
SECRET_KEY = os.environ.get("JWT_SECRET_KEY", _DEFAULT_SECRET_KEY)
if SECRET_KEY == _DEFAULT_SECRET_KEY:
    # Make the insecure fallback visible instead of applying it silently.
    print("[WARNING] JWT_SECRET_KEY is not set; using the insecure built-in default. Set JWT_SECRET_KEY in production.")
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7  # 7 days
15
+
16
+ security = HTTPBearer()
17
+
18
+
19
def get_db():
    """Yield a database session and close it once the caller is finished."""
    session = SessionLocal()
    try:
        yield session
    finally:
        # Runs whether or not the consumer raised.
        session.close()
26
+
27
+
28
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
    """
    Build a signed JWT from the given claims.

    Args:
        data: Claims to embed; a ``sub`` claim, if present, is stringified.
        expires_delta: Token lifetime; when omitted or zero the default of
            ACCESS_TOKEN_EXPIRE_MINUTES is used.

    Returns:
        The encoded token produced by ``jwt.encode``.
    """
    claims = data.copy()
    # Ensure 'sub' (subject) is a string, not an integer
    if "sub" in claims:
        claims["sub"] = str(claims["sub"])

    lifetime = expires_delta if expires_delta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    claims["exp"] = datetime.utcnow() + lifetime
    return jwt.encode(claims, SECRET_KEY, algorithm=ALGORITHM)
41
+
42
+
43
def verify_token(token: str) -> dict:
    """
    Decode a JWT and return its payload.

    Raises:
        HTTPException: 401 with "Token has expired" for an expired token, or
            401 with "Could not validate credentials" for any other invalid token.
    """
    unauthorized = status.HTTP_401_UNAUTHORIZED
    try:
        return jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
    except jwt.ExpiredSignatureError:
        raise HTTPException(
            status_code=unauthorized,
            detail="Token has expired",
        )
    except jwt.InvalidTokenError:
        raise HTTPException(
            status_code=unauthorized,
            detail="Could not validate credentials",
        )
58
+
59
+
60
def get_current_user(
    credentials: HTTPAuthorizationCredentials = Depends(security),
    db: Session = Depends(get_db)
) -> User:
    """
    Resolve the authenticated user from the bearer JWT.

    Raises:
        HTTPException: 401 when the token is invalid, carries no ``sub``
            claim, carries a non-integer ``sub``, or refers to a user that
            does not exist.
    """
    payload = verify_token(credentials.credentials)
    subject = payload.get("sub")

    if subject is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Could not validate credentials",
        )

    # The subject is stored as a string in the token; the database key is an integer.
    try:
        user_pk = int(subject)
    except (ValueError, TypeError):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid user ID in token",
        )

    found = db.query(User).filter(User.id == user_pk).first()
    if found is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="User not found",
        )

    return found
92
+
backend/app/auth_routes.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from fastapi import APIRouter, Depends, HTTPException, Body
3
+ from pydantic import BaseModel, EmailStr
4
+ from sqlalchemy.orm import Session
5
+ from .models import User, APIKey
6
+ from .auth import create_access_token, get_current_user
7
+ from .firebase_auth import verify_firebase_token
8
+ from .otp_service import request_otp, verify_otp
9
+ from .email_validator import validate_business_email, is_business_email
10
+ from .api_key_auth import generate_api_key, hash_api_key, get_api_key_prefix
11
+ from .db import SessionLocal
12
+
13
def get_db():
    """Yield a database session and close it once the caller is finished."""
    session = SessionLocal()
    try:
        yield session
    finally:
        # Runs whether or not the consumer raised.
        session.close()
20
+
21
+ router = APIRouter()
22
+
23
+
24
# Request body for POST /api/auth/firebase/login.
class FirebaseLoginRequest(BaseModel):
    # Firebase ID token, passed to verify_firebase_token by the login route.
    id_token: str
26
+
27
+
28
# Request body for POST /api/auth/otp/request.
class OTPRequestRequest(BaseModel):
    # Address the one-time password is requested for.
    email: EmailStr
30
+
31
+
32
# Request body for POST /api/auth/otp/verify.
class OTPVerifyRequest(BaseModel):
    # Address the one-time password was requested for.
    email: EmailStr
    # Code submitted by the user.
    otp: str
35
+
36
+
37
# Request body for POST /api/auth/api-key/create.
class CreateAPIKeyRequest(BaseModel):
    # User-friendly name for the API key
    name: str
39
+
40
+
41
async def _sync_new_user_to_integrations(email: str, user_info: dict) -> None:
    """Best-effort: enrich a new user via Apollo.io and push them to Brevo and Monday.com."""
    try:
        # Imported lazily so a broken integration module cannot prevent login.
        from .apollo_service import enrich_contact_by_email
        from .brevo_service import create_brevo_contact, BREVO_TRIAL_LIST_ID
        from .monday_service import create_monday_lead

        # An empty dict stands in when Apollo returned nothing.
        enriched = await enrich_contact_by_email(email) or {}

        first_name = enriched.get("first_name")
        last_name = enriched.get("last_name")
        org_name = enriched.get("organization_name")

        # Fallback to Firebase data if Apollo didn't provide a full name
        if not first_name or not last_name:
            full_name = user_info.get('name', '')
            if full_name:
                name_parts = full_name.strip().split(' ', 1)
                first_name = first_name or (name_parts[0] if name_parts else None)
                last_name = last_name or (name_parts[1] if len(name_parts) > 1 else None)

        # Fallback: derive an organization name from the email domain ("acme.com" -> "Acme")
        if not org_name:
            org_domain = email.split('@')[1] if '@' in email else None
            org_name = org_domain.split('.')[0].capitalize() if org_domain else None

        # Fields sent identically to both integrations.
        shared_fields = dict(
            email=email,
            first_name=first_name,
            last_name=last_name,
            organization_name=org_name,
            phone_number=enriched.get("phone_number"),
            linkedin_url=enriched.get("linkedin_url"),
            title=enriched.get("title"),
            headline=enriched.get("headline"),
            organization_website=enriched.get("organization_website"),
            organization_address=enriched.get("organization_address"),
        )

        # Update Brevo contact with enriched data
        await create_brevo_contact(list_id=BREVO_TRIAL_LIST_ID, **shared_fields)

        # Create lead in Monday.com
        await create_monday_lead(**shared_fields)
    except Exception as e:
        # Don't fail user creation if integrations fail
        print(f"[WARNING] Failed to enrich/update contact for {email}: {str(e)}")


@router.post("/api/auth/firebase/login")
async def firebase_login(
    request: FirebaseLoginRequest,
    db: Session = Depends(get_db)
):
    """
    Login with Firebase ID token.

    Verifies the token, rejects non-business email addresses, creates the
    user on first login (and syncs them to the CRM integrations) or refreshes
    the stored profile on later logins, then issues a JWT.

    Returns:
        Dict with ``token`` and a ``user`` summary (id, email, name, picture,
        auth_method).

    Raises:
        HTTPException: 400 when the token has no email, the email is not a
            business address, or verification/login fails for any other reason.
    """
    try:
        # Verify Firebase token
        user_info = await verify_firebase_token(request.id_token)
        email = user_info.get('email')

        if not email:
            raise HTTPException(status_code=400, detail="Email not found in Firebase token")

        # Validate business email
        if not is_business_email(email):
            raise HTTPException(
                status_code=400,
                detail="Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."
            )

        # Match an existing account by email or by Firebase UID.
        user = db.query(User).filter(
            (User.email == email.lower()) | (User.firebase_uid == user_info['uid'])
        ).first()

        if not user:
            user = User(
                email=email.lower(),
                name=user_info.get('name'),
                picture=user_info.get('picture'),
                firebase_uid=user_info['uid'],
                auth_method='firebase',
                email_verified=True
            )
            db.add(user)
            db.commit()
            db.refresh(user)
            print(f"[INFO] New user created via Firebase: {email}")

            # Enrich contact data from Apollo.io and update Brevo + Monday.com
            await _sync_new_user_to_integrations(email, user_info)
        else:
            # Update user info
            user.firebase_uid = user_info['uid']
            user.email_verified = True
            # Keep the stored value when the token carries no (or an empty)
            # name/picture, rather than overwriting it with None.
            user.name = user_info.get('name') or user.name
            user.picture = user_info.get('picture') or user.picture
            if user.auth_method != 'firebase':
                user.auth_method = 'firebase'
            db.commit()
            print(f"[INFO] User logged in via Firebase: {email}")

        # Generate JWT token
        token = create_access_token(data={"sub": user.id})

        return {
            "token": token,
            "user": {
                "id": user.id,
                "email": user.email,
                "name": user.name,
                "picture": user.picture,
                "auth_method": user.auth_method
            }
        }
    except HTTPException:
        raise
    except Exception as e:
        print(f"[ERROR] Firebase login failed: {str(e)}")
        raise HTTPException(status_code=400, detail=f"Authentication failed: {str(e)}")
170
+
171
+
172
@router.post("/api/auth/otp/request")
async def request_otp_endpoint(
    request: OTPRequestRequest,
    db: Session = Depends(get_db)
):
    """
    Send a one-time password to the given email address.

    The address is checked with ``validate_business_email`` before the OTP is
    requested. HTTPExceptions pass through unchanged; any other failure
    becomes a 500.
    """
    try:
        validate_business_email(request.email)
        return await request_otp(request.email, db)
    except HTTPException:
        raise
    except Exception as e:
        print(f"[ERROR] OTP request failed: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to send OTP: {str(e)}")
193
+
194
+
195
@router.post("/api/auth/otp/verify")
async def verify_otp_endpoint(
    request: OTPVerifyRequest,
    db: Session = Depends(get_db)
):
    """
    Check a submitted OTP and, on success, log the user in.

    Returns a dict with a JWT ``token`` and a ``user`` summary.
    HTTPExceptions pass through unchanged; any other failure becomes a 400.
    """
    try:
        validate_business_email(request.email)

        account = await verify_otp(request.email, request.otp, db)
        jwt_token = create_access_token(data={"sub": account.id})

        profile = {
            "id": account.id,
            "email": account.email,
            "name": account.name,
            "picture": account.picture,
            "auth_method": account.auth_method
        }
        return {"token": jwt_token, "user": profile}
    except HTTPException:
        raise
    except Exception as e:
        print(f"[ERROR] OTP verification failed: {str(e)}")
        raise HTTPException(status_code=400, detail=f"OTP verification failed: {str(e)}")
229
+
230
+
231
@router.get("/api/auth/me")
async def get_current_user_info(current_user: User = Depends(get_current_user)):
    """Return the profile fields of the authenticated user."""
    exposed_fields = ("id", "email", "name", "picture", "auth_method")
    return {field: getattr(current_user, field) for field in exposed_fields}
241
+
242
+
243
@router.post("/api/auth/api-key/create")
async def create_api_key(
    request: CreateAPIKeyRequest,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Issue a new API key for the authenticated user.

    Only the hash and display prefix are stored; the plaintext key appears
    solely in this response. Responds with 400 when the name is blank.
    """
    if not (request.name and request.name.strip()):
        raise HTTPException(status_code=400, detail="API key name is required")

    plaintext_key = generate_api_key()
    hashed_key = hash_api_key(plaintext_key)
    display_prefix = get_api_key_prefix(plaintext_key)

    record = APIKey(
        user_id=current_user.id,
        name=request.name.strip(),
        key_hash=hashed_key,
        key_prefix=display_prefix,
        is_active=True
    )
    db.add(record)
    db.commit()
    db.refresh(record)

    print(f"[INFO] API key created for user {current_user.email}: {display_prefix}")

    created_at = record.created_at.isoformat() if record.created_at else None
    return {
        "success": True,
        "api_key": plaintext_key,  # Only returned once - user must save this!
        "key_id": record.id,
        "key_prefix": display_prefix,
        "name": record.name,
        "created_at": created_at,
        "message": "API key created successfully. Store this key securely - it will not be shown again!"
    }
284
+
285
+
286
@router.get("/api/auth/api-keys")
async def list_api_keys(
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Return the authenticated user's API keys, newest first.

    Each entry exposes the key prefix only, never the full key.
    """
    def _iso(moment):
        # Timestamps are serialized as ISO 8601, or None when unset.
        return moment.isoformat() if moment else None

    records = (
        db.query(APIKey)
        .filter(APIKey.user_id == current_user.id)
        .order_by(APIKey.created_at.desc())
        .all()
    )

    summaries = []
    for record in records:
        summaries.append({
            "id": record.id,
            "name": record.name,
            "key_prefix": record.key_prefix,
            "is_active": record.is_active,
            "last_used_at": _iso(record.last_used_at),
            "created_at": _iso(record.created_at),
        })

    return {"success": True, "api_keys": summaries}
316
+
317
+
318
@router.delete("/api/auth/api-key/{key_id}")
async def delete_api_key(
    key_id: int,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Deactivate one of the authenticated user's API keys.

    The record is kept and only flagged inactive. Responds with 404 when the
    key does not exist or belongs to another user.
    """
    record = (
        db.query(APIKey)
        .filter(APIKey.id == key_id, APIKey.user_id == current_user.id)
        .first()
    )

    if not record:
        raise HTTPException(status_code=404, detail="API key not found")

    # Soft delete: the row stays, the key stops being accepted.
    record.is_active = False
    db.commit()

    print(f"[INFO] API key {record.key_prefix} deactivated for user {current_user.email}")

    return {"success": True, "message": "API key deactivated successfully"}
347
+
backend/app/brevo_service.py ADDED
@@ -0,0 +1,486 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Brevo (formerly Sendinblue) email service for sending transactional emails.
3
+ Reference: https://developers.brevo.com/reference/sendtransacemail
4
+ """
5
+ import os
6
+ import httpx
7
+ from typing import Optional, Dict, Any
8
+ from difflib import SequenceMatcher
9
+
10
def _read_int_env(name: str, default: int) -> int:
    """
    Read an integer from the environment without ever raising.

    Args:
        name: Environment variable name.
        default: Value returned when the variable is unset, blank or not an integer.

    Returns:
        The parsed integer, or ``default``.
    """
    raw = os.environ.get(name, "")
    if not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        # A malformed optional setting should not prevent the module from importing.
        print(f"[WARNING] {name}={raw!r} is not a valid integer, using default {default}")
        return default


# Brevo credentials and sender identity, read once at import time.
BREVO_API_KEY = os.environ.get("BREVO_API_KEY", "")
BREVO_API_URL = "https://api.brevo.com/v3/smtp/email"
BREVO_SENDER_EMAIL = os.environ.get("BREVO_SENDER_EMAIL", "noreply@yourdomain.com")
BREVO_SENDER_NAME = os.environ.get("BREVO_SENDER_NAME", "EZOFIS AI")
BREVO_TRIAL_LIST_ID = _read_int_env("BREVO_TRIAL_LIST_ID", 5)  # Default to 5 for "VRP Trials"
15
+
16
# Brevo standard attribute names mapping
BREVO_ATTRIBUTE_MAP = {
    "first_name": "FIRSTNAME",
    "last_name": "LASTNAME",
    "organization_name": "COMPANY",
    "phone_number": "SMS",
    "linkedin_url": "LINKEDIN",
    "title": "JOB_TITLE",
    "headline": "HEADLINE",
    "organization_website": "WEBSITE",
    "organization_address": "ADDRESS",
    # Common variations
    "firstname": "FIRSTNAME",
    "fname": "FIRSTNAME",
    "given_name": "FIRSTNAME",
    "lastname": "LASTNAME",
    "lname": "LASTNAME",
    "surname": "LASTNAME",
    "family_name": "LASTNAME",
    "company": "COMPANY",
    "org": "COMPANY",
    "organization": "COMPANY",
    "phone": "SMS",
    "mobile": "SMS",
    "telephone": "SMS",
    "linkedin": "LINKEDIN",
    "linkedin_profile": "LINKEDIN",
    "job_title": "JOB_TITLE",
    "position": "JOB_TITLE",
    "role": "JOB_TITLE",
    "website": "WEBSITE",
    "url": "WEBSITE",
    "web": "WEBSITE",
    "address": "ADDRESS",
    "location": "ADDRESS",
}

# Minimum SequenceMatcher ratio at which a fuzzy match is accepted.
_BREVO_FUZZY_THRESHOLD = 0.6


def _normalize_field_key(name: str) -> str:
    """Lower-case ``name`` and drop underscores, hyphens and whitespace."""
    collapsed = "".join(name.lower().split())
    return collapsed.replace("_", "").replace("-", "")


def _get_brevo_attribute_name(field_name: str) -> Optional[str]:
    """
    Resolve a field name to its Brevo contact attribute name.

    Resolution order:
      1. exact (case-insensitive) key of ``BREVO_ATTRIBUTE_MAP``;
      2. exact match after separators are removed from both sides,
         so "First-Name", "first name" and "firstname" all resolve;
      3. closest key by ``difflib.SequenceMatcher`` ratio, accepted only when
         the ratio reaches ``_BREVO_FUZZY_THRESHOLD``.

    Args:
        field_name: Field name (e.g., "first_name", "Phone-Number")

    Returns:
        Brevo attribute name (e.g., "FIRSTNAME") or None if nothing matches,
        or if ``field_name`` is not a non-blank string.
    """
    if not isinstance(field_name, str):
        return None

    lowered = field_name.strip().lower()
    if not lowered:
        return None

    # Direct lookup first
    if lowered in BREVO_ATTRIBUTE_MAP:
        return BREVO_ATTRIBUTE_MAP[lowered]

    normalized = _normalize_field_key(lowered)

    best_match = None
    best_score = 0.0
    for key, value in BREVO_ATTRIBUTE_MAP.items():
        # Keys are normalized the same way as the input; otherwise the
        # underscores left in the keys would lower every similarity score.
        candidate = _normalize_field_key(key)
        if candidate == normalized:
            return value
        score = SequenceMatcher(None, normalized, candidate).ratio()
        if score > best_score:
            best_score = score
            best_match = value

    # Only return if similarity is high enough
    if best_score >= _BREVO_FUZZY_THRESHOLD:
        return best_match

    return None
89
+
90
+
91
async def send_otp_email(email: str, otp: str) -> bool:
    """
    Send OTP email using Brevo transactional email API.

    Args:
        email: Recipient email address
        otp: One-time password code

    Returns:
        True if email sent successfully

    Raises:
        ValueError: If BREVO_API_KEY is not set
        Exception: If email sending fails (HTTP error status, timeout or
            any other transport error); the original error is chained.
    """
    if not BREVO_API_KEY:
        raise ValueError("BREVO_API_KEY environment variable is not set")

    # Brevo API payload structure
    payload = {
        "sender": {
            "name": BREVO_SENDER_NAME,
            "email": BREVO_SENDER_EMAIL
        },
        "to": [
            {
                "email": email
            }
        ],
        "subject": "Your OTP Code for EZOFIS AI",
        "htmlContent": f"""
        <!DOCTYPE html>
        <html>
        <head>
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <style>
        body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; line-height: 1.6; color: #333; margin: 0; padding: 0; background-color: #f4f4f4; }}
        .container {{ max-width: 600px; margin: 20px auto; background: white; border-radius: 10px; overflow: hidden; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }}
        .content {{ padding: 40px 30px; }}
        .content p {{ margin: 0 0 15px 0; color: #555; }}
        .otp-box {{ background: #f8f9fa; border: 2px dashed #667eea; padding: 30px; text-align: center; margin: 30px 0; border-radius: 8px; }}
        .otp-label {{ font-size: 14px; color: #666; margin-bottom: 10px; }}
        .otp-code {{ font-size: 36px; font-weight: bold; color: #667eea; letter-spacing: 8px; font-family: 'Courier New', monospace; }}
        .expiry {{ color: #888; font-size: 14px; margin-top: 20px; }}
        .footer {{ text-align: center; margin-top: 30px; padding-top: 20px; border-top: 1px solid #eee; color: #999; font-size: 12px; }}
        .warning {{ background: #fff3cd; border-left: 4px solid #ffc107; padding: 15px; margin: 20px 0; border-radius: 4px; font-size: 14px; color: #856404; }}
        </style>
        </head>
        <body>
        <div class="container">
        <div class="content">
        <p>Hello,</p>
        <p>You requested a one-time password (OTP) to sign in to your EZOFIS account.</p>
        <div class="otp-box">
        <div class="otp-label">Your OTP code is:</div>
        <div class="otp-code">{otp}</div>
        </div>
        <p class="expiry">This code will expire in <strong>10 minutes</strong>.</p>
        <div class="warning">
        <strong>⚠️ Security Notice:</strong> If you didn't request this code, please ignore this email. Do not share this code with anyone.
        </div>
        <div class="footer">
        <p>© EZOFIS - Agentic Intelligence Platform</p>
        <p>This is an automated message, please do not reply.</p>
        </div>
        </div>
        </div>
        </body>
        </html>
        """,
        "textContent": f"""
        Your OTP Code for EZOFIS AI

        Hello,

        You requested a one-time password (OTP) to sign in to your EZOFIS account.

        Your OTP code is: {otp}

        This code will expire in 10 minutes.

        ⚠️ Security Notice: If you didn't request this code, please ignore this email. Do not share this code with anyone.

        © EZOFIS - Agentic Intelligence Platform
        This is an automated message, please do not reply.
        """
    }

    headers = {
        "accept": "application/json",
        "api-key": BREVO_API_KEY,
        "content-type": "application/json"
    }

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(BREVO_API_URL, json=payload, headers=headers)
            response.raise_for_status()

            # The request has already succeeded here. An unreadable body must
            # not be reported as a failed send, or the caller would retry and
            # the user would receive a second code.
            try:
                result = response.json()
            except ValueError:
                result = {}
            message_id = result.get('messageId', 'N/A') if isinstance(result, dict) else 'N/A'
            print(f"[INFO] Brevo email sent successfully to {email}. Message ID: {message_id}")
            return True
    except httpx.HTTPStatusError as e:
        status_code = e.response.status_code
        try:
            error_detail = e.response.json()
        except ValueError:
            # Error body was not JSON; fall back to the exception text.
            error_detail = {"message": str(e)}
        if not isinstance(error_detail, dict):
            error_detail = {}

        error_msg = error_detail.get('message', f'HTTP {status_code}')
        print(f"[ERROR] Brevo API error: {status_code} - {error_msg}")
        raise Exception(f"Failed to send email via Brevo: {error_msg}") from e
    except httpx.TimeoutException as e:
        print(f"[ERROR] Brevo API request timed out")
        raise Exception("Email service timeout. Please try again.") from e
    except Exception as e:
        print(f"[ERROR] Brevo email sending failed: {str(e)}")
        raise Exception(f"Failed to send email: {str(e)}") from e
211
+
212
+
213
def _name_from_email(sender_email: str) -> str:
    """
    Derive a display name from the local part of an email address.

    "seth.smith@x.com" -> "Seth Smith", "seth_smith@x.com" -> "Seth Smith",
    "seth@x.com" -> "Seth". Dots and underscores are both treated as word
    separators and empty fragments are dropped. Returns "" for empty input.
    """
    local_part = (sender_email or "").split('@')[0]
    words = [word for word in local_part.replace('_', '.').split('.') if word]
    return ' '.join(word.capitalize() for word in words)


async def send_share_email(recipient_email: str, sender_email: str, share_link: str, sender_name: str = None) -> bool:
    """
    Send share email using Brevo transactional email API.

    Args:
        recipient_email: Recipient email address
        sender_email: Sender email address
        share_link: Share link URL
        sender_name: Sender's display name (optional, falls back to a name
            derived from ``sender_email`` if not provided)

    Returns:
        True if email sent successfully

    Raises:
        ValueError: If BREVO_API_KEY is not set
        Exception: If email sending fails (HTTP error status, timeout or
            any other transport error); the original error is chained.
    """
    from html import escape as _html_escape

    if not BREVO_API_KEY:
        raise ValueError("BREVO_API_KEY environment variable is not set")

    # Determine sender display name: use sender_name if available, otherwise extract from email
    # This is the logged-in user's name, NOT the email sender name (BREVO_SENDER_NAME)
    # BREVO_SENDER_NAME is only used for the "From" field, not the email body
    if sender_name and sender_name.strip():
        sender_display = sender_name.strip()
        print(f"[INFO] Using user's name from database: {sender_display}")
    else:
        sender_display = _name_from_email(sender_email)
        print(f"[INFO] Extracted name from email: {sender_display} (from {sender_email})")

    # The display name and the link are placed inside HTML markup below, so
    # they are escaped first; the subject and plain-text part use the raw values.
    html_sender_display = _html_escape(sender_display, quote=True)
    html_share_link = _html_escape(share_link or "", quote=True)

    # Brevo API payload structure
    # Note: BREVO_SENDER_NAME is used only for the "From" field in the email header
    # The email body uses sender_display (the logged-in user's name)
    payload = {
        "sender": {
            "name": BREVO_SENDER_NAME,
            "email": BREVO_SENDER_EMAIL
        },
        "to": [
            {
                "email": recipient_email
            }
        ],
        "subject": f"{sender_display} shared a document extraction with you",
        "htmlContent": f"""
        <!DOCTYPE html>
        <html>
        <head>
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <style>
        body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; line-height: 1.6; color: #333; margin: 0; padding: 0; background-color: #f4f4f4; }}
        .container {{ max-width: 600px; margin: 20px auto; background: white; border-radius: 10px; overflow: hidden; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }}
        .content {{ padding: 40px 30px; }}
        .content p {{ margin: 0 0 15px 0; color: #555; }}
        .share-box {{ background: #f8f9fa; border: 2px solid #667eea; padding: 30px; text-align: center; margin: 30px 0; border-radius: 8px; }}
        .share-button {{ display: inline-block; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: #ffffff !important; padding: 15px 30px; text-decoration: none; border-radius: 8px; font-weight: 600; margin-top: 20px; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; }}
        .share-button:hover {{ color: #ffffff !important; }}
        .footer {{ text-align: center; margin-top: 30px; padding-top: 20px; border-top: 1px solid #eee; color: #999; font-size: 12px; }}
        </style>
        </head>
        <body>
        <div class="container">
        <div class="content">
        <p>Hello,</p>
        <p><strong>{html_sender_display}</strong> wants you to take a look at a document extraction output.</p>
        <div class="share-box">
        <p style="margin-bottom: 20px; color: #666;">Click the button below to view the shared extraction:</p>
        <a href="{html_share_link}" class="share-button">View Shared Extraction</a>
        </div>
        <p style="color: #888; font-size: 14px;">You'll need to sign in to your EZOFIS account to view this extraction. If you don't have an account, you can create one using the link above.</p>
        <div class="footer">
        <p>© EZOFIS - Agentic Intelligence Platform</p>
        <p>This is an automated message, please do not reply.</p>
        </div>
        </div>
        </div>
        </body>
        </html>
        """,
        "textContent": f"""
        {sender_display} shared a document extraction with you

        Hello,

        {sender_display} wants you to take a look at a document extraction output.

        View the shared extraction: {share_link}

        You'll need to sign in to your EZOFIS account to view this extraction. If you don't have an account, you can create one using the link above.

        © EZOFIS - Agentic Intelligence Platform
        This is an automated message, please do not reply.
        """
    }

    headers = {
        "accept": "application/json",
        "api-key": BREVO_API_KEY,
        "content-type": "application/json"
    }

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(BREVO_API_URL, json=payload, headers=headers)
            response.raise_for_status()

            # The request has already succeeded here; an unreadable body must
            # not be reported as a failed send.
            try:
                result = response.json()
            except ValueError:
                result = {}
            message_id = result.get('messageId', 'N/A') if isinstance(result, dict) else 'N/A'
            print(f"[INFO] Brevo share email sent successfully to {recipient_email}. Message ID: {message_id}")
            return True
    except httpx.HTTPStatusError as e:
        status_code = e.response.status_code
        try:
            error_detail = e.response.json()
        except ValueError:
            # Error body was not JSON; fall back to the exception text.
            error_detail = {"message": str(e)}
        if not isinstance(error_detail, dict):
            error_detail = {}

        error_msg = error_detail.get('message', f'HTTP {status_code}')
        print(f"[ERROR] Brevo API error: {status_code} - {error_msg}")
        raise Exception(f"Failed to send email via Brevo: {error_msg}") from e
    except httpx.TimeoutException as e:
        print(f"[ERROR] Brevo API request timed out")
        raise Exception("Email service timeout. Please try again.") from e
    except Exception as e:
        print(f"[ERROR] Brevo email sending failed: {str(e)}")
        raise Exception(f"Failed to send email: {str(e)}") from e
355
+
356
+
357
def _brevo_contact_url(email: str) -> str:
    """
    Build the Brevo contact-resource URL for ``email``.

    The address is trimmed, lower-cased and percent-encoded so that characters
    such as "+" and "@" survive being placed in the URL path.
    """
    from urllib.parse import quote

    return f"https://api.brevo.com/v3/contacts/{quote(email.strip().lower(), safe='')}"


async def create_brevo_contact(
    email: str,
    first_name: Optional[str] = None,
    last_name: Optional[str] = None,
    organization_name: Optional[str] = None,
    phone_number: Optional[str] = None,
    linkedin_url: Optional[str] = None,
    title: Optional[str] = None,
    headline: Optional[str] = None,
    organization_website: Optional[str] = None,
    organization_address: Optional[str] = None,
    list_id: Optional[int] = None
) -> bool:
    """
    Create a contact in Brevo and optionally add to a list.

    This function never raises: every failure is logged and reported as False.

    Args:
        email: Contact email address (required)
        first_name: Contact first name
        last_name: Contact last name
        organization_name: Organization name
        phone_number: Phone number
        linkedin_url: LinkedIn profile URL
        title: Job title
        headline: Professional headline
        organization_website: Company website
        organization_address: Company address
        list_id: ID of the list to add contact to (e.g., 5 for "VRP Trials")

    Returns:
        True if contact created (or updated) successfully, False otherwise
    """
    if not BREVO_API_KEY:
        print("[WARNING] BREVO_API_KEY not set, skipping Brevo contact creation")
        return False

    if not email or not email.strip():
        print("[ERROR] Cannot create Brevo contact without an email address")
        return False
    normalized_email = email.strip().lower()

    # Prepare contact attributes using automatic field mapping
    attributes = {}

    # Map all fields automatically
    field_mappings = {
        "first_name": first_name,
        "last_name": last_name,
        "organization_name": organization_name,
        "phone_number": phone_number,
        "linkedin_url": linkedin_url,
        "title": title,
        "headline": headline,
        "organization_website": organization_website,
        "organization_address": organization_address,
    }

    for field_name, field_value in field_mappings.items():
        if field_value:
            brevo_attr = _get_brevo_attribute_name(field_name)
            if brevo_attr:
                attributes[brevo_attr] = str(field_value).strip()  # Ensure it's a string and trimmed
                print(f"[DEBUG] Mapped '{field_name}' ({field_value}) to Brevo attribute '{brevo_attr}'")
            else:
                print(f"[DEBUG] No Brevo attribute mapping found for '{field_name}'")
        else:
            print(f"[DEBUG] Skipping '{field_name}' - value is empty/None")

    print(f"[DEBUG] Final Brevo attributes to send: {attributes}")

    # Prepare contact data
    contact_data = {
        "email": normalized_email,
        "updateEnabled": True  # Update existing contact if email already exists
    }

    if attributes:
        contact_data["attributes"] = attributes

    # Add to list if list_id is provided
    if list_id:
        contact_data["listIds"] = [list_id]

    headers = {
        "accept": "application/json",
        "api-key": BREVO_API_KEY,
        "content-type": "application/json"
    }

    list_suffix = f" and added to list {list_id}" if list_id else ""

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://api.brevo.com/v3/contacts",
                json=contact_data,
                headers=headers
            )

            if response.status_code in [200, 201, 204]:
                print(f"[INFO] Successfully created Brevo contact: {email}" + list_suffix)
                return True

            # Contact might already exist, try to update it
            if response.status_code == 400 and "already exists" in response.text.lower():
                print(f"[INFO] Contact {email} already exists in Brevo, updating...")

                # Only the fields being changed are sent; "email" and
                # "updateEnabled" belong to the create request.
                update_payload = {
                    key: contact_data[key]
                    for key in ("attributes", "listIds")
                    if key in contact_data
                }
                if not update_payload:
                    # The contact exists and there is nothing to change.
                    return True

                update_response = await client.put(
                    _brevo_contact_url(normalized_email),
                    json=update_payload,
                    headers=headers
                )
                if update_response.status_code in [200, 204]:
                    print(f"[INFO] Successfully updated Brevo contact: {email}" + list_suffix)
                    return True

                print(f"[ERROR] Failed to update Brevo contact: {update_response.status_code} - {update_response.text}")
                return False

            print(f"[ERROR] Failed to create Brevo contact: {response.status_code} - {response.text}")
            return False

    except Exception as e:
        # Status codes are checked by hand above, so only transport-level
        # errors (connection failure, timeout, ...) are expected to land here.
        print(f"[ERROR] Failed to create Brevo contact: {str(e)}")
        return False
486
+
backend/app/db.py CHANGED
@@ -1,18 +1,18 @@
1
- import os
2
- from sqlalchemy import create_engine
3
- from sqlalchemy.orm import sessionmaker, declarative_base
4
-
5
- # SQLite DB path. In Docker / HF we’ll set DB_PATH env, default is local "data/app.db"
6
- DB_PATH = os.environ.get("DB_PATH", "data/app.db")
7
-
8
- # Create SQLAlchemy engine
9
- engine = create_engine(
10
- f"sqlite:///{DB_PATH}",
11
- connect_args={"check_same_thread": False},
12
- )
13
-
14
- # Session factory
15
- SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
16
-
17
- # Base model class
18
- Base = declarative_base()
 
1
+ import os
2
+ from sqlalchemy import create_engine
3
+ from sqlalchemy.orm import sessionmaker, declarative_base
4
+
5
# Location of the SQLite database file. In Docker / HF the DB_PATH environment
# variable is expected to be set; otherwise the local "data/app.db" is used.
DB_PATH = os.environ.get("DB_PATH", "data/app.db")

# SQLAlchemy engine for that file. check_same_thread=False is handed to the
# SQLite driver through connect_args.
engine = create_engine(
    "sqlite:///" + DB_PATH,
    connect_args=dict(check_same_thread=False),
)

# Session factory bound to the engine; sessions neither autocommit nor autoflush.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)

# Declarative base class for the ORM models.
Base = declarative_base()
backend/app/email_validator.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Email validation utilities to ensure only business emails are allowed.
3
+ """
4
+ from fastapi import HTTPException
5
+
6
# List of personal email domains to block
PERSONAL_EMAIL_DOMAINS = {
    'gmail.com', 'yahoo.com', 'hotmail.com', 'outlook.com',
    'aol.com', 'icloud.com', 'mail.com', 'protonmail.com',
    'yandex.com', 'zoho.com', 'gmx.com', 'live.com', 'msn.com',
    'me.com', 'mac.com', 'yahoo.co.uk', 'yahoo.co.jp', 'yahoo.fr',
    'yahoo.de', 'yahoo.it', 'yahoo.es', 'yahoo.in', 'yahoo.com.au',
    'gmail.co.uk', 'gmail.fr', 'gmail.de', 'gmail.it', 'gmail.es',
    'gmail.in', 'gmail.com.au', 'hotmail.co.uk', 'hotmail.fr',
    'hotmail.de', 'hotmail.it', 'hotmail.es', 'outlook.co.uk',
    'outlook.fr', 'outlook.de', 'outlook.it', 'outlook.es',
    'rediffmail.com', 'sina.com', 'qq.com', '163.com', '126.com',
    'mail.ru', 'inbox.com', 'fastmail.com', 'tutanota.com',
    'hey.com', 'pm.me'
}


def is_business_email(email: str) -> bool:
    """
    Check if email is a business email (not personal).

    The domain is everything after the LAST "@", compared case-insensitively
    with surrounding whitespace and a trailing dot removed, so that
    "x@y@gmail.com" or "user@gmail.com." cannot slip past the block list.

    Args:
        email: Email address to validate

    Returns:
        True if business email. False if the domain is in
        PERSONAL_EMAIL_DOMAINS, or if the value is not a string or lacks a
        local part, an "@" or a domain.
    """
    if not isinstance(email, str):
        return False

    local_part, separator, domain = email.strip().rpartition('@')
    domain = domain.strip().lower().rstrip('.')

    # rpartition yields an empty separator when there is no "@" at all.
    if not separator or not local_part or not domain:
        return False

    return domain not in PERSONAL_EMAIL_DOMAINS
38
+
39
+
40
def validate_business_email(email: str) -> None:
    """
    Reject a missing or personal email address with an HTTP 400.

    Args:
        email: Email address to validate

    Raises:
        HTTPException: 400 when ``email`` is empty, or when
            ``is_business_email`` reports it is not a business address
    """
    rejection_reason = None

    if not email:
        rejection_reason = "Email address is required"
    elif not is_business_email(email):
        rejection_reason = "Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."

    if rejection_reason is not None:
        raise HTTPException(status_code=400, detail=rejection_reason)
+
backend/app/firebase_auth.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Firebase Authentication utilities.
3
+ """
4
+ import os
5
+ import json
6
+ import firebase_admin
7
+ from firebase_admin import auth, credentials
8
+ from fastapi import HTTPException
9
+
10
# Set to True once the Firebase Admin SDK is known to be initialized in this process.
_firebase_initialized = False

def initialize_firebase():
    """
    Initialize the Firebase Admin SDK once per process.

    Credential sources are tried in this order, moving on to the next one
    when a source is missing or unusable:
      1. FIREBASE_SERVICE_ACCOUNT_JSON - service account as a JSON string
      2. FIREBASE_SERVICE_ACCOUNT_KEY  - path to a service account file
      3. default credentials (``firebase_admin.initialize_app()`` with no arguments)

    Raises:
        HTTPException: 500 when none of the sources yields an initialized app.
    """
    global _firebase_initialized

    if _firebase_initialized:
        return

    if firebase_admin._apps:
        # An app already exists; record that so later calls return at the first check.
        _firebase_initialized = True
        return

    # Try to get service account from environment variable (JSON string)
    service_account_json = os.environ.get("FIREBASE_SERVICE_ACCOUNT_JSON")

    if service_account_json:
        try:
            service_account_info = json.loads(service_account_json)
            cred = credentials.Certificate(service_account_info)
            firebase_admin.initialize_app(cred)
        except ValueError:
            # json.JSONDecodeError is a ValueError, and an invalid certificate
            # dict is also reported as ValueError; either way try the next source.
            print("[WARNING] Failed to parse FIREBASE_SERVICE_ACCOUNT_JSON")
        else:
            _firebase_initialized = True
            print("[INFO] Firebase Admin SDK initialized from environment variable")
            return

    # Try to get service account from file path
    service_account_path = os.environ.get("FIREBASE_SERVICE_ACCOUNT_KEY")
    if service_account_path and os.path.exists(service_account_path):
        try:
            cred = credentials.Certificate(service_account_path)
            firebase_admin.initialize_app(cred)
        except (IOError, ValueError) as e:
            # Unreadable or invalid key file: fall through to default credentials.
            print(f"[WARNING] Failed to load Firebase service account file {service_account_path}: {e}")
        else:
            _firebase_initialized = True
            print(f"[INFO] Firebase Admin SDK initialized from file: {service_account_path}")
            return

    # Try to use default credentials (for Google Cloud environments)
    try:
        firebase_admin.initialize_app()
        _firebase_initialized = True
        print("[INFO] Firebase Admin SDK initialized with default credentials")
        return
    except Exception as e:
        print(f"[WARNING] Firebase initialization failed: {e}")
        raise HTTPException(
            status_code=500,
            detail="Firebase not configured. Please set FIREBASE_SERVICE_ACCOUNT_JSON or FIREBASE_SERVICE_ACCOUNT_KEY environment variable."
        ) from e
56
+
57
+
58
async def verify_firebase_token(id_token: str) -> dict:
    """
    Validate a Firebase ID token and extract the caller's profile.

    Args:
        id_token: Firebase ID token from client

    Returns:
        Dict with keys 'uid', 'email', 'name' and 'picture'; the last three
        are None when the decoded token does not carry them.

    Raises:
        HTTPException: 401 when the token cannot be verified or has no 'uid';
            whatever ``initialize_firebase`` raises propagates unchanged.
    """
    # Runs outside the try block so that configuration errors are not turned into a 401.
    initialize_firebase()

    try:
        claims = auth.verify_id_token(id_token)

        profile = {'uid': claims['uid']}
        for optional_claim in ('email', 'name', 'picture'):
            profile[optional_claim] = claims.get(optional_claim)
        return profile
    except Exception as e:
        # ValueError gets its own wording; every other failure shares the generic one.
        if isinstance(e, ValueError):
            detail = f"Invalid Firebase token: {str(e)}"
        else:
            detail = f"Firebase authentication failed: {str(e)}"
        raise HTTPException(status_code=401, detail=detail)
92
+
backend/app/main.py CHANGED
@@ -1,292 +1,786 @@
1
- import os
2
- import time
3
- from typing import List, Dict
4
-
5
- from fastapi import FastAPI, UploadFile, File, Depends
6
- from fastapi.middleware.cors import CORSMiddleware
7
- from fastapi.staticfiles import StaticFiles
8
- from sqlalchemy.orm import Session
9
-
10
- from .db import Base, engine, SessionLocal
11
- from .models import ExtractionRecord
12
- from .schemas import ExtractionRecordBase, ExtractionStage
13
- from .openrouter_client import extract_fields_from_document
14
-
15
- # Ensure data dir exists for SQLite
16
- os.makedirs("data", exist_ok=True)
17
-
18
- # Create tables
19
- Base.metadata.create_all(bind=engine)
20
-
21
- app = FastAPI(title="Document Capture Demo – Backend")
22
-
23
- # CORS (for safety we allow all; you can tighten later)
24
- app.add_middleware(
25
- CORSMiddleware,
26
- allow_origins=["*"],
27
- allow_credentials=True,
28
- allow_methods=["*"],
29
- allow_headers=["*"],
30
- )
31
-
32
-
33
- def get_db():
34
- db = SessionLocal()
35
- try:
36
- yield db
37
- finally:
38
- db.close()
39
-
40
-
41
- @app.get("/ping")
42
- def ping():
43
- """Healthcheck."""
44
- return {"status": "ok", "message": "backend alive"}
45
-
46
-
47
- def make_stages(total_ms: int, status: str) -> Dict[str, ExtractionStage]:
48
- """
49
- Build synthetic stage timing data for the History UI.
50
- For now we just split total_ms into 4 stages.
51
- """
52
- if total_ms <= 0:
53
- total_ms = 1000
54
-
55
- return {
56
- "uploading": ExtractionStage(
57
- time=int(total_ms * 0.15),
58
- status="completed",
59
- variation="normal",
60
- ),
61
- "aiAnalysis": ExtractionStage(
62
- time=int(total_ms * 0.55),
63
- status="completed" if status == "completed" else "failed",
64
- variation="normal",
65
- ),
66
- "dataExtraction": ExtractionStage(
67
- time=int(total_ms * 0.2),
68
- status="completed" if status == "completed" else "skipped",
69
- variation="fast",
70
- ),
71
- "outputRendering": ExtractionStage(
72
- time=int(total_ms * 0.1),
73
- status="completed" if status == "completed" else "skipped",
74
- variation="normal",
75
- ),
76
- }
77
-
78
-
79
- @app.post("/api/extract")
80
- async def extract_document(
81
- file: UploadFile = File(...),
82
- db: Session = Depends(get_db),
83
- ):
84
- """
85
- Main extraction endpoint used by the Dashboard.
86
- 1) Read the uploaded file
87
- 2) Call OpenRouter + Qwen3-VL
88
- 3) Store a record in SQLite
89
- 4) Return extraction result + metadata
90
- """
91
- start = time.time()
92
- content = await file.read()
93
- content_type = file.content_type or "application/octet-stream"
94
- size_mb = len(content) / 1024 / 1024
95
- size_str = f"{size_mb:.2f} MB"
96
-
97
- try:
98
- print(f"[INFO] Starting extraction for file: {file.filename}, type: {content_type}, size: {size_str}")
99
- extracted = await extract_fields_from_document(content, content_type, file.filename)
100
- total_ms = int((time.time() - start) * 1000)
101
-
102
- print(f"[INFO] Extraction completed. Response keys: {list(extracted.keys())}")
103
- print(f"[INFO] Fields extracted: {extracted.get('fields', {})}")
104
-
105
- confidence = float(extracted.get("confidence", 90))
106
- fields = extracted.get("fields", {})
107
-
108
- # Get full_text for text output
109
- full_text = extracted.get("full_text", "")
110
- if full_text:
111
- full_text_words = len(str(full_text).split())
112
- print(f"[INFO] Full text extracted: {full_text_words} words")
113
-
114
- # Check if fields contain structured data (from table parsing)
115
- # If fields is a dict with page_X keys, it's already structured
116
- # If fields is empty or simple, add full_text and pages for text display
117
- if not fields or (isinstance(fields, dict) and not any(k.startswith("page_") for k in fields.keys())):
118
- if full_text:
119
- fields["full_text"] = full_text
120
-
121
- # Also check for pages array
122
- pages_data = extracted.get("pages", [])
123
- if pages_data and isinstance(pages_data, list):
124
- print(f"[INFO] Extracted text from {len(pages_data)} page(s)")
125
- fields["pages"] = pages_data
126
-
127
- # Count fields - if structured data exists, count table rows + metadata
128
- if isinstance(fields, dict):
129
- # Check if it's structured page data
130
- if any(k.startswith("page_") for k in fields.keys()):
131
- # Count structured fields (metadata keys + table rows)
132
- page_data = list(fields.values())[0] if len(fields) == 1 else fields
133
- if isinstance(page_data, dict):
134
- table_rows = page_data.get("table", [])
135
- metadata_keys = len(page_data.get("metadata", {}))
136
- fields_extracted = len(table_rows) + metadata_keys
137
- print(f"[INFO] Structured data: {len(table_rows)} table rows, {metadata_keys} metadata fields")
138
- else:
139
- fields_extracted = len(fields)
140
- else:
141
- # Regular fields count (excluding full_text and pages)
142
- fields_extracted = len([k for k in fields.keys() if k not in ["full_text", "pages"]])
143
- else:
144
- fields_extracted = 0
145
-
146
- print(f"[INFO] Final stats - confidence: {confidence}, fields_count: {fields_extracted}")
147
-
148
- status = "completed"
149
- error_message = None
150
- except Exception as e:
151
- import traceback
152
- total_ms = int((time.time() - start) * 1000)
153
- confidence = 0.0
154
- fields = {}
155
- fields_extracted = 0
156
- status = "failed"
157
- error_message = str(e)
158
- print(f"[ERROR] Extraction failed: {error_message}")
159
- print(f"[ERROR] Traceback: {traceback.format_exc()}")
160
-
161
- # Save record to DB
162
- rec = ExtractionRecord(
163
- file_name=file.filename,
164
- file_type=content_type,
165
- file_size=size_str,
166
- status=status,
167
- confidence=confidence,
168
- fields_extracted=fields_extracted,
169
- total_time_ms=total_ms,
170
- raw_output=str(fields),
171
- error_message=error_message,
172
- )
173
- db.add(rec)
174
- db.commit()
175
- db.refresh(rec)
176
-
177
- stages = make_stages(total_ms, status)
178
-
179
- # Response shape that frontend will consume
180
- return {
181
- "id": rec.id,
182
- "fileName": rec.file_name,
183
- "fileType": rec.file_type,
184
- "fileSize": rec.file_size,
185
- "status": status,
186
- "confidence": confidence,
187
- "fieldsExtracted": fields_extracted,
188
- "totalTime": total_ms,
189
- "fields": fields,
190
- "stages": {k: v.dict() for k, v in stages.items()},
191
- "errorMessage": error_message,
192
- }
193
-
194
-
195
- @app.get("/api/history", response_model=List[ExtractionRecordBase])
196
- def get_history(db: Session = Depends(get_db)):
197
- """
198
- Used by the History page.
199
- Returns last 100 records, with synthetic stage data.
200
- """
201
- recs = (
202
- db.query(ExtractionRecord)
203
- .order_by(ExtractionRecord.created_at.desc())
204
- .limit(100)
205
- .all()
206
- )
207
-
208
- output: List[ExtractionRecordBase] = []
209
- for r in recs:
210
- stages = make_stages(r.total_time_ms or 1000, r.status or "completed")
211
- output.append(
212
- ExtractionRecordBase(
213
- id=r.id,
214
- fileName=r.file_name,
215
- fileType=r.file_type or "",
216
- fileSize=r.file_size or "",
217
- extractedAt=r.created_at,
218
- status=r.status or "completed",
219
- confidence=r.confidence or 0.0,
220
- fieldsExtracted=r.fields_extracted or 0,
221
- totalTime=r.total_time_ms or 0,
222
- stages=stages,
223
- errorMessage=r.error_message,
224
- )
225
- )
226
- return output
227
-
228
-
229
- # Static frontend mounting (used after we build React)
230
- # Dockerfile copies the Vite build into backend/frontend_dist
231
- # IMPORTANT: API routes must be defined BEFORE this so they take precedence
232
- frontend_dir = os.path.join(
233
- os.path.dirname(os.path.dirname(__file__)), "frontend_dist"
234
- )
235
-
236
- if os.path.isdir(frontend_dir):
237
- # Serve static files (JS, CSS, images, etc.) from assets directory
238
- assets_dir = os.path.join(frontend_dir, "assets")
239
- if os.path.isdir(assets_dir):
240
- app.mount(
241
- "/assets",
242
- StaticFiles(directory=assets_dir),
243
- name="assets",
244
- )
245
-
246
- # Serve static files from root (logo.png, favicon.ico, etc.)
247
- # Files in public/ directory are copied to dist/ root during Vite build
248
- # These routes must be defined BEFORE the catch-all route
249
- @app.get("/logo.png")
250
- async def serve_logo():
251
- """Serve logo.png from frontend_dist root."""
252
- from fastapi.responses import FileResponse
253
- logo_path = os.path.join(frontend_dir, "logo.png")
254
- if os.path.exists(logo_path):
255
- return FileResponse(logo_path, media_type="image/png")
256
- from fastapi import HTTPException
257
- raise HTTPException(status_code=404)
258
-
259
- @app.get("/favicon.ico")
260
- async def serve_favicon():
261
- """Serve favicon.ico from frontend_dist root."""
262
- from fastapi.responses import FileResponse
263
- favicon_path = os.path.join(frontend_dir, "favicon.ico")
264
- if os.path.exists(favicon_path):
265
- return FileResponse(favicon_path, media_type="image/x-icon")
266
- from fastapi import HTTPException
267
- raise HTTPException(status_code=404)
268
-
269
- # Catch-all route to serve index.html for React Router
270
- # This must be last so API routes and static files are matched first
271
- @app.get("/{full_path:path}")
272
- async def serve_frontend(full_path: str):
273
- """
274
- Serve React app for all non-API routes.
275
- React Router will handle client-side routing.
276
- """
277
- # Skip API routes, docs, static assets, and known static files
278
- if (full_path.startswith("api/") or
279
- full_path.startswith("docs") or
280
- full_path.startswith("openapi.json") or
281
- full_path.startswith("assets/") or
282
- full_path in ["logo.png", "favicon.ico"]):
283
- from fastapi import HTTPException
284
- raise HTTPException(status_code=404)
285
-
286
- # Serve index.html for all other routes (React Router will handle routing)
287
- from fastapi.responses import FileResponse
288
- index_path = os.path.join(frontend_dir, "index.html")
289
- if os.path.exists(index_path):
290
- return FileResponse(index_path)
291
- from fastapi import HTTPException
292
- raise HTTPException(status_code=404)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ from typing import List, Dict, Optional
4
+
5
+ from fastapi import FastAPI, UploadFile, File, Depends, Form, HTTPException, Body
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from fastapi.staticfiles import StaticFiles
8
+ from sqlalchemy.orm import Session
9
+ from pydantic import BaseModel
10
+
11
+ from .db import Base, engine, SessionLocal
12
+ from .models import ExtractionRecord, User, ShareToken
13
+ from .schemas import ExtractionRecordBase, ExtractionStage
14
+ from .openrouter_client import extract_fields_from_document
15
+ from .auth import get_current_user, get_db, verify_token
16
+ from .auth_routes import router as auth_router
17
+ from .api_key_auth import get_user_from_api_key
18
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
19
+ from typing import Optional
20
+
21
# MIME types accepted by the upload endpoint.
ALLOWED_CONTENT_TYPES = [
    "application/pdf",
    "image/png", "image/jpeg", "image/jpg",
    "image/tiff", "image/tif",
]

# Extensions accepted as a fallback when the MIME type is not recognised.
ALLOWED_EXTENSIONS = [".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"]

# Upload size ceiling in bytes (4 MB).
MAX_FILE_SIZE = 4 * 1024 * 1024

# The SQLite database lives under ./data, so the directory must exist
# before the tables are created.
os.makedirs("data", exist_ok=True)
Base.metadata.create_all(bind=engine)

app = FastAPI(title="Document Capture Demo – Backend")

# Authentication endpoints are defined in auth_routes.
app.include_router(auth_router)

# CORS is wide open for now; restrict allow_origins before hardening.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
56
+
57
+
58
def get_db():
    """
    Yield a database session and close it once the consumer is finished.

    NOTE(review): this definition shadows the `get_db` imported from `.auth`
    at the top of the module -- confirm whether both are meant to exist.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        # Runs whether the consumer finished normally or raised.
        session.close()
64
+
65
+
66
async def get_current_user_or_api_key_user(
    api_key_user: Optional[User] = Depends(get_user_from_api_key),
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(HTTPBearer(auto_error=False)),
    db: Session = Depends(get_db),
) -> User:
    """
    Resolve the calling user from either an API key or a JWT Bearer token.

    The API-key result takes precedence; the Bearer token is only inspected
    when no API-key user was resolved.

    Raises:
        HTTPException: 401 when neither mechanism yields a user.
    """
    if api_key_user:
        return api_key_user

    if credentials:
        try:
            claims = verify_token(credentials.credentials)
            jwt_user = db.query(User).filter(User.id == int(claims.get("sub"))).first()
        except Exception:
            # Any failure (bad token, missing/invalid "sub", lookup error)
            # is treated as "not authenticated" and reported as 401 below.
            jwt_user = None
        if jwt_user:
            return jwt_user

    raise HTTPException(
        status_code=401,
        detail="Authentication required. Provide either a Bearer token or X-API-Key header.",
        headers={"WWW-Authenticate": "Bearer"},
    )
98
+
99
+
100
@app.get("/ping")
def ping():
    """Liveness probe: always reports that the backend is running."""
    return dict(status="ok", message="backend alive")
104
+
105
+
106
def make_stages(total_ms: int, status: str) -> Dict[str, ExtractionStage]:
    """
    Synthesise per-stage timing data for the History UI.

    The total duration is split across four fixed stages (15% / 55% / 20% /
    10%). A non-positive total is replaced by 1000 ms. When the overall
    status is not "completed", the AI analysis stage is reported as failed
    and the two later stages as skipped.
    """
    if total_ms <= 0:
        total_ms = 1000

    succeeded = status == "completed"
    later_stage_status = "completed" if succeeded else "skipped"

    # (stage name, share of total time, stage status, variation)
    plan = (
        ("uploading", 0.15, "completed", "normal"),
        ("aiAnalysis", 0.55, "completed" if succeeded else "failed", "normal"),
        ("dataExtraction", 0.2, later_stage_status, "fast"),
        ("outputRendering", 0.1, later_stage_status, "normal"),
    )
    return {
        stage_name: ExtractionStage(
            time=int(total_ms * share),
            status=stage_status,
            variation=variation,
        )
        for stage_name, share, stage_status, variation in plan
    }
136
+
137
+
138
def _upload_extension(filename: Optional[str]) -> str:
    """Return the lower-cased extension of `filename` (with the dot), or "" if it has none."""
    if not filename:
        return ""
    # splitext yields "" for names without a dot, unlike split(".")[-1],
    # which would turn a file literally named "pdf" into ".pdf".
    return os.path.splitext(filename)[1].lower()


def _count_extracted_fields(fields, root_fields) -> int:
    """Count extracted items: table rows of all page_* entries (or plain keys) plus root-level Fields."""
    if not isinstance(fields, dict):
        return 0

    root_count = len(root_fields) if isinstance(root_fields, dict) else 0

    if any(k.startswith("page_") for k in fields.keys()):
        # Structured output: sum the table rows of every page_* entry.
        table_rows_count = 0
        for page_key, page_data in fields.items():
            if page_key.startswith("page_") and isinstance(page_data, dict):
                table_rows = page_data.get("table", [])
                if isinstance(table_rows, list):
                    table_rows_count += len(table_rows)
        print(f"[INFO] Structured data: {table_rows_count} table rows, {root_count} extracted fields")
        return table_rows_count + root_count

    # Flat output: count plain keys, ignoring the display-only entries.
    plain_keys = [k for k in fields.keys() if k not in ["full_text", "pages", "Fields"]]
    return len(plain_keys) + root_count


@app.post("/api/extract")
async def extract_document(
    file: UploadFile = File(...),
    key_fields: Optional[str] = Form(None),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user_or_api_key_user),
):
    """
    Main extraction endpoint for document parsing.
    Supports both JWT Bearer token and API key authentication.

    Authentication methods:
    1. JWT Bearer token: Header "Authorization: Bearer <token>"
    2. API Key: Header "X-API-Key: <api_key>"

    Parameters:
    - file: Document file (PDF, PNG, JPEG, TIFF) - max 4MB
    - key_fields: Optional comma-separated list of specific fields to extract (e.g., "Invoice Number,Invoice Date")

    Returns JSON with extracted fields, text, confidence, and metadata.

    Responds with 400 when the file exceeds the size limit or is not an
    allowed type. Extraction errors do not raise: they are stored and
    returned as a record with status "failed".
    """
    import base64
    import json
    import traceback

    start = time.time()
    content = await file.read()
    content_type = file.content_type or "application/octet-stream"
    file_size = len(content)
    size_mb = file_size / 1024 / 1024
    size_str = f"{size_mb:.2f} MB"

    # Validate file size
    if file_size > MAX_FILE_SIZE:
        raise HTTPException(
            status_code=400,
            detail=f"File size exceeds 4 MB limit. Your file is {size_mb:.2f} MB."
        )

    # Validate file type: either a known MIME type or a known extension.
    file_extension = _upload_extension(file.filename)
    if content_type not in ALLOWED_CONTENT_TYPES and file_extension not in ALLOWED_EXTENSIONS:
        raise HTTPException(
            status_code=400,
            detail="Only PDF, PNG, JPG, and TIFF files are allowed."
        )

    # Encode for storage only once the upload is known to be acceptable.
    file_base64 = base64.b64encode(content).decode("utf-8")

    try:
        print(f"[INFO] Starting extraction for file: {file.filename}, type: {content_type}, size: {size_str}")
        if key_fields:
            print(f"[INFO] Key fields requested: {key_fields}")
        extracted = await extract_fields_from_document(content, content_type, file.filename, key_fields)
        total_ms = int((time.time() - start) * 1000)

        print(f"[INFO] Extraction completed. Response keys: {list(extracted.keys())}")
        print(f"[INFO] Fields extracted: {extracted.get('fields', {})}")

        confidence = float(extracted.get("confidence", 90))
        # A missing or null "fields" entry is treated as an empty dict so the
        # text fallbacks below can still be attached.
        fields = extracted.get("fields") or {}

        # Get Fields from root level (if user provided key_fields)
        root_fields = extracted.get("Fields", {})

        # Get full_text for text output
        full_text = extracted.get("full_text", "")
        if full_text:
            full_text_words = len(str(full_text).split())
            print(f"[INFO] Full text extracted: {full_text_words} words")

        # If fields already holds page_X keys it is structured table output;
        # otherwise attach full_text and pages for plain-text display.
        if isinstance(fields, dict) and not any(k.startswith("page_") for k in fields.keys()):
            if full_text:
                fields["full_text"] = full_text

            pages_data = extracted.get("pages", [])
            if pages_data and isinstance(pages_data, list):
                print(f"[INFO] Extracted text from {len(pages_data)} page(s)")
                fields["pages"] = pages_data

        # Add Fields at root level if it exists
        if root_fields:
            fields["Fields"] = root_fields

        fields_extracted = _count_extracted_fields(fields, root_fields)

        print(f"[INFO] Final stats - confidence: {confidence}, fields_count: {fields_extracted}")

        status = "completed"
        error_message = None
    except Exception as e:
        # Best effort by design: the failure is recorded and returned to the
        # client instead of surfacing as a 500.
        total_ms = int((time.time() - start) * 1000)
        confidence = 0.0
        fields = {}
        fields_extracted = 0
        status = "failed"
        error_message = str(e)
        print(f"[ERROR] Extraction failed: {error_message}")
        print(f"[ERROR] Traceback: {traceback.format_exc()}")

    # Save record to DB
    rec = ExtractionRecord(
        user_id=current_user.id,
        file_name=file.filename,
        file_type=content_type,
        file_size=size_str,
        status=status,
        confidence=confidence,
        fields_extracted=fields_extracted,
        total_time_ms=total_ms,
        raw_output=json.dumps(fields),  # JSON (not str()) so it can be parsed back later
        file_base64=file_base64,  # base64 encoded file for preview
        error_message=error_message,
    )
    db.add(rec)
    db.commit()
    db.refresh(rec)

    stages = make_stages(total_ms, status)

    # Response shape that frontend will consume
    return {
        "id": rec.id,
        "fileName": rec.file_name,
        "fileType": rec.file_type,
        "fileSize": rec.file_size,
        "status": status,
        "confidence": confidence,
        "fieldsExtracted": fields_extracted,
        "totalTime": total_ms,
        "fields": fields,
        "stages": {k: v.dict() for k, v in stages.items()},
        "errorMessage": error_message,
    }
311
+
312
+
313
@app.get("/api/history", response_model=List[ExtractionRecordBase])
def get_history(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    History page feed.

    Loads the current user's 100 newest extraction records, collapses
    shared copies that originate from the same source extraction, and
    returns the remainder with synthetic stage data.
    """
    newest_first = (
        db.query(ExtractionRecord)
        .filter(ExtractionRecord.user_id == current_user.id)
        .order_by(ExtractionRecord.created_at.desc())
        .limit(100)
        .all()
    )

    # Records are ordered newest first, so the first copy seen for a given
    # source extraction is the most recent one; later copies are dropped.
    # Records that were not shared are always kept.
    claimed_sources = set()
    visible = []
    for record in newest_first:
        source_id = record.shared_from_extraction_id
        if source_id:
            if source_id in claimed_sources:
                continue
            claimed_sources.add(source_id)
        visible.append(record)

    return [
        ExtractionRecordBase(
            id=r.id,
            fileName=r.file_name,
            fileType=r.file_type or "",
            fileSize=r.file_size or "",
            extractedAt=r.created_at,
            status=r.status or "completed",
            confidence=r.confidence or 0.0,
            fieldsExtracted=r.fields_extracted or 0,
            totalTime=r.total_time_ms or 0,
            stages=make_stages(r.total_time_ms or 1000, r.status or "completed"),
            errorMessage=r.error_message,
        )
        for r in visible
    ]
366
+
367
+
368
def _parse_raw_output(raw_output):
    """
    Decode a stored `raw_output` value back into the extracted fields.

    New records hold JSON; older records hold the str() of a Python dict,
    which is recovered with ast.literal_eval. Anything that cannot be
    decoded yields an empty dict.
    """
    import ast
    import json

    if not raw_output:
        return {}

    try:
        # New format: JSON written by the extract endpoint.
        return json.loads(raw_output)
    except (json.JSONDecodeError, TypeError):
        pass

    # Legacy format: only attempt literal_eval on something dict-shaped.
    try:
        if raw_output.strip().startswith('{'):
            return ast.literal_eval(raw_output)
    except Exception as exc:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit are
        # never swallowed; the record is still served, just without fields.
        print(f"[WARNING] Could not parse stored raw_output: {exc}")
    return {}


@app.get("/api/extraction/{extraction_id}")
def get_extraction(
    extraction_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Get a specific extraction by ID with full fields data.
    Used when viewing output from History page.

    Only records owned by the current user are returned; anything else
    yields a 404.
    """
    rec = (
        db.query(ExtractionRecord)
        .filter(
            ExtractionRecord.id == extraction_id,
            ExtractionRecord.user_id == current_user.id
        )
        .first()
    )

    if not rec:
        raise HTTPException(status_code=404, detail="Extraction not found")

    fields = _parse_raw_output(rec.raw_output)

    stages = make_stages(rec.total_time_ms or 1000, rec.status or "completed")

    return {
        "id": rec.id,
        "fileName": rec.file_name,
        "fileType": rec.file_type or "",
        "fileSize": rec.file_size or "",
        "status": rec.status or "completed",
        "confidence": rec.confidence or 0.0,
        "fieldsExtracted": rec.fields_extracted or 0,
        "totalTime": rec.total_time_ms or 0,
        "fields": fields,
        "fileBase64": rec.file_base64,  # base64 encoded file for preview
        "stages": {k: v.dict() for k, v in stages.items()},
        "errorMessage": rec.error_message,
    }
427
+
428
+
429
@app.post("/api/share")
async def share_extraction(
    extraction_id: int = Body(...),
    recipient_emails: List[str] = Body(...),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Share an extraction with one or more users via email.

    Recipient addresses are trimmed, lower-cased and de-duplicated, then
    validated as business emails. One share token (valid for 30 days) is
    stored per recipient and a share email is sent to each.

    Raises:
        HTTPException: 400 when no usable recipient is given (or validation
            rejects one), 404 when the extraction does not belong to the
            caller, 500 when tokens cannot be stored or every email fails.
    """
    import secrets
    from datetime import datetime, timedelta
    from .brevo_service import send_share_email
    from .email_validator import validate_business_email

    # Normalise before validating so validation, storage and delivery all
    # see the same address. dict.fromkeys drops duplicates while keeping the
    # caller's order, so nobody receives two tokens/emails for one request.
    normalized = [e.strip().lower() for e in (recipient_emails or []) if e and e.strip()]
    recipients = list(dict.fromkeys(normalized))

    if not recipients:
        raise HTTPException(status_code=400, detail="At least one recipient email is required")

    # validate_business_email raises on rejection; let that propagate.
    for email in recipients:
        validate_business_email(email)

    # Get the extraction record (must be owned by the caller)
    extraction = (
        db.query(ExtractionRecord)
        .filter(
            ExtractionRecord.id == extraction_id,
            ExtractionRecord.user_id == current_user.id
        )
        .first()
    )

    if not extraction:
        raise HTTPException(status_code=404, detail="Extraction not found")

    # Generate share link base URL
    base_url = os.environ.get("VITE_API_BASE_URL", "https://seth0330-ezofisocr.hf.space")

    # One token per recipient, all expiring 30 days from now.
    pending = []  # (share_token, recipient_email)
    for recipient_email in recipients:
        share_token = secrets.token_urlsafe(32)
        expires_at = datetime.utcnow() + timedelta(days=30)
        db.add(
            ShareToken(
                token=share_token,
                extraction_id=extraction_id,
                sender_user_id=current_user.id,
                recipient_email=recipient_email,
                expires_at=expires_at,
            )
        )
        pending.append((share_token, recipient_email))

    # Commit all share tokens before any email goes out, so every link that
    # is sent already resolves.
    try:
        db.commit()
    except Exception as e:
        db.rollback()
        raise HTTPException(status_code=500, detail=f"Failed to create share tokens: {str(e)}")

    # Send emails; one failure must not prevent the remaining deliveries.
    successful_shares = []
    failed_shares = []
    sender_name = current_user.name if current_user.name else None
    for share_token, recipient_email in pending:
        share_link = f"{base_url}/share/{share_token}"
        try:
            await send_share_email(recipient_email, current_user.email, share_link, sender_name)
            successful_shares.append(recipient_email)
        except Exception as e:
            print(f"[ERROR] Failed to send share email to {recipient_email}: {str(e)}")
            failed_shares.append(recipient_email)
            # NOTE(review): the stored token is left in place when the email fails.

    # Build response message
    if len(failed_shares) == 0:
        message = f"Extraction shared successfully with {len(successful_shares)} recipient(s)"
    elif len(successful_shares) == 0:
        raise HTTPException(status_code=500, detail="Failed to send share emails to all recipients")
    else:
        message = f"Extraction shared with {len(successful_shares)} recipient(s). Failed to send to: {', '.join(failed_shares)}"

    return {
        "success": True,
        "message": message,
        "successful_count": len(successful_shares),
        "failed_count": len(failed_shares),
        "successful_emails": successful_shares,
        "failed_emails": failed_shares if failed_shares else None
    }
535
+
536
+
537
# Request body accepted by POST /api/share/link.
class ShareLinkRequest(BaseModel):
    # ID of the extraction record the link should point to.
    extraction_id: int
539
+
540
@app.post("/api/share/link")
async def create_share_link(
    request: ShareLinkRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Produce a copyable share link for one of the caller's extractions.

    No recipient is attached to the token; the link expires after 30 days.
    Responds with 404 when the extraction does not belong to the caller.
    """
    import secrets
    from datetime import datetime, timedelta

    # The extraction must exist and be owned by the caller.
    owned_extraction = (
        db.query(ExtractionRecord)
        .filter(
            ExtractionRecord.id == request.extraction_id,
            ExtractionRecord.user_id == current_user.id
        )
        .first()
    )
    if not owned_extraction:
        raise HTTPException(status_code=404, detail="Extraction not found")

    share_token = secrets.token_urlsafe(32)
    expires_at = datetime.utcnow() + timedelta(days=30)

    # recipient_email=None marks this as a public, copyable link.
    link_record = ShareToken(
        token=share_token,
        extraction_id=request.extraction_id,
        sender_user_id=current_user.id,
        recipient_email=None,
        expires_at=expires_at,
    )
    db.add(link_record)
    db.commit()
    db.refresh(link_record)

    base_url = os.environ.get("VITE_API_BASE_URL", "https://seth0330-ezofisocr.hf.space")

    return {
        "success": True,
        "share_link": f"{base_url}/share/{share_token}",
        "share_token": share_token,
        "expires_at": expires_at.isoformat()
    }
592
+
593
+
594
@app.get("/api/share/{token}")
async def access_shared_extraction(
    token: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Access a shared extraction and copy it to the current user's account.
    This endpoint is called after the user logs in via the share link.

    Args:
        token: Share token taken from the link path.
        db: Database session.
        current_user: Authenticated user who opened the link.

    Returns:
        Dict with "success", "extraction_id" (the id of the caller's own copy)
        and "message".

    Raises:
        HTTPException: 404 if the token or the original extraction is missing,
            410 if the token has expired.
    """
    from datetime import datetime

    def _mark_accessed():
        # Record who consumed this share token and when.
        share.accessed = True
        share.accessed_at = datetime.utcnow()
        share.accessed_by_user_id = current_user.id

    # Find the share token
    share = db.query(ShareToken).filter(ShareToken.token == token).first()
    if not share:
        raise HTTPException(status_code=404, detail="Share link not found or expired")

    # Check if token is expired. The column is timezone-aware, so depending on the
    # database driver the stored value may come back aware or naive; compare it
    # against a "now" of the same kind to avoid a naive/aware TypeError.
    expires_at = share.expires_at
    if expires_at:
        if expires_at.utcoffset() is not None:
            now = datetime.now(expires_at.tzinfo)
        else:
            now = datetime.utcnow()
        if expires_at < now:
            raise HTTPException(status_code=410, detail="Share link has expired")

    # Get the original extraction
    original_extraction = (
        db.query(ExtractionRecord)
        .filter(ExtractionRecord.id == share.extraction_id)
        .first()
    )
    if not original_extraction:
        raise HTTPException(status_code=404, detail="Original extraction not found")

    # If this user already holds a copy of the original (from this token or any
    # other share of the same extraction), return it instead of duplicating.
    existing_copy = (
        db.query(ExtractionRecord)
        .filter(
            ExtractionRecord.user_id == current_user.id,
            ExtractionRecord.shared_from_extraction_id == original_extraction.id,
        )
        .order_by(ExtractionRecord.created_at.desc())
        .first()
    )
    if existing_copy:
        already_recorded = share.accessed and share.accessed_by_user_id == current_user.id
        if not already_recorded:
            _mark_accessed()
            db.commit()
        return {
            "success": True,
            "extraction_id": existing_copy.id,
            "message": "Extraction already shared with you"
        }

    # Create new extraction record for the recipient. raw_output and file_base64
    # are copied verbatim as stored strings; no re-parsing is needed.
    new_extraction = ExtractionRecord(
        user_id=current_user.id,
        file_name=original_extraction.file_name,
        file_type=original_extraction.file_type,
        file_size=original_extraction.file_size,
        status=original_extraction.status or "completed",
        confidence=original_extraction.confidence or 0.0,
        fields_extracted=original_extraction.fields_extracted or 0,
        total_time_ms=original_extraction.total_time_ms or 0,
        raw_output=original_extraction.raw_output,  # Copy the JSON string
        file_base64=original_extraction.file_base64,  # Copy the base64 file
        shared_from_extraction_id=original_extraction.id,
        shared_by_user_id=share.sender_user_id,
    )
    db.add(new_extraction)

    # Mark share as accessed in the same transaction as the copy
    _mark_accessed()

    db.commit()
    db.refresh(new_extraction)

    return {
        "success": True,
        "extraction_id": new_extraction.id,
        "message": "Extraction shared successfully"
    }
721
+
722
+
723
+ # Static frontend mounting (used after we build React)
724
+ # Dockerfile copies the Vite build into backend/frontend_dist
725
+ # IMPORTANT: API routes must be defined BEFORE this so they take precedence
726
+ frontend_dir = os.path.join(
727
+ os.path.dirname(os.path.dirname(__file__)), "frontend_dist"
728
+ )
729
+
730
+ if os.path.isdir(frontend_dir):
731
+ # Serve static files (JS, CSS, images, etc.) from assets directory
732
+ assets_dir = os.path.join(frontend_dir, "assets")
733
+ if os.path.isdir(assets_dir):
734
+ app.mount(
735
+ "/assets",
736
+ StaticFiles(directory=assets_dir),
737
+ name="assets",
738
+ )
739
+
740
+ # Serve static files from root (logo.png, favicon.ico, etc.)
741
+ # Files in public/ directory are copied to dist/ root during Vite build
742
+ # These routes must be defined BEFORE the catch-all route
743
+ @app.get("/logo.png")
744
+ async def serve_logo():
745
+ """Serve logo.png from frontend_dist root."""
746
+ from fastapi.responses import FileResponse
747
+ logo_path = os.path.join(frontend_dir, "logo.png")
748
+ if os.path.exists(logo_path):
749
+ return FileResponse(logo_path, media_type="image/png")
750
+ from fastapi import HTTPException
751
+ raise HTTPException(status_code=404)
752
+
753
+ @app.get("/favicon.ico")
754
+ async def serve_favicon():
755
+ """Serve favicon.ico from frontend_dist root."""
756
+ from fastapi.responses import FileResponse
757
+ favicon_path = os.path.join(frontend_dir, "favicon.ico")
758
+ if os.path.exists(favicon_path):
759
+ return FileResponse(favicon_path, media_type="image/x-icon")
760
+ from fastapi import HTTPException
761
+ raise HTTPException(status_code=404)
762
+
763
+ # Catch-all route to serve index.html for React Router
764
+ # This must be last so API routes and static files are matched first
765
+ @app.get("/{full_path:path}")
766
+ async def serve_frontend(full_path: str):
767
+ """
768
+ Serve React app for all non-API routes.
769
+ React Router will handle client-side routing.
770
+ """
771
+ # Skip API routes, docs, static assets, and known static files
772
+ if (full_path.startswith("api/") or
773
+ full_path.startswith("docs") or
774
+ full_path.startswith("openapi.json") or
775
+ full_path.startswith("assets/") or
776
+ full_path in ["logo.png", "favicon.ico"]):
777
+ from fastapi import HTTPException
778
+ raise HTTPException(status_code=404)
779
+
780
+ # Serve index.html for all other routes (React Router will handle routing)
781
+ from fastapi.responses import FileResponse
782
+ index_path = os.path.join(frontend_dir, "index.html")
783
+ if os.path.exists(index_path):
784
+ return FileResponse(index_path)
785
+ from fastapi import HTTPException
786
+ raise HTTPException(status_code=404)
backend/app/models.py CHANGED
@@ -1,32 +1,136 @@
1
- from sqlalchemy import Column, Integer, String, Float, DateTime, Text
2
- from sqlalchemy.sql import func
3
-
4
- from .db import Base
5
-
6
-
7
- class ExtractionRecord(Base):
8
- """
9
- Stores one extraction run so the History page can show past jobs.
10
- We’ll fill it from the /api/extract endpoint later.
11
- """
12
-
13
- __tablename__ = "extractions"
14
-
15
- id = Column(Integer, primary_key=True, index=True)
16
-
17
- file_name = Column(String, index=True)
18
- file_type = Column(String)
19
- file_size = Column(String)
20
-
21
- status = Column(String) # "completed" | "failed"
22
- confidence = Column(Float) # overall confidence (0–100)
23
- fields_extracted = Column(Integer) # number of fields extracted
24
- total_time_ms = Column(Integer) # total processing time in ms
25
-
26
- raw_output = Column(Text) # JSON string from the model
27
- error_message = Column(Text, nullable=True)
28
-
29
- created_at = Column(
30
- DateTime(timezone=True),
31
- server_default=func.now(),
32
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import Column, Integer, String, Float, DateTime, Text, ForeignKey, Boolean
2
+ from sqlalchemy.orm import relationship
3
+ from sqlalchemy.sql import func
4
+
5
+ from .db import Base
6
+
7
+
8
class User(Base):
    """
    Stores user information from Firebase or OTP authentication.

    Each row is identified by a unique e-mail address. A user owns extraction
    records and API keys through the relationships declared at the bottom of
    the class.
    """
    __tablename__ = "users"

    id = Column(Integer, primary_key=True, index=True)
    email = Column(String, unique=True, index=True, nullable=False)
    name = Column(String, nullable=True)
    picture = Column(String, nullable=True)

    # Auth method: 'firebase' or 'otp'
    auth_method = Column(String, default='firebase')

    # Firebase-specific (nullable, so it can be left empty for non-Firebase users)
    firebase_uid = Column(String, unique=True, index=True, nullable=True)

    # OTP-specific
    email_verified = Column(Boolean, default=False)

    # Set by the database at insert time
    created_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    # Relationship to extraction records (explicitly specify user_id as the foreign key)
    # Note: primaryjoin must be specified because ExtractionRecord has multiple foreign keys to User
    extractions = relationship(
        "ExtractionRecord",
        back_populates="user",
        primaryjoin="User.id == ExtractionRecord.user_id"
    )

    # Relationship to API keys; the delete-orphan cascade removes a user's keys
    # when the user is deleted or a key is detached from the collection
    api_keys = relationship(
        "APIKey",
        back_populates="user",
        cascade="all, delete-orphan"
    )
47
+
48
+
49
class ExtractionRecord(Base):
    """
    Stores one extraction run so the History page can show past jobs.
    We'll fill it from the /api/extract endpoint later.

    Every record belongs to exactly one user (user_id is required). A record
    that was created by opening a share link additionally points back to the
    original extraction and to the user who shared it.
    """

    __tablename__ = "extractions"

    id = Column(Integer, primary_key=True, index=True)
    # Owner of this record
    user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)

    file_name = Column(String, index=True)
    file_type = Column(String)
    file_size = Column(String)  # stored as a string, not a number

    status = Column(String)  # "completed" | "failed"
    confidence = Column(Float)  # overall confidence (0–100)
    fields_extracted = Column(Integer)  # number of fields extracted
    total_time_ms = Column(Integer)  # total processing time in ms

    raw_output = Column(Text)  # JSON string from the model
    file_base64 = Column(Text, nullable=True)  # Base64 encoded original file for preview
    error_message = Column(Text, nullable=True)

    # Set by the database at insert time
    created_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    # Relationship to user (explicitly specify user_id as the foreign key)
    # Note: primaryjoin must be specified because ExtractionRecord has multiple foreign keys to User
    user = relationship(
        "User",
        back_populates="extractions",
        primaryjoin="ExtractionRecord.user_id == User.id"
    )

    # Track if this extraction was shared (original extraction ID).
    # Both columns are NULL for extractions the user created directly.
    shared_from_extraction_id = Column(Integer, ForeignKey("extractions.id"), nullable=True, index=True)
    shared_by_user_id = Column(Integer, ForeignKey("users.id"), nullable=True, index=True)
89
+
90
+
91
class ShareToken(Base):
    """
    Stores share tokens for sharing extractions with other users.

    A token references the shared extraction and the user who created the
    share. The accessed* columns record whether, when, and by whom the link
    was opened.
    """
    __tablename__ = "share_tokens"

    id = Column(Integer, primary_key=True, index=True)
    token = Column(String, unique=True, index=True, nullable=False)  # Unique share token
    extraction_id = Column(Integer, ForeignKey("extractions.id"), nullable=False, index=True)
    sender_user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
    recipient_email = Column(String, nullable=True, index=True)  # Nullable for public share links
    expires_at = Column(DateTime(timezone=True), nullable=True)  # Optional expiration
    accessed = Column(Boolean, default=False)  # Track if link was accessed
    accessed_at = Column(DateTime(timezone=True), nullable=True)
    accessed_by_user_id = Column(Integer, ForeignKey("users.id"), nullable=True)

    # Set by the database at insert time
    created_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
111
+
112
+
113
class APIKey(Base):
    """
    Stores API keys for external application authentication.
    API keys are hashed before storage for security.

    Only the hash and a short display prefix are kept in this table; there is
    no column holding the full key.
    """
    __tablename__ = "api_keys"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
    name = Column(String, nullable=False)  # User-friendly name for the API key
    key_hash = Column(String, unique=True, index=True, nullable=False)  # Hashed API key
    key_prefix = Column(String, nullable=False)  # First 8 chars of key for display (e.g., "sk_live_")
    is_active = Column(Boolean, default=True, nullable=False)
    last_used_at = Column(DateTime(timezone=True), nullable=True)
    # Set by the database at insert time
    created_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    # Relationship to user (mirrors User.api_keys)
    user = relationship(
        "User",
        back_populates="api_keys"
    )
backend/app/monday_service.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Monday.com API service for creating leads with automatic field matching.
3
+ Reference: https://developer.monday.com/api-reference/docs
4
+ """
5
+ import os
6
+ import httpx
7
+ import json
8
+ from typing import Optional, Dict, Any, List, Tuple
9
+ from difflib import SequenceMatcher
10
+
11
# Monday.com connection settings, read once from the environment at import time.
MONDAY_API_URL = "https://api.monday.com/v2"
MONDAY_API_KEY = os.getenv("MONDAY_API_KEY", "")
MONDAY_BOARD_ID = os.getenv("MONDAY_BOARD_ID")  # Your "New Leads" board ID; None when unset

# Cache for board columns to avoid repeated API calls (board ID -> column dicts)
_board_columns_cache: Dict[str, List[Dict[str, Any]]] = {}
17
+
18
+
19
+ def _calculate_similarity(str1: str, str2: str) -> float:
20
+ """
21
+ Calculate similarity between two strings using SequenceMatcher.
22
+ Returns a value between 0.0 and 1.0.
23
+ """
24
+ return SequenceMatcher(None, str1.lower(), str2.lower()).ratio()
25
+
26
+
27
# Alternative spellings tried for each known lead field, in addition to the
# field name itself. Built once at import time instead of on every call.
_FIELD_SYNONYMS: Dict[str, List[str]] = {
    "first_name": ["first name", "firstname", "fname", "given name"],
    "last_name": ["last name", "lastname", "lname", "surname", "family name"],
    "email": ["email address", "email", "e-mail", "mail"],
    "phone_number": ["phone", "phone number", "telephone", "mobile", "cell"],
    "linkedin_url": ["linkedin", "linkedin profile", "linkedin url", "linkedin link"],
    "title": ["job title", "position", "role", "job"],
    "headline": ["headline", "tagline", "bio"],
    "organization_name": ["company", "organization", "org", "company name", "employer"],
    "organization_website": ["website", "company website", "url", "web"],
    "organization_address": ["address", "company address", "location"],
}


def _find_best_column_match(
    field_name: str,
    available_columns: List[Dict[str, Any]],
    min_similarity: float = 0.3
) -> Optional[Tuple[str, str, float]]:
    """
    Find the best matching column for a field name using string similarity.

    The field name, a few normalized spellings of it, and any known synonyms
    are each compared against every column title; the highest score wins.
    When several columns tie, the one that appears first is kept.

    Args:
        field_name: The field name to match (e.g., "first_name", "email")
        available_columns: List of column dicts with 'id' and 'title' keys
        min_similarity: Minimum similarity threshold (0.0 to 1.0)

    Returns:
        Tuple of (column_id, column_title, similarity_score) or None if no match found
    """
    # Spellings of the field to try: separators turned into spaces, plain
    # lower-case, underscores removed, then the known synonyms.
    candidates = [
        field_name.lower().replace("_", " ").replace("-", " "),
        field_name.lower(),
        field_name.replace("_", ""),
    ]
    candidates.extend(_FIELD_SYNONYMS.get(field_name, ()))

    best_match = None
    best_score = 0.0

    for column in available_columns:
        # `or ""` also covers a key that is present but set to None
        column_title = column.get("title") or ""
        column_id = column.get("id") or ""
        if not column_title or not column_id:
            continue

        lowered_title = column_title.lower()
        score = max(
            _calculate_similarity(candidate, lowered_title)
            for candidate in candidates
        )
        # Strict comparison keeps the earliest column on ties
        if score > best_score:
            best_score = score
            best_match = (column_id, column_title, score)

    if best_match and best_score >= min_similarity:
        return best_match
    return None
90
+
91
+
92
async def _get_board_columns(board_id: str) -> List[Dict[str, Any]]:
    """
    Return the columns of a Monday.com board, using the module cache when possible.

    Args:
        board_id: Monday.com board ID

    Returns:
        List of column dictionaries with 'id', 'title', and 'type' keys.
        An empty list is returned when the API key is missing, the request
        fails, or the response carries no columns; failures are only logged.
    """
    # Serve from the cache when this board was fetched before
    try:
        return _board_columns_cache[board_id]
    except KeyError:
        pass

    if not MONDAY_API_KEY:
        print("[WARNING] MONDAY_API_KEY not set, cannot fetch board columns")
        return []

    query = """
    query ($boardId: ID!) {
        boards(ids: [$boardId]) {
            columns {
                id
                title
                type
            }
        }
    }
    """
    payload = {"query": query, "variables": {"boardId": board_id}}
    headers = {
        "Authorization": MONDAY_API_KEY,
        "Content-Type": "application/json"
    }

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(MONDAY_API_URL, json=payload, headers=headers)

            if response.status_code != 200:
                print(f"[ERROR] Failed to fetch board columns: {response.status_code} - {response.text}")
            else:
                result = response.json()
                data = result.get("data")
                boards = data.get("boards") if data else None
                if boards:
                    columns = boards[0].get("columns")
                    if columns:
                        # Only successful, non-empty results are cached
                        _board_columns_cache[board_id] = columns
                        print(f"[INFO] Fetched {len(columns)} columns from Monday.com board {board_id}")
                        return columns
                elif result.get("errors"):
                    print(f"[ERROR] Failed to fetch board columns: {result['errors']}")
    except Exception as e:
        print(f"[ERROR] Exception while fetching board columns: {str(e)}")

    return []
156
+
157
+
158
+ def _format_column_value(value: Any, column_type: str, column_id: Optional[str] = None) -> Any:
159
+ """
160
+ Format a value according to Monday.com column type.
161
+
162
+ Args:
163
+ value: The value to format
164
+ column_type: Monday.com column type (email, phone, link, text, etc.)
165
+ column_id: Column ID (for special handling)
166
+
167
+ Returns:
168
+ For email/phone/link: Python dict object
169
+ For text/other types: Plain string
170
+ """
171
+ if value is None:
172
+ return ""
173
+
174
+ value_str = str(value)
175
+
176
+ if column_type == "email":
177
+ # Monday.com email format requires dict object (will be JSON encoded later)
178
+ return {"email": value_str, "text": value_str}
179
+ elif column_type == "phone":
180
+ return {"phone": value_str, "countryShortName": "US"}
181
+ elif column_type == "link":
182
+ # If it's already a URL, use it; otherwise create a link
183
+ if value_str.startswith("http://") or value_str.startswith("https://"):
184
+ return {"url": value_str, "text": value_str}
185
+ else:
186
+ return {"url": f"https://{value_str}", "text": value_str}
187
+ else:
188
+ # Text, status, and other types - just return the string
189
+ return value_str
190
+
191
+
192
async def create_monday_lead(
    email: str,
    first_name: Optional[str] = None,
    last_name: Optional[str] = None,
    phone_number: Optional[str] = None,
    linkedin_url: Optional[str] = None,
    title: Optional[str] = None,
    headline: Optional[str] = None,
    organization_name: Optional[str] = None,
    organization_website: Optional[str] = None,
    organization_address: Optional[str] = None,
    board_id: Optional[str] = None
) -> bool:
    """
    Create a new lead item in Monday.com board.

    The board's columns are fetched and each non-empty field is assigned to
    the column whose title matches it best. Failures are logged and reported
    through the return value; HTTP and API errors are not raised.

    Args:
        email: Contact email address (required)
        first_name: Contact first name
        last_name: Contact last name
        phone_number: Phone number
        linkedin_url: LinkedIn profile URL
        title: Job title
        headline: Professional headline
        organization_name: Company name
        organization_website: Company website
        organization_address: Company address
        board_id: Monday.com board ID as string (defaults to MONDAY_BOARD_ID env var)

    Returns:
        True if lead created successfully, False otherwise
    """
    if not MONDAY_API_KEY:
        print("[WARNING] MONDAY_API_KEY not set, skipping Monday.com lead creation")
        return False

    target_board_id = board_id or MONDAY_BOARD_ID
    if not target_board_id:
        print("[WARNING] MONDAY_BOARD_ID not set, skipping Monday.com lead creation")
        return False

    # Prepare item name (use whatever name parts exist, otherwise the email)
    item_name = " ".join(part for part in (first_name, last_name) if part) or email

    # Fetch board columns to automatically match fields
    print("[INFO] Fetching Monday.com board columns for automatic field matching...")
    board_columns = await _get_board_columns(str(target_board_id))

    if not board_columns:
        print("[WARNING] Could not fetch board columns, skipping Monday.com lead creation")
        return False

    # Create a mapping of column IDs to column types for formatting
    column_types = {col["id"]: col.get("type", "text") for col in board_columns}

    # Prepare data fields to map
    data_fields = {
        "email": email,
        "first_name": first_name,
        "last_name": last_name,
        "phone_number": phone_number,
        "linkedin_url": linkedin_url,
        "title": title,
        "headline": headline,
        "organization_name": organization_name,
        "organization_website": organization_website,
        "organization_address": organization_address,
    }
    name_fields = ("first_name", "last_name")

    # Automatically match fields to columns using string similarity
    column_values = {}
    matched_fields = []
    # Track which columns have been matched to handle duplicates (e.g., first_name and last_name -> Name)
    column_matches = {}  # column_id -> (field_name, value)

    for field_name, field_value in data_fields.items():
        if not field_value:
            continue

        match = _find_best_column_match(field_name, board_columns)
        if not match:
            print(f"[DEBUG] No suitable column match found for '{field_name}' (skipping)")
            continue

        column_id, column_title, similarity = match
        column_type = column_types.get(column_id, "text")

        if column_id in column_matches:
            existing_field, existing_value = column_matches[column_id]
            both_name_parts = (
                field_name in name_fields
                and existing_field in name_fields
                and field_name != existing_field
            )
            if both_name_parts:
                # first_name and last_name landed on the same column: store "First Last"
                if field_name == "first_name":
                    combined_value = f"{field_value} {existing_value}"
                else:
                    combined_value = f"{existing_value} {field_value}"
                column_values[column_id] = _format_column_value(combined_value, column_type, column_id)
                matched_fields.append(f"{existing_field}+{field_name} -> {column_title} (combined)")
                print(f"[INFO] Combined '{existing_field}' and '{field_name}' to column '{column_title}' (ID: {column_id})")
            else:
                # Different fields matching to the same column: the first field
                # that claimed the column keeps it, later ones are dropped
                print(f"[DEBUG] Column '{column_title}' already matched to '{existing_field}', skipping '{field_name}'")
            continue

        formatted_value = _format_column_value(field_value, column_type, column_id)
        column_values[column_id] = formatted_value
        column_matches[column_id] = (field_name, field_value)
        matched_fields.append(f"{field_name} -> {column_title} (similarity: {similarity:.2f})")

        # Truncate the textual form for logging. The value itself may be a dict
        # (email/phone/link), which cannot be sliced directly.
        preview = str(formatted_value)
        if len(preview) > 100:
            preview = preview[:100]
        print(f"[INFO] Matched '{field_name}' to column '{column_title}' (ID: {column_id}, type: {column_type}, value: {preview})")

    if not column_values:
        print("[WARNING] No fields could be matched to board columns")
        return False

    print(f"[INFO] Successfully matched {len(matched_fields)} fields to Monday.com columns")

    # Convert column_values to JSON string for GraphQL mutation
    # Monday.com expects column values as a JSON string where:
    # - Text columns: plain string values
    # - Email/Phone/Link columns: dict objects (properly JSON encoded)
    column_values_json = json.dumps(column_values)
    print(f"[DEBUG] Monday.com column_values JSON: {column_values_json[:500]}")

    # GraphQL mutation
    # Note: Monday.com uses ID! (string) type for board_id, not Int!
    mutation = """
    mutation ($boardId: ID!, $itemName: String!, $columnValues: JSON!) {
        create_item (board_id: $boardId, item_name: $itemName, column_values: $columnValues) {
            id
        }
    }
    """

    variables = {
        # Monday.com expects ID! which is a string
        "boardId": str(target_board_id),
        "itemName": item_name,
        "columnValues": column_values_json
    }

    headers = {
        "Authorization": MONDAY_API_KEY,
        "Content-Type": "application/json"
    }

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                MONDAY_API_URL,
                json={
                    "query": mutation,
                    "variables": variables
                },
                headers=headers
            )

            if response.status_code != 200:
                error_data = response.text
                print(f"[ERROR] Failed to create Monday.com lead: {response.status_code} - {error_data}")
                return False

            result = response.json()
            if result.get("data") and result["data"].get("create_item"):
                item_id = result["data"]["create_item"].get("id")
                print(f"[INFO] Successfully created Monday.com lead: {item_name} (ID: {item_id})")
                return True

            if result.get("errors"):
                errors = result.get("errors", [])
                for error in errors:
                    error_msg = error.get("message", "Unknown error")
                    error_path = error.get("path", [])
                    print(f"[ERROR] Monday.com API error: {error_msg}")
                    if error_path:
                        print(f"[ERROR] Error path: {error_path}")
                # Log full error for debugging
                print(f"[DEBUG] Full Monday.com error response: {json.dumps(errors, indent=2)}")
                return False

            print(f"[ERROR] Unexpected Monday.com API response: {result}")
            return False

    except Exception as e:
        # No raise_for_status() call is made above, so transport and decoding
        # errors are the only failures that reach this handler
        print(f"[ERROR] Failed to create Monday.com lead: {str(e)}")
        return False
391
+
backend/app/openrouter_client.py CHANGED
@@ -1,627 +1,862 @@
1
- import os
2
- import base64
3
- import json
4
- import re
5
- from io import BytesIO
6
- from typing import Any, Dict, List, Optional, Tuple
7
-
8
- from openai import OpenAI
9
-
10
- try:
11
- import fitz # PyMuPDF
12
- from PIL import Image
13
- PDF_SUPPORT = True
14
- except ImportError as e:
15
- PDF_SUPPORT = False
16
- print(f"[WARNING] PDF support libraries not available: {e}. PDF conversion will not work.")
17
-
18
- # OCR Model Configuration (from sample code)
19
- OCR_BASE_URL = os.environ.get("OCR_BASE_URL", "https://od5yev2behke5u-8000.proxy.runpod.net/v1")
20
- OCR_API_KEY = os.environ.get("OCR_API_KEY", "Ezofis@123")
21
- OCR_MODEL_NAME = os.environ.get("OCR_MODEL_NAME", "EZOFISOCR")
22
-
23
- # Initialize OpenAI client with OCR endpoint
24
- ocr_client = OpenAI(
25
- base_url=OCR_BASE_URL,
26
- api_key=OCR_API_KEY,
27
- )
28
-
29
-
30
- def _pdf_to_images(pdf_bytes: bytes) -> List[bytes]:
31
- """
32
- Convert PDF pages to PNG images.
33
- Returns a list of PNG image bytes, one per page.
34
- """
35
- if not PDF_SUPPORT:
36
- raise RuntimeError("PyMuPDF not installed. Cannot convert PDF to images.")
37
-
38
- pdf_doc = fitz.open(stream=pdf_bytes, filetype="pdf")
39
- images = []
40
-
41
- print(f"[INFO] PDF has {len(pdf_doc)} page(s)")
42
-
43
- for page_num in range(len(pdf_doc)):
44
- page = pdf_doc[page_num]
45
- # Render page to image (zoom factor 2 for better quality)
46
- mat = fitz.Matrix(2.0, 2.0) # 2x zoom for better quality
47
- pix = page.get_pixmap(matrix=mat)
48
-
49
- # Convert to PIL Image then to JPEG bytes (better compression)
50
- img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
51
- img_bytes = BytesIO()
52
- img.save(img_bytes, format="JPEG", quality=95)
53
- images.append(img_bytes.getvalue())
54
-
55
- print(f"[INFO] Converted page {page_num + 1} to image ({pix.width}x{pix.height})")
56
-
57
- pdf_doc.close()
58
- return images
59
-
60
-
61
- def _image_bytes_to_base64(image_bytes: bytes) -> str:
62
- """Convert image bytes to base64 data URL (JPEG format)."""
63
- b64 = base64.b64encode(image_bytes).decode("utf-8")
64
- data_url = f"data:image/jpeg;base64,{b64}"
65
- print(f"[DEBUG] Base64 encoded image: {len(image_bytes)} bytes -> {len(data_url)} chars")
66
- return data_url
67
-
68
-
69
- def _parse_markdown_table(text: str) -> Optional[Tuple[List[str], List[List[str]]]]:
70
- """
71
- Parse a markdown table from text.
72
- Returns (headers, rows) if table found, None otherwise.
73
- Handles various table formats including malformed ones.
74
- """
75
- lines = [line.strip() for line in text.split('\n')]
76
-
77
- # Find potential table start (line with multiple | and actual text content)
78
- table_start = None
79
- for i, line in enumerate(lines):
80
- if '|' in line and line.count('|') >= 2:
81
- # Skip separator lines (only |, -, :, spaces)
82
- if re.match(r'^[\s\|\-:]+$', line):
83
- continue
84
- # Check if line has meaningful text (not just | characters)
85
- cells = [cell.strip() for cell in line.split('|')]
86
- if cells and not cells[0]:
87
- cells = cells[1:]
88
- if cells and not cells[-1]:
89
- cells = cells[:-1]
90
- # Must have at least 2 columns with some text
91
- meaningful_cells = [c for c in cells if len(c) > 0]
92
- if len(meaningful_cells) >= 2:
93
- table_start = i
94
- break
95
-
96
- if table_start is None:
97
- return None
98
-
99
- # Find table end (first non-empty line without | after table start)
100
- table_end = None
101
- for i in range(table_start + 1, len(lines)):
102
- line = lines[i]
103
- if not line: # Empty line, continue
104
- continue
105
- if '|' not in line:
106
- # Non-empty line without | means table ended
107
- table_end = i
108
- break
109
-
110
- if table_end is None:
111
- table_end = len(lines)
112
-
113
- table_lines = lines[table_start:table_end]
114
-
115
- # Find the actual header row (should have meaningful text, not just | or separators)
116
- headers = None
117
- header_idx = None
118
-
119
- for i, line in enumerate(table_lines):
120
- if not line or '|' not in line:
121
- continue
122
-
123
- # Skip separator lines (lines with only |, -, :, spaces)
124
- if re.match(r'^[\s\|\-:]+$', line):
125
- continue
126
-
127
- # Check if this line has meaningful content (not just | characters)
128
- cells = [cell.strip() for cell in line.split('|')]
129
- # Remove empty cells at start/end
130
- if cells and not cells[0]:
131
- cells = cells[1:]
132
- if cells and not cells[-1]:
133
- cells = cells[:-1]
134
-
135
- # Header should have at least 3 columns and meaningful text
136
- if len(cells) >= 3:
137
- # Check if cells have actual text (not just empty or single char)
138
- meaningful_cells = [c for c in cells if len(c) > 1]
139
- if len(meaningful_cells) >= 3:
140
- headers = cells
141
- header_idx = i
142
- break
143
-
144
- if not headers or header_idx is None:
145
- return None
146
-
147
- # Parse data rows (skip separator line after header if present)
148
- rows = []
149
- num_columns = len(headers)
150
-
151
- for i in range(header_idx + 1, len(table_lines)):
152
- line = table_lines[i]
153
-
154
- if not line:
155
- continue
156
-
157
- # Skip separator lines
158
- if re.match(r'^[\s\|\-:]+$', line):
159
- continue
160
-
161
- if '|' not in line:
162
- # No more table rows
163
- break
164
-
165
- cells = [cell.strip() for cell in line.split('|')]
166
- # Remove empty cells at start/end
167
- if cells and not cells[0]:
168
- cells = cells[1:]
169
- if cells and not cells[-1]:
170
- cells = cells[:-1]
171
-
172
- # Only add rows that match header column count (allow some flexibility)
173
- if len(cells) == num_columns or (len(cells) >= num_columns - 1 and len(cells) <= num_columns + 1):
174
- # Pad or trim to match header count
175
- if len(cells) < num_columns:
176
- cells.extend([''] * (num_columns - len(cells)))
177
- elif len(cells) > num_columns:
178
- cells = cells[:num_columns]
179
-
180
- # Only add if row has at least one non-empty cell
181
- if any(cell for cell in cells):
182
- rows.append(cells)
183
-
184
- if not rows:
185
- return None
186
-
187
- return (headers, rows)
188
-
189
-
190
- def _extract_metadata(text: str) -> Dict[str, str]:
191
- """
192
- Extract metadata from document header text.
193
- Looks for title, office, notice number, and description.
194
- """
195
- metadata = {
196
- "title": "",
197
- "office": "",
198
- "notice_no": "",
199
- "description": ""
200
- }
201
-
202
- lines = [line.strip() for line in text.split('\n') if line.strip()]
203
-
204
- # Extract office (usually first non-empty line)
205
- if lines:
206
- metadata["office"] = lines[0]
207
-
208
- # Look for notice number pattern (like "पत्रक सं- 1239" or "सं- 1239")
209
- notice_pattern = r'(?:पत्रक\s+)?सं[-\s:]*(\d+)'
210
- for line in lines[:10]: # Check first 10 lines
211
- match = re.search(notice_pattern, line)
212
- if match:
213
- metadata["notice_no"] = match.group(1)
214
- break
215
-
216
- # Look for title - usually in quotes or contains specific keywords
217
- # Check for quoted text first
218
- quoted_title = re.search(r'["""]([^"""]+)["""]', text[:1000])
219
- if quoted_title:
220
- metadata["title"] = quoted_title.group(1).strip()
221
- else:
222
- # Look for title patterns
223
- title_keywords = ['सम्पत्ति', 'सूचना', 'विज्ञप्ति', 'नाम परिवर्तन']
224
- for line in lines[:5]:
225
- if any(keyword in line for keyword in title_keywords):
226
- # Extract the title phrase
227
- title_match = re.search(r'(सम्पत्ति[^।]*|सूचना[^।]*|विज्ञप्ति[^।]*)', line)
228
- if title_match:
229
- metadata["title"] = title_match.group(1).strip()
230
- break
231
-
232
- # Extract description (text before table, usually contains key phrases)
233
- description_keywords = ['नाम परिवर्तन', 'अधिनियम', 'धारा', 'प्रकाशन', 'आवेदन']
234
- description_parts = []
235
- for i, line in enumerate(lines[:15]): # Check first 15 lines
236
- if any(keyword in line for keyword in description_keywords):
237
- description_parts.append(line)
238
- # Get a few surrounding lines for context
239
- if i > 0:
240
- description_parts.insert(0, lines[i-1])
241
- if i < len(lines) - 1:
242
- description_parts.append(lines[i+1])
243
- break
244
-
245
- if description_parts:
246
- description = ' '.join(description_parts).strip()
247
- if len(description) > 30: # Only if substantial
248
- # Clean up and limit length
249
- description = re.sub(r'\s+', ' ', description)
250
- metadata["description"] = description[:300] # Limit length
251
-
252
- return metadata
253
-
254
-
255
- def _extract_footer_notes(text: str) -> List[str]:
256
- """
257
- Extract footer notes from document.
258
- Usually appears after the table.
259
- """
260
- notes = []
261
-
262
- # Find table end
263
- lines = text.split('\n')
264
- table_end_idx = len(lines)
265
-
266
- for i, line in enumerate(lines):
267
- if '|' in line:
268
- # Find last table line
269
- j = i + 1
270
- while j < len(lines) and ('|' in lines[j] or re.match(r'^[\s\|\-:]+$', lines[j])):
271
- j += 1
272
- table_end_idx = j
273
- break
274
-
275
- # Extract footer text (after table)
276
- footer_lines = lines[table_end_idx:]
277
- footer_text = '\n'.join(footer_lines).strip()
278
-
279
- # Split into sentences/notes
280
- # Look for sentences ending with period, exclamation, or specific keywords
281
- sentences = re.split(r'[।\.!]\s+', footer_text)
282
-
283
- for sentence in sentences:
284
- sentence = sentence.strip()
285
- if len(sentence) > 20: # Only substantial notes
286
- # Clean up
287
- sentence = re.sub(r'\s+', ' ', sentence)
288
- if sentence:
289
- notes.append(sentence)
290
-
291
- # Limit to most relevant notes (usually 2-4)
292
- return notes[:5]
293
-
294
-
295
- def _parse_text_with_tables(text: str) -> Dict[str, Any]:
296
- """
297
- Parse text and extract structured data including tables.
298
- Returns structured JSON format with metadata, table, and footer_notes.
299
- """
300
- result = {
301
- "text": text, # Keep original text
302
- "metadata": {},
303
- "table": [],
304
- "footer_notes": []
305
- }
306
-
307
- # Check if text contains a table
308
- table_data = _parse_markdown_table(text)
309
-
310
- if table_data:
311
- headers, rows = table_data
312
- print(f"[INFO] Found table with {len(headers)} columns and {len(rows)} rows")
313
-
314
- # Extract metadata
315
- result["metadata"] = _extract_metadata(text)
316
-
317
- # Map headers to field names using original header text
318
- # Keep original language, just make valid JSON keys and handle duplicates
319
- header_mapping = {}
320
- header_counts = {} # Track occurrences of each header
321
-
322
- for i, header in enumerate(headers):
323
- header_clean = header.strip()
324
-
325
- # Create a valid JSON key from the original header
326
- # Remove special characters that aren't valid in JSON keys, but keep the text
327
- # Replace spaces and special chars with underscores, but preserve the original text
328
- header_key = header_clean
329
-
330
- # Track how many times we've seen this exact header
331
- if header_key not in header_counts:
332
- header_counts[header_key] = 0
333
-
334
- header_counts[header_key] += 1
335
-
336
- # If this header appears multiple times, append a number
337
- if header_counts[header_key] > 1:
338
- header_key = f"{header_key}_{header_counts[header_key]}"
339
-
340
- # Clean the key to be valid for JSON (remove/replace problematic characters)
341
- # Keep the original text but make it JSON-safe
342
- header_key = re.sub(r'[^\w\s\u0900-\u097F]', '', header_key) # Keep Unicode Hindi chars
343
- header_key = re.sub(r'\s+', '_', header_key) # Replace spaces with underscores
344
-
345
- # If key is empty after cleaning, use column index
346
- if not header_key:
347
- header_key = f"column_{i+1}"
348
-
349
- header_mapping[i] = header_key
350
-
351
- # Parse table rows - each row becomes a separate section
352
- table_rows_dict = {}
353
- for idx, row in enumerate(rows, start=1):
354
- row_dict = {}
355
- for i, header_idx in header_mapping.items():
356
- if i < len(row):
357
- row_dict[header_idx] = row[i].strip()
358
-
359
- if row_dict:
360
- # Each row is a separate section: row_1, row_2, etc.
361
- table_rows_dict[f"row_{idx}"] = row_dict
362
-
363
- # Store rows as separate sections instead of array
364
- result["table"] = table_rows_dict
365
-
366
- # Extract footer notes
367
- result["footer_notes"] = _extract_footer_notes(text)
368
- else:
369
- # No table found, just extract basic metadata
370
- result["metadata"] = _extract_metadata(text)
371
- result["footer_notes"] = _extract_footer_notes(text)
372
-
373
- return result
374
-
375
-
376
- async def _extract_text_with_ocr(image_bytes: bytes, page_num: int, total_pages: int) -> Dict[str, Any]:
377
- """
378
- Extract text from a single page/image using the OCR model.
379
- Returns text output in full_text field, keeps fields empty for now.
380
- """
381
- # Convert image bytes to base64 data URL
382
- data_url = _image_bytes_to_base64(image_bytes)
383
-
384
- print(f"[INFO] OCR: Processing page {page_num}/{total_pages} with model {OCR_MODEL_NAME}")
385
-
386
- try:
387
- # Use OpenAI client with OCR endpoint (as per sample code)
388
- import asyncio
389
- loop = asyncio.get_event_loop()
390
-
391
- # Run the synchronous OpenAI call in executor
392
- response = await loop.run_in_executor(
393
- None,
394
- lambda: ocr_client.chat.completions.create(
395
- model=OCR_MODEL_NAME,
396
- messages=[
397
- {
398
- "role": "user",
399
- "content": [
400
- {"type": "text", "text": "Extract all text from this image"},
401
- {
402
- "type": "image_url",
403
- "image_url": {
404
- "url": data_url
405
- }
406
- }
407
- ]
408
- }
409
- ],
410
- )
411
- )
412
-
413
- # Extract text from response
414
- extracted_text = response.choices[0].message.content
415
-
416
- if not extracted_text:
417
- extracted_text = ""
418
-
419
- print(f"[INFO] OCR: Extracted {len(extracted_text)} characters from page {page_num}")
420
-
421
- # Calculate confidence based on response quality
422
- confidence = _calculate_ocr_confidence(response, extracted_text)
423
-
424
- # Return text in full_text, keep fields empty for now
425
- return {
426
- "doc_type": "other",
427
- "confidence": confidence,
428
- "full_text": extracted_text,
429
- "fields": {} # Keep fields empty for now
430
- }
431
-
432
- except Exception as e:
433
- error_msg = str(e)
434
- print(f"[ERROR] OCR API error for page {page_num}: {error_msg}")
435
- raise RuntimeError(f"OCR API error for page {page_num}: {error_msg}")
436
-
437
-
438
- def _calculate_ocr_confidence(response, extracted_text: str) -> float:
439
- """
440
- Calculate confidence score based on OCR response quality.
441
- Checks for explicit confidence in response, or calculates based on heuristics.
442
- """
443
- # Check if response has explicit confidence score
444
- try:
445
- # Check response object for confidence-related fields
446
- if hasattr(response, 'usage'):
447
- # Some models provide usage info that might indicate quality
448
- usage = response.usage
449
- if hasattr(usage, 'completion_tokens') and usage.completion_tokens > 0:
450
- # More tokens might indicate better extraction
451
- pass
452
-
453
- # Check if finish_reason indicates quality
454
- if hasattr(response.choices[0], 'finish_reason'):
455
- finish_reason = response.choices[0].finish_reason
456
- if finish_reason == "stop":
457
- # Normal completion - good sign
458
- base_confidence = 85.0
459
- elif finish_reason == "length":
460
- # Response was truncated - lower confidence
461
- base_confidence = 70.0
462
- else:
463
- base_confidence = 75.0
464
- else:
465
- base_confidence = 85.0
466
- except Exception:
467
- base_confidence = 85.0
468
-
469
- # Adjust confidence based on text quality heuristics
470
- text_length = len(extracted_text.strip())
471
-
472
- if text_length == 0:
473
- return 0.0
474
- elif text_length < 10:
475
- # Very short text - might be error or empty
476
- return max(30.0, base_confidence - 30.0)
477
- elif text_length < 50:
478
- # Short text
479
- return max(50.0, base_confidence - 15.0)
480
- elif text_length > 1000:
481
- # Long text - likely good extraction
482
- confidence = min(95.0, base_confidence + 10.0)
483
- else:
484
- confidence = base_confidence
485
-
486
- # Check for structured content (tables, etc.) - indicates good extraction
487
- if '|' in extracted_text and extracted_text.count('|') > 5:
488
- # Table detected - boost confidence
489
- confidence = min(95.0, confidence + 5.0)
490
-
491
- # Check for meaningful content (non-whitespace ratio)
492
- non_whitespace = len([c for c in extracted_text if not c.isspace()])
493
- if text_length > 0:
494
- content_ratio = non_whitespace / text_length
495
- if content_ratio > 0.8:
496
- # High content ratio - good
497
- confidence = min(95.0, confidence + 3.0)
498
- elif content_ratio < 0.3:
499
- # Low content ratio - mostly whitespace
500
- confidence = max(50.0, confidence - 10.0)
501
-
502
- return round(confidence, 1)
503
-
504
-
505
- async def extract_fields_from_document(
506
- file_bytes: bytes,
507
- content_type: str,
508
- filename: str,
509
- ) -> Dict[str, Any]:
510
- """
511
- Extract text from document using OCR model.
512
- Processes pages separately for better reliability.
513
- Returns text output in full_text, keeps JSON/XML fields empty for now.
514
- """
515
- # Get raw image bytes for processing
516
- if content_type == "application/pdf" or content_type.endswith("/pdf"):
517
- if not PDF_SUPPORT:
518
- raise RuntimeError("PDF support requires PyMuPDF. Please install it.")
519
- # For PDFs, convert to images
520
- pdf_images = _pdf_to_images(file_bytes)
521
- image_bytes_list = pdf_images
522
- else:
523
- # For regular images, process the file bytes
524
- # Convert to JPEG for consistency
525
- try:
526
- img = Image.open(BytesIO(file_bytes))
527
- if img.mode != "RGB":
528
- img = img.convert("RGB")
529
-
530
- # Resize if too large (max 1920px on longest side)
531
- max_size = 1920
532
- w, h = img.size
533
- if w > max_size or h > max_size:
534
- if w > h:
535
- new_w = max_size
536
- new_h = int(h * (max_size / w))
537
- else:
538
- new_h = max_size
539
- new_w = int(w * (max_size / h))
540
- img = img.resize((new_w, new_h), Image.LANCZOS)
541
- print(f"[INFO] Resized image from {w}x{h} to {new_w}x{new_h}")
542
-
543
- # Convert to JPEG bytes
544
- img_bytes = BytesIO()
545
- img.save(img_bytes, format="JPEG", quality=95)
546
- image_bytes_list = [img_bytes.getvalue()]
547
- except Exception as e:
548
- # Fallback: use original file bytes
549
- print(f"[WARNING] Could not process image with PIL: {e}. Using original bytes.")
550
- image_bytes_list = [file_bytes]
551
-
552
- total_pages = len(image_bytes_list)
553
- print(f"[INFO] Processing {total_pages} page(s) with OCR model...")
554
-
555
- # Process each page separately
556
- page_results = []
557
- for page_num, img_bytes in enumerate(image_bytes_list):
558
- print(f"[INFO] Processing page {page_num + 1}/{total_pages}...")
559
- try:
560
- page_result = await _extract_text_with_ocr(img_bytes, page_num + 1, total_pages)
561
- page_results.append({
562
- "page_number": page_num + 1,
563
- "text": page_result.get("full_text", ""),
564
- "fields": page_result.get("fields", {}),
565
- "confidence": page_result.get("confidence", 0),
566
- "doc_type": page_result.get("doc_type", "other"),
567
- })
568
- print(f"[INFO] Page {page_num + 1} processed successfully")
569
- except Exception as e:
570
- print(f"[ERROR] Failed to process page {page_num + 1}: {e}")
571
- page_results.append({
572
- "page_number": page_num + 1,
573
- "text": "",
574
- "fields": {},
575
- "confidence": 0,
576
- "error": str(e)
577
- })
578
-
579
- # Combine results from all pages
580
- combined_full_text = "\n\n".join([f"=== PAGE {p['page_number']} ===\n\n{p['text']}" for p in page_results if p.get("text")])
581
-
582
- # Parse each page for tables and structure the output
583
- structured_pages = {}
584
- for page_result in page_results:
585
- if page_result.get("text"):
586
- page_num = page_result.get("page_number", 1)
587
- page_text = page_result.get("text", "")
588
-
589
- # Parse text for tables and structure
590
- parsed_data = _parse_text_with_tables(page_text)
591
-
592
- # Build structured page output
593
- page_key = f"page_{page_num}"
594
- structured_pages[page_key] = {
595
- "text": parsed_data["text"],
596
- "metadata": parsed_data["metadata"],
597
- "table": parsed_data["table"],
598
- "footer_notes": parsed_data["footer_notes"],
599
- "confidence": page_result.get("confidence", 0),
600
- "doc_type": page_result.get("doc_type", "other")
601
- }
602
-
603
- # If we have structured pages, use them; otherwise keep fields empty
604
- if structured_pages:
605
- # Always return pages with page_X keys (even for single page)
606
- combined_fields = structured_pages
607
- else:
608
- combined_fields = {}
609
-
610
- # Calculate average confidence
611
- confidences = [p.get("confidence", 0) for p in page_results if p.get("confidence", 0) > 0]
612
- avg_confidence = sum(confidences) / len(confidences) if confidences else 0
613
-
614
- # Determine doc_type from first successful page
615
- doc_type = "other"
616
- for page_result in page_results:
617
- if page_result.get("doc_type") and page_result["doc_type"] != "other":
618
- doc_type = page_result["doc_type"]
619
- break
620
-
621
- return {
622
- "doc_type": doc_type,
623
- "confidence": avg_confidence,
624
- "full_text": combined_full_text,
625
- "fields": combined_fields, # Now contains structured data with tables
626
- "pages": page_results
627
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import json
4
+ import re
5
+ import time
6
+ import asyncio
7
+ from io import BytesIO
8
+ from typing import Any, Dict, List, Optional, Tuple
9
+ import httpx
10
+
11
+ try:
12
+ import fitz # PyMuPDF
13
+ from PIL import Image
14
+ PDF_SUPPORT = True
15
+ except ImportError as e:
16
+ PDF_SUPPORT = False
17
+ print(f"[WARNING] PDF support libraries not available: {e}. PDF conversion will not work.")
18
+
19
+
20
+ # RunPod Serverless OCR Configuration
21
+ RUNPOD_ENDPOINT = os.environ.get("RUNPOD_ENDPOINT", "https://api.runpod.ai/v2/j2jvf8t6n0rk5c/run")
22
+ RUNPOD_API_KEY = os.environ.get("RUNPOD_API_KEY", "rpa_0UJOK33ZO7SID9B3ASFSKKPUHNPBQC5Z2128RB4O4qi9ts")
23
+
24
+ # Extract endpoint ID from endpoint URL for status polling
25
+ # URL format: https://api.runpod.ai/v2/{endpoint_id}/run
26
+ _endpoint_id = RUNPOD_ENDPOINT.split("/v2/")[1].split("/")[0] if "/v2/" in RUNPOD_ENDPOINT else None
27
+ RUNPOD_STATUS_ENDPOINT = f"https://api.runpod.ai/v2/{_endpoint_id}/status" if _endpoint_id else None
28
+
29
+
30
def _pdf_to_images(pdf_bytes: bytes) -> List[bytes]:
    """
    Render every page of a PDF to a JPEG image.

    Each page is rasterised at 2x zoom, downscaled (aspect ratio preserved) so
    that its longest side is at most 1920 px, and encoded as JPEG at quality 95.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        A list of JPEG-encoded image bytes, one entry per page, in page order.

    Raises:
        RuntimeError: If the PDF support libraries could not be imported.
    """
    if not PDF_SUPPORT:
        raise RuntimeError("PyMuPDF not installed. Cannot convert PDF to images.")

    # Cap on the longest side; the original comment gives GPU memory pressure
    # on the OCR side as the reason for the limit.
    max_size = 1920
    # 2x zoom for better quality; identical for every page, so build it once.
    zoom_matrix = fitz.Matrix(2.0, 2.0)

    images = []
    pdf_doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        print(f"[INFO] PDF has {len(pdf_doc)} page(s)")

        for page_num in range(len(pdf_doc)):
            page = pdf_doc[page_num]
            pix = page.get_pixmap(matrix=zoom_matrix)

            # Convert the rendered pixmap to a PIL image
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

            w, h = img.size
            if w > max_size or h > max_size:
                if w > h:
                    new_w = max_size
                    new_h = int(h * (max_size / w))
                else:
                    new_h = max_size
                    new_w = int(w * (max_size / h))
                img = img.resize((new_w, new_h), Image.LANCZOS)
                print(f"[INFO] Resized page {page_num + 1} from {w}x{h} to {new_w}x{new_h}")
            else:
                print(f"[INFO] Converted page {page_num + 1} to image ({w}x{h})")

            # Encode as JPEG (better compression than PNG for scanned pages)
            img_bytes = BytesIO()
            img.save(img_bytes, format="JPEG", quality=95)
            images.append(img_bytes.getvalue())
    finally:
        # Always release the document, even if rendering a page fails.
        pdf_doc.close()

    return images
74
+
75
+
76
+ def _image_bytes_to_base64(image_bytes: bytes) -> str:
77
+ """Convert image bytes to base64 data URL (JPEG format)."""
78
+ b64 = base64.b64encode(image_bytes).decode("utf-8")
79
+ data_url = f"data:image/jpeg;base64,{b64}"
80
+ print(f"[DEBUG] Base64 encoded image: {len(image_bytes)} bytes -> {len(data_url)} chars")
81
+ return data_url
82
+
83
+
84
+ def _parse_markdown_table(text: str) -> Optional[Tuple[List[str], List[List[str]]]]:
85
+ """
86
+ Parse a markdown table from text.
87
+ Returns (headers, rows) if table found, None otherwise.
88
+ Handles various table formats including malformed ones.
89
+ """
90
+ lines = [line.strip() for line in text.split('\n')]
91
+
92
+ # Find potential table start (line with multiple | and actual text content)
93
+ table_start = None
94
+ for i, line in enumerate(lines):
95
+ if '|' in line and line.count('|') >= 2:
96
+ # Skip separator lines (only |, -, :, spaces)
97
+ if re.match(r'^[\s\|\-:]+$', line):
98
+ continue
99
+ # Check if line has meaningful text (not just | characters)
100
+ cells = [cell.strip() for cell in line.split('|')]
101
+ if cells and not cells[0]:
102
+ cells = cells[1:]
103
+ if cells and not cells[-1]:
104
+ cells = cells[:-1]
105
+ # Must have at least 2 columns with some text
106
+ meaningful_cells = [c for c in cells if len(c) > 0]
107
+ if len(meaningful_cells) >= 2:
108
+ table_start = i
109
+ break
110
+
111
+ if table_start is None:
112
+ return None
113
+
114
+ # Find table end (first non-empty line without | after table start)
115
+ table_end = None
116
+ for i in range(table_start + 1, len(lines)):
117
+ line = lines[i]
118
+ if not line: # Empty line, continue
119
+ continue
120
+ if '|' not in line:
121
+ # Non-empty line without | means table ended
122
+ table_end = i
123
+ break
124
+
125
+ if table_end is None:
126
+ table_end = len(lines)
127
+
128
+ table_lines = lines[table_start:table_end]
129
+
130
+ # Find the actual header row (should have meaningful text, not just | or separators)
131
+ headers = None
132
+ header_idx = None
133
+
134
+ for i, line in enumerate(table_lines):
135
+ if not line or '|' not in line:
136
+ continue
137
+
138
+ # Skip separator lines (lines with only |, -, :, spaces)
139
+ if re.match(r'^[\s\|\-:]+$', line):
140
+ continue
141
+
142
+ # Check if this line has meaningful content (not just | characters)
143
+ cells = [cell.strip() for cell in line.split('|')]
144
+ # Remove empty cells at start/end
145
+ if cells and not cells[0]:
146
+ cells = cells[1:]
147
+ if cells and not cells[-1]:
148
+ cells = cells[:-1]
149
+
150
+ # Header should have at least 3 columns and meaningful text
151
+ if len(cells) >= 3:
152
+ # Check if cells have actual text (not just empty or single char)
153
+ meaningful_cells = [c for c in cells if len(c) > 1]
154
+ if len(meaningful_cells) >= 3:
155
+ headers = cells
156
+ header_idx = i
157
+ break
158
+
159
+ if not headers or header_idx is None:
160
+ return None
161
+
162
+ # Parse data rows (skip separator line after header if present)
163
+ rows = []
164
+ num_columns = len(headers)
165
+
166
+ for i in range(header_idx + 1, len(table_lines)):
167
+ line = table_lines[i]
168
+
169
+ if not line:
170
+ continue
171
+
172
+ # Skip separator lines
173
+ if re.match(r'^[\s\|\-:]+$', line):
174
+ continue
175
+
176
+ if '|' not in line:
177
+ # No more table rows
178
+ break
179
+
180
+ cells = [cell.strip() for cell in line.split('|')]
181
+ # Remove empty cells at start/end
182
+ if cells and not cells[0]:
183
+ cells = cells[1:]
184
+ if cells and not cells[-1]:
185
+ cells = cells[:-1]
186
+
187
+ # Only add rows that match header column count (allow some flexibility)
188
+ if len(cells) == num_columns or (len(cells) >= num_columns - 1 and len(cells) <= num_columns + 1):
189
+ # Pad or trim to match header count
190
+ if len(cells) < num_columns:
191
+ cells.extend([''] * (num_columns - len(cells)))
192
+ elif len(cells) > num_columns:
193
+ cells = cells[:num_columns]
194
+
195
+ # Only add if row has at least one non-empty cell
196
+ if any(cell for cell in cells):
197
+ rows.append(cells)
198
+
199
+ if not rows:
200
+ return None
201
+
202
+ return (headers, rows)
203
+
204
+
205
+ def _extract_metadata(text: str) -> Dict[str, str]:
206
+ """
207
+ Extract metadata from document header text.
208
+ Looks for title, office, notice number, and description.
209
+ """
210
+ metadata = {
211
+ "title": "",
212
+ "office": "",
213
+ "notice_no": "",
214
+ "description": ""
215
+ }
216
+
217
+ lines = [line.strip() for line in text.split('\n') if line.strip()]
218
+
219
+ # Extract office (usually first non-empty line)
220
+ if lines:
221
+ metadata["office"] = lines[0]
222
+
223
+ # Look for notice number pattern (like "पत्रक सं- 1239" or "सं- 1239")
224
+ notice_pattern = r'(?:पत्रक\s+)?सं[-\s:]*(\d+)'
225
+ for line in lines[:10]: # Check first 10 lines
226
+ match = re.search(notice_pattern, line)
227
+ if match:
228
+ metadata["notice_no"] = match.group(1)
229
+ break
230
+
231
+ # Look for title - usually in quotes or contains specific keywords
232
+ # Check for quoted text first
233
+ quoted_title = re.search(r'["""]([^"""]+)["""]', text[:1000])
234
+ if quoted_title:
235
+ metadata["title"] = quoted_title.group(1).strip()
236
+ else:
237
+ # Look for title patterns
238
+ title_keywords = ['सम्पत्ति', 'सूचना', 'विज्ञप्ति', 'नाम परिवर्तन']
239
+ for line in lines[:5]:
240
+ if any(keyword in line for keyword in title_keywords):
241
+ # Extract the title phrase
242
+ title_match = re.search(r'(सम्पत्ति[^।]*|सूचना[^।]*|विज्ञप्ति[^।]*)', line)
243
+ if title_match:
244
+ metadata["title"] = title_match.group(1).strip()
245
+ break
246
+
247
+ # Extract description (text before table, usually contains key phrases)
248
+ description_keywords = ['नाम परिवर्तन', 'अधिनियम', 'धारा', 'प्रकाशन', 'आवेदन']
249
+ description_parts = []
250
+ for i, line in enumerate(lines[:15]): # Check first 15 lines
251
+ if any(keyword in line for keyword in description_keywords):
252
+ description_parts.append(line)
253
+ # Get a few surrounding lines for context
254
+ if i > 0:
255
+ description_parts.insert(0, lines[i-1])
256
+ if i < len(lines) - 1:
257
+ description_parts.append(lines[i+1])
258
+ break
259
+
260
+ if description_parts:
261
+ description = ' '.join(description_parts).strip()
262
+ if len(description) > 30: # Only if substantial
263
+ # Clean up and limit length
264
+ description = re.sub(r'\s+', ' ', description)
265
+ metadata["description"] = description[:300] # Limit length
266
+
267
+ return metadata
268
+
269
+
270
+ def _parse_model_response(response_text: str) -> Tuple[str, Dict[str, Any]]:
271
+ """
272
+ Parse model response to extract text and metadata.
273
+ The model may return text and metadata in various formats.
274
+ Returns: (extracted_text, metadata_dict)
275
+ """
276
+ metadata = {}
277
+ text = response_text
278
+
279
+ # Try to find JSON metadata section
280
+ # Look for METADATA: or metadata: section
281
+ metadata_patterns = [
282
+ r'METADATA:\s*\n?\s*({.*?})(?:\n\n|\nTEXT|$)',
283
+ r'metadata:\s*\n?\s*({.*?})(?:\n\n|\nTEXT|$)',
284
+ r'METADATA:\s*\n?\s*```json\s*({.*?})\s*```',
285
+ r'METADATA:\s*\n?\s*```\s*({.*?})\s*```',
286
+ ]
287
+
288
+ for pattern in metadata_patterns:
289
+ match = re.search(pattern, response_text, re.DOTALL | re.IGNORECASE)
290
+ if match:
291
+ try:
292
+ metadata_json = match.group(1).strip()
293
+ metadata = json.loads(metadata_json)
294
+ # Remove metadata section from text
295
+ text = response_text[:match.start()] + response_text[match.end():]
296
+ break
297
+ except (json.JSONDecodeError, IndexError):
298
+ continue
299
+
300
+ # If no JSON found, try to extract metadata from structured text format
301
+ if not metadata:
302
+ # Look for key-value pairs in METADATA section
303
+ metadata_section = re.search(r'METADATA:\s*\n(.*?)(?:\n\n|\nTEXT|$)', response_text, re.DOTALL | re.IGNORECASE)
304
+ if metadata_section:
305
+ metadata_text = metadata_section.group(1)
306
+ # Parse key-value pairs
307
+ for line in metadata_text.split('\n'):
308
+ if ':' in line:
309
+ parts = line.split(':', 1)
310
+ if len(parts) == 2:
311
+ key = parts[0].strip().lower().replace(' ', '_')
312
+ value = parts[1].strip()
313
+ if value:
314
+ metadata[key] = value
315
+
316
+ # Extract TEXT section if present
317
+ text_match = re.search(r'TEXT:\s*\n(.*?)(?:\n\nMETADATA|$)', response_text, re.DOTALL | re.IGNORECASE)
318
+ if text_match:
319
+ text = text_match.group(1).strip()
320
+ else:
321
+ # If no TEXT section, remove METADATA section if found
322
+ text = re.sub(r'METADATA:.*', '', response_text, flags=re.DOTALL | re.IGNORECASE).strip()
323
+
324
+ # Clean up text
325
+ text = text.strip()
326
+
327
+ # Clean up metadata - remove empty values
328
+ metadata = {k: v for k, v in metadata.items() if v and str(v).strip()}
329
+
330
+ return text, metadata
331
+
332
+
333
+ def _extract_footer_notes(text: str) -> List[str]:
334
+ """
335
+ Extract footer notes from document.
336
+ Usually appears after the table.
337
+ """
338
+ notes = []
339
+
340
+ # Find table end
341
+ lines = text.split('\n')
342
+ table_end_idx = len(lines)
343
+
344
+ for i, line in enumerate(lines):
345
+ if '|' in line:
346
+ # Find last table line
347
+ j = i + 1
348
+ while j < len(lines) and ('|' in lines[j] or re.match(r'^[\s\|\-:]+$', lines[j])):
349
+ j += 1
350
+ table_end_idx = j
351
+ break
352
+
353
+ # Extract footer text (after table)
354
+ footer_lines = lines[table_end_idx:]
355
+ footer_text = '\n'.join(footer_lines).strip()
356
+
357
+ # Split into sentences/notes
358
+ # Look for sentences ending with period, exclamation, or specific keywords
359
+ sentences = re.split(r'[।\.!]\s+', footer_text)
360
+
361
+ for sentence in sentences:
362
+ sentence = sentence.strip()
363
+ if len(sentence) > 20: # Only substantial notes
364
+ # Clean up
365
+ sentence = re.sub(r'\s+', ' ', sentence)
366
+ if sentence:
367
+ notes.append(sentence)
368
+
369
+ # Limit to most relevant notes (usually 2-4)
370
+ return notes[:5]
371
+
372
+
373
def _parse_text_with_tables(text: str, page_metadata: Dict[str, Any] = None) -> Dict[str, Any]:
    """
    Parse text and extract structured data including tables.

    Uses model-extracted metadata if provided, otherwise falls back to basic
    extraction via _extract_metadata.

    Args:
        text: Page text, possibly containing a markdown table.
        page_metadata: Optional metadata already extracted by the model.

    Returns:
        Dict with keys:
          * "text": the original text,
          * "metadata": provided or extracted metadata,
          * "table": {"row_1": {...}, "row_2": {...}} when a table is found,
            otherwise an empty list,
          * "footer_notes": list of notes found after the table.
    """
    result = {
        "text": text,  # Keep original text
        # Copy so the caller's dict is never aliased by the result
        "metadata": dict(page_metadata) if page_metadata else {},
        "table": [],
        "footer_notes": []
    }

    table_data = _parse_markdown_table(text)

    if not table_data:
        # No table found, just extract basic metadata
        result["metadata"] = _extract_metadata(text)
        result["footer_notes"] = _extract_footer_notes(text)
        return result

    headers, rows = table_data
    print(f"[INFO] Found table with {len(headers)} columns and {len(rows)} rows")

    # Use provided metadata or extract basic metadata as fallback
    if not result["metadata"]:
        result["metadata"] = _extract_metadata(text)

    # Map column index -> JSON key. Headers keep their original language.
    # Keys are sanitised FIRST and de-duplicated afterwards: de-duplicating
    # the raw header let two different headers collapse to the same key once
    # sanitised, so later columns silently overwrote earlier ones.
    header_mapping = {}
    seen_counts = {}
    used_keys = set()

    for i, header in enumerate(headers):
        base_key = re.sub(r'[^\w\s\u0900-\u097F]', '', header.strip())  # Keep Unicode Hindi chars
        base_key = re.sub(r'\s+', '_', base_key)  # Replace spaces with underscores

        # If key is empty after cleaning, use column index
        if not base_key:
            base_key = f"column_{i+1}"

        occurrence = seen_counts.get(base_key, 0) + 1
        candidate = base_key if occurrence == 1 else f"{base_key}_{occurrence}"
        # Guard against a literal header that already looks like "name_2"
        while candidate in used_keys:
            occurrence += 1
            candidate = f"{base_key}_{occurrence}"

        seen_counts[base_key] = occurrence
        used_keys.add(candidate)
        header_mapping[i] = candidate

    # Parse table rows - each row becomes a separate section: row_1, row_2, ...
    table_rows_dict = {}
    for idx, row in enumerate(rows, start=1):
        row_dict = {
            header_key: row[col].strip()
            for col, header_key in header_mapping.items()
            if col < len(row)
        }
        if row_dict:
            table_rows_dict[f"row_{idx}"] = row_dict

    # Store rows as separate sections instead of array
    result["table"] = table_rows_dict
    result["footer_notes"] = _extract_footer_notes(text)

    return result
454
+
455
+
456
async def _poll_runpod_job(job_id: str, client: httpx.AsyncClient, max_wait_time: int = 300) -> Dict[str, Any]:
    """
    Poll RunPod job status until the job reaches a terminal state.

    Args:
        job_id: Identifier returned when the job was submitted.
        client: Open HTTP client used for the status requests.
        max_wait_time: Maximum number of seconds to keep polling.

    Returns:
        The final status payload (including the job output) once the job
        reports COMPLETED.

    Raises:
        RuntimeError: If the job fails, is cancelled, times out remotely, or
            does not finish within max_wait_time.
        httpx.HTTPStatusError: If a status request returns an error status.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {RUNPOD_API_KEY}"
    }
    status_url = f"{RUNPOD_STATUS_ENDPOINT}/{job_id}"

    poll_interval = 2  # Poll every 2 seconds
    # Monotonic clock: elapsed time is unaffected by system clock adjustments
    start_time = time.monotonic()

    while True:
        if time.monotonic() - start_time > max_wait_time:
            raise RuntimeError(f"Job {job_id} timed out after {max_wait_time} seconds")

        response = await client.get(status_url, headers=headers)
        response.raise_for_status()
        status_result = response.json()

        # "status" may be missing or null; normalise to an upper-case string
        status = str(status_result.get("status") or "").upper()

        if status == "COMPLETED":
            print(f"[INFO] Job {job_id} completed successfully")
            return status_result

        if status == "FAILED":
            error_msg = status_result.get("error", "Unknown error")
            raise RuntimeError(f"Job {job_id} failed: {error_msg}")

        if status in ("CANCELLED", "TIMED_OUT"):
            # Terminal states: polling further can never succeed, so stop now
            # instead of waiting for the local timeout.
            error_msg = status_result.get("error", "Unknown error")
            raise RuntimeError(f"Job {job_id} ended with status {status}: {error_msg}")

        # IN_QUEUE, IN_PROGRESS or an unrecognised status: wait and retry
        print(f"[INFO] Job {job_id} status: {status}, waiting...")
        await asyncio.sleep(poll_interval)
496
+
497
+
498
async def _extract_text_with_ocr(image_bytes: bytes, page_num: int, total_pages: int, custom_prompt: str = None) -> Dict[str, Any]:
    """
    Extract text and metadata from a single page/image using the RunPod serverless OCR model.

    Args:
        image_bytes: Image bytes to process
        page_num: Page number (used for logging and error messages)
        total_pages: Total number of pages (used for logging)
        custom_prompt: Optional custom prompt for field extraction; when
            omitted a generic "extract all text" prompt is sent

    Returns:
        Dict with "doc_type", "confidence", "full_text" and "fields"
        (the metadata parsed out of the model response).

    Raises:
        RuntimeError: On any HTTP, job or parsing failure; the original
            exception is attached as the cause.
    """
    # Convert image bytes to base64
    image_base64 = base64.b64encode(image_bytes).decode("utf-8")

    print(f"[INFO] OCR: Processing page {page_num}/{total_pages} with RunPod endpoint")

    try:
        # Use custom prompt if provided, otherwise use the default
        metadata_prompt = custom_prompt if custom_prompt else """Extract all text from this image."""

        # Payload sent to the endpoint: the prompt plus the base64 encoded image
        payload = {
            "input": {
                "prompt": metadata_prompt,
                "image_base64": image_base64
            }
        }

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {RUNPOD_API_KEY}"
        }

        async with httpx.AsyncClient(timeout=300.0) as client:
            # Submit job
            response = await client.post(
                RUNPOD_ENDPOINT,
                headers=headers,
                json=payload
            )
            response.raise_for_status()
            result = response.json()

            # Check if this is an async job (has job ID and status)
            job_id = result.get("id")
            status = str(result.get("status") or "").upper()

            if job_id and status in ["IN_QUEUE", "IN_PROGRESS"]:
                # This is an async job, need to poll for completion
                print(f"[INFO] Job submitted with ID: {job_id}, status: {status}")
                if not RUNPOD_STATUS_ENDPOINT:
                    raise RuntimeError("RunPod status endpoint not configured. Cannot poll async job.")

                result = await _poll_runpod_job(job_id, client)
                status = str(result.get("status") or "").upper()

        # A job that ended unsuccessfully must not be stringified and returned
        # as if it were OCR text.
        if status in ("FAILED", "CANCELLED", "TIMED_OUT"):
            error_msg = result.get("error", "Unknown error")
            raise RuntimeError(f"Job {job_id} ended with status {status}: {error_msg}")

        # The response is expected to carry an "output" that may be a string,
        # a dict or a list depending on the model.
        extracted_text = ""

        if "output" in result:
            output = result["output"]
            if isinstance(output, str):
                extracted_text = output
            elif isinstance(output, dict):
                # Take the first non-empty common field and force it to str so
                # the parser below never receives a dict or list.
                for field_name in ("text", "result", "content"):
                    value = output.get(field_name)
                    if value:
                        extracted_text = value if isinstance(value, str) else str(value)
                        break
            elif isinstance(output, list) and len(output) > 0:
                # If output is a list, take the first element
                extracted_text = str(output[0])
        elif "result" in result:
            extracted_text = str(result["result"])
        elif "text" in result:
            extracted_text = str(result["text"])
        else:
            # Fallback: convert entire response to string
            extracted_text = str(result)

        if not extracted_text:
            extracted_text = ""

        print(f"[INFO] OCR: Extracted {len(extracted_text)} characters from page {page_num}")

        # Parse model response to extract text and metadata
        parsed_text, parsed_metadata = _parse_model_response(extracted_text)

        # The confidence heuristic only inspects the text; no response object is needed
        confidence = _calculate_ocr_confidence(None, parsed_text)

        # Determine document type from metadata if available
        doc_type = parsed_metadata.get("document_type", "other")
        if doc_type == "other" and parsed_metadata.get("title"):
            # Title may be a non-string JSON value; coerce before lower-casing
            title_lower = str(parsed_metadata.get("title", "")).lower()
            if any(kw in title_lower for kw in ["tender", "bid", "quotation"]):
                doc_type = "tender"
            elif any(kw in title_lower for kw in ["recruitment", "appointment", "vacancy"]):
                doc_type = "recruitment"
            elif any(kw in title_lower for kw in ["notice", "notification", "circular"]):
                doc_type = "notice"

        return {
            "doc_type": doc_type,
            "confidence": confidence,
            "full_text": parsed_text,
            "fields": parsed_metadata if parsed_metadata else {}  # Model-extracted metadata
        }

    except httpx.HTTPStatusError as e:
        error_msg = f"HTTP {e.response.status_code}: {e.response.text}"
        print(f"[ERROR] OCR API HTTP error for page {page_num}: {error_msg}")
        raise RuntimeError(f"OCR API error for page {page_num}: {error_msg}") from e
    except Exception as e:
        error_msg = str(e)
        print(f"[ERROR] OCR API error for page {page_num}: {error_msg}")
        raise RuntimeError(f"OCR API error for page {page_num}: {error_msg}") from e
630
+
631
+
632
+ def _calculate_ocr_confidence(response, extracted_text: str) -> float:
633
+ """
634
+ Calculate confidence score based on OCR response quality.
635
+ Returns a score from 0-100, with higher scores for better extraction quality.
636
+ """
637
+ # Start with a higher base confidence for successful extractions
638
+ base_confidence = 92.0
639
+
640
+ # Adjust confidence based on text quality heuristics
641
+ text_length = len(extracted_text.strip())
642
+
643
+ if text_length == 0:
644
+ return 0.0
645
+ elif text_length < 10:
646
+ # Very short text - might be error or empty
647
+ return max(30.0, base_confidence - 40.0)
648
+ elif text_length < 50:
649
+ # Short text - might be incomplete
650
+ return max(60.0, base_confidence - 20.0)
651
+ elif text_length > 1000:
652
+ # Long text - likely good extraction
653
+ confidence = min(100.0, base_confidence + 5.0)
654
+ elif text_length > 500:
655
+ # Medium-long text - good extraction
656
+ confidence = min(100.0, base_confidence + 3.0)
657
+ else:
658
+ confidence = base_confidence
659
+
660
+ # Check for structured content (tables, etc.) - indicates good extraction
661
+ if '|' in extracted_text and extracted_text.count('|') > 5:
662
+ # Table detected - boost confidence significantly
663
+ confidence = min(100.0, confidence + 6.0)
664
+
665
+ # Check for meaningful content (non-whitespace ratio)
666
+ non_whitespace = len([c for c in extracted_text if not c.isspace()])
667
+ if text_length > 0:
668
+ content_ratio = non_whitespace / text_length
669
+ if content_ratio > 0.85:
670
+ # Very high content ratio - excellent extraction
671
+ confidence = min(100.0, confidence + 5.0)
672
+ elif content_ratio > 0.75:
673
+ # High content ratio - good extraction
674
+ confidence = min(100.0, confidence + 3.0)
675
+ elif content_ratio > 0.6:
676
+ # Moderate content ratio - decent extraction
677
+ confidence = min(100.0, confidence + 1.0)
678
+ elif content_ratio < 0.3:
679
+ # Low content ratio - mostly whitespace
680
+ confidence = max(60.0, confidence - 15.0)
681
+
682
+ # Check for common OCR quality indicators
683
+ # Presence of numbers, dates, and structured patterns indicates good extraction
684
+ has_numbers = any(c.isdigit() for c in extracted_text)
685
+ has_letters = any(c.isalpha() for c in extracted_text)
686
+ has_punctuation = any(c in '.,;:!?()[]{}' for c in extracted_text)
687
+
688
+ if has_numbers and has_letters and has_punctuation:
689
+ # Well-structured text with mixed content - high confidence
690
+ confidence = min(100.0, confidence + 2.0)
691
+
692
+ # Cap at 100% and ensure minimum quality threshold
693
+ return round(min(100.0, max(0.0, confidence)), 1)
694
+
695
+
696
def _load_fields_json(field_text: str) -> Dict[str, Any]:
    """Return the first JSON object found in a field-extraction response, or {}."""
    if not isinstance(field_text, str):
        return {}

    candidates = []
    # Prefer an embedded {...} object (one level of nesting), then the whole text
    json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', field_text, re.DOTALL)
    if json_match:
        candidates.append(json_match.group(0))
    candidates.append(field_text)

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError
            continue
        # Only a JSON object maps field names to values; lists/strings are rejected
        if isinstance(parsed, dict):
            return parsed
    return {}


async def extract_fields_from_document(
    file_bytes: bytes,
    content_type: str,
    filename: str,
    key_fields: str = None,
) -> Dict[str, Any]:
    """
    Extract text from a document using the OCR model, one page at a time.

    Args:
        file_bytes: Raw bytes of the uploaded file.
        content_type: MIME type of the upload; PDFs are rendered to images,
            anything else is treated as a single image.
        filename: Original file name (not used by the extraction itself).
        key_fields: Optional comma-separated field names; when given, a second
            OCR pass on the first page tries to extract them as JSON.

    Returns:
        Dict with "doc_type", "confidence" (average of successful pages),
        "full_text", "fields" (per-page structure keyed "page_N") and "pages"
        (raw per-page results). "Fields" is added at root level only when the
        user-specified field extraction produced a JSON object.

    Raises:
        RuntimeError: If a PDF is supplied but PDF support is unavailable.
    """
    # content_type may be missing on some uploads; never call methods on None
    normalized_type = (content_type or "").lower()

    if normalized_type == "application/pdf" or normalized_type.endswith("/pdf"):
        if not PDF_SUPPORT:
            raise RuntimeError("PDF support requires PyMuPDF. Please install it.")
        # For PDFs, convert to images
        image_bytes_list = _pdf_to_images(file_bytes)
    else:
        # For regular images, normalise to an RGB JPEG for consistency
        try:
            img = Image.open(BytesIO(file_bytes))
            if img.mode != "RGB":
                img = img.convert("RGB")

            # Resize if too large (max 1920px on longest side)
            max_size = 1920
            w, h = img.size
            if w > max_size or h > max_size:
                if w > h:
                    new_w = max_size
                    new_h = int(h * (max_size / w))
                else:
                    new_h = max_size
                    new_w = int(w * (max_size / h))
                img = img.resize((new_w, new_h), Image.LANCZOS)
                print(f"[INFO] Resized image from {w}x{h} to {new_w}x{new_h}")

            # Convert to JPEG bytes
            img_bytes = BytesIO()
            img.save(img_bytes, format="JPEG", quality=95)
            image_bytes_list = [img_bytes.getvalue()]
        except Exception as e:
            # Fallback: use original file bytes
            print(f"[WARNING] Could not process image with PIL: {e}. Using original bytes.")
            image_bytes_list = [file_bytes]

    total_pages = len(image_bytes_list)
    print(f"[INFO] Processing {total_pages} page(s) with OCR model...")

    # Process each page separately; a failing page is recorded, not fatal
    page_results = []
    for page_number, page_bytes in enumerate(image_bytes_list, start=1):
        print(f"[INFO] Processing page {page_number}/{total_pages}...")
        try:
            page_result = await _extract_text_with_ocr(page_bytes, page_number, total_pages, None)
            page_results.append({
                "page_number": page_number,
                "text": page_result.get("full_text", ""),
                "fields": page_result.get("fields", {}),
                "confidence": page_result.get("confidence", 0),
                "doc_type": page_result.get("doc_type", "other"),
            })
            print(f"[INFO] Page {page_number} processed successfully")
        except Exception as e:
            print(f"[ERROR] Failed to process page {page_number}: {e}")
            page_results.append({
                "page_number": page_number,
                "text": "",
                "fields": {},
                "confidence": 0,
                "error": str(e)
            })

    # Combine results from all pages
    combined_full_text = "\n\n".join(
        f"=== PAGE {p['page_number']} ===\n\n{p['text']}" for p in page_results if p.get("text")
    )

    # Extract user-specified fields if key_fields provided
    extracted_fields = {}
    if key_fields and key_fields.strip():
        # "Invoice Number, Invoice Date" -> ['Invoice Number', 'Invoice Date']
        field_list = [f.strip() for f in key_fields.split(',') if f.strip()]
        if field_list:
            print(f"[INFO] Extracting user-specified fields: {field_list}")

            # Format fields as JSON array string for prompt
            fields_json = json.dumps(field_list)
            custom_prompt = f"Extract the following fields from this image and return as JSON: {fields_json}. Return only a valid JSON object with the field names as keys and their extracted values."

            # Run second OCR pass on first page (usually has most metadata) with custom prompt
            if image_bytes_list:
                try:
                    print("[INFO] Running second OCR pass for field extraction...")
                    field_result = await _extract_text_with_ocr(image_bytes_list[0], 1, 1, custom_prompt)
                    field_text = field_result.get("full_text", "")

                    extracted_fields = _load_fields_json(field_text)
                    if extracted_fields:
                        print(f"[INFO] Successfully extracted {len(extracted_fields)} fields from second OCR pass")
                    else:
                        print(f"[WARNING] Could not parse JSON from field extraction response: {str(field_text)[:200]}")
                except Exception as e:
                    print(f"[WARNING] Field extraction failed: {e}")
                    extracted_fields = {}

    # Parse each page for tables and structure the output
    structured_pages = {}
    for page_result in page_results:
        if not page_result.get("text"):
            continue
        page_num = page_result.get("page_number", 1)
        parsed_data = _parse_text_with_tables(page_result.get("text", ""), {})

        # Structured page output (user-specified Fields live at root level)
        structured_pages[f"page_{page_num}"] = {
            "text": parsed_data["text"],
            "table": parsed_data["table"],
            "footer_notes": parsed_data["footer_notes"],
            "confidence": page_result.get("confidence", 0),
            "doc_type": page_result.get("doc_type", "other")
        }

    # Always keyed page_X (even for a single page); empty dict when nothing parsed
    combined_fields = structured_pages

    # Average confidence over pages that produced a positive score
    confidences = [p.get("confidence", 0) for p in page_results if p.get("confidence", 0) > 0]
    avg_confidence = sum(confidences) / len(confidences) if confidences else 0

    # Determine doc_type from the first page that reports something specific
    doc_type = "other"
    for page_result in page_results:
        if page_result.get("doc_type") and page_result["doc_type"] != "other":
            doc_type = page_result["doc_type"]
            break

    return_obj = {
        "doc_type": doc_type,
        "confidence": avg_confidence,
        "full_text": combined_full_text,
        "fields": combined_fields,  # Structured data with tables
        "pages": page_results
    }

    # Add Fields at root level only if user provided key_fields and extraction succeeded
    if extracted_fields:
        return_obj["Fields"] = extracted_fields

    return return_obj
backend/app/otp_service.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OTP (One-Time Password) service for email-based authentication.
3
+ """
4
+ import random
5
+ import string
6
+ from datetime import datetime, timedelta
7
+ from typing import Dict, Optional
8
+ from sqlalchemy.orm import Session
9
+ from fastapi import HTTPException
10
+ from .models import User
11
+ from .brevo_service import send_otp_email
12
+
13
+ # Store OTPs in memory (in production, use Redis or database)
14
+ otp_store: Dict[str, dict] = {}
15
+
16
+
17
def generate_otp(length: int = 6) -> str:
    """
    Generate a random numeric OTP code.

    The digits come from the operating system's cryptographically secure
    random source: the code is an authentication secret, and the default
    ``random`` generator is predictable and unsuitable for that purpose.

    Args:
        length: Length of OTP (default: 6)

    Returns:
        Random OTP string consisting only of decimal digits
    """
    # Local import keeps this block self-contained
    import secrets

    return ''.join(secrets.choice(string.digits) for _ in range(length))
28
+
29
+
30
async def request_otp(email: str, db: Session) -> dict:
    """
    Generate and send OTP to email using Brevo.

    Args:
        email: Email address to send OTP to
        db: Database session (not used here; kept for interface symmetry)

    Returns:
        Dictionary with success message and validity in minutes

    Raises:
        HTTPException: 500 if the email could not be sent. The stored OTP is
            discarded in that case.
    """
    email_key = email.lower()
    ttl_minutes = 10

    otp = generate_otp()
    expires_at = datetime.utcnow() + timedelta(minutes=ttl_minutes)

    # Store OTP (in production, use Redis or database with TTL).
    # A new request replaces any previous code and resets the attempt counter.
    otp_store[email_key] = {
        'otp': otp,
        'expires_at': expires_at,
        'attempts': 0,
        'max_attempts': 5
    }

    # Send OTP via Brevo
    try:
        await send_otp_email(email, otp)
        print(f"[INFO] OTP generated and sent to {email}")
    except Exception as e:
        # An undelivered code must not stay valid
        otp_store.pop(email_key, None)
        # Log the provider error server-side; do not expose it to the client
        print(f"[ERROR] Failed to send OTP email to {email}: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail="Failed to send OTP email. Please try again later."
        ) from e

    return {
        "message": "OTP sent to your email address",
        "expires_in_minutes": ttl_minutes
    }
70
+
71
+
72
async def _sync_new_user_integrations(email_lower: str) -> None:
    """Enrich a new user's contact data via Apollo.io and push it to Brevo and Monday.com."""
    from .apollo_service import enrich_contact_by_email
    from .brevo_service import create_brevo_contact, BREVO_TRIAL_LIST_ID
    from .monday_service import create_monday_lead

    # Enrichment may return nothing; an empty dict makes every lookup yield None
    enriched = await enrich_contact_by_email(email_lower) or {}

    first_name = enriched.get("first_name")
    last_name = enriched.get("last_name")
    org_name = enriched.get("organization_name")

    # Fallback to email domain if Apollo didn't provide organization
    if not org_name:
        org_domain = email_lower.split('@')[1] if '@' in email_lower else None
        org_name = org_domain.split('.')[0].capitalize() if org_domain else None

    # Update Brevo contact with enriched data
    await create_brevo_contact(
        email=email_lower,
        first_name=first_name,
        last_name=last_name,
        organization_name=org_name,
        phone_number=enriched.get("phone_number"),
        linkedin_url=enriched.get("linkedin_url"),
        title=enriched.get("title"),
        headline=enriched.get("headline"),
        organization_website=enriched.get("organization_website"),
        organization_address=enriched.get("organization_address"),
        list_id=BREVO_TRIAL_LIST_ID
    )

    # Create lead in Monday.com
    await create_monday_lead(
        email=email_lower,
        first_name=first_name,
        last_name=last_name,
        phone_number=enriched.get("phone_number"),
        linkedin_url=enriched.get("linkedin_url"),
        title=enriched.get("title"),
        headline=enriched.get("headline"),
        organization_name=org_name,
        organization_website=enriched.get("organization_website"),
        organization_address=enriched.get("organization_address"),
    )


async def verify_otp(email: str, otp: str, db: Session) -> User:
    """
    Verify OTP and return/create user.

    Args:
        email: Email address
        otp: OTP code to verify
        db: Database session

    Returns:
        User object (created on first successful verification)

    Raises:
        HTTPException: If OTP is invalid, expired, or max attempts exceeded
    """
    # Local import keeps this block self-contained
    import hmac

    email_lower = email.lower()
    stored = otp_store.get(email_lower)

    if not stored:
        raise HTTPException(
            status_code=400,
            detail="OTP not found. Please request a new OTP."
        )

    # Check if expired
    if datetime.utcnow() > stored['expires_at']:
        otp_store.pop(email_lower, None)
        raise HTTPException(
            status_code=400,
            detail="OTP has expired. Please request a new OTP."
        )

    # Check max attempts
    if stored['attempts'] >= stored['max_attempts']:
        otp_store.pop(email_lower, None)
        raise HTTPException(
            status_code=400,
            detail="Maximum verification attempts exceeded. Please request a new OTP."
        )

    # Constant-time comparison on bytes avoids leaking how many leading
    # characters matched; surrounding whitespace from the client is ignored.
    expected_code = str(stored['otp']).encode("utf-8")
    supplied_code = str(otp or "").strip().encode("utf-8")
    if not hmac.compare_digest(expected_code, supplied_code):
        stored['attempts'] += 1
        remaining_attempts = stored['max_attempts'] - stored['attempts']
        raise HTTPException(
            status_code=400,
            detail=f"Invalid OTP. {remaining_attempts} attempt(s) remaining."
        )

    # Consume the OTP immediately: the code is single-use, and removing it
    # before the awaits below prevents a concurrent request from replaying it
    # (which also used to end in a KeyError on the final delete).
    otp_store.pop(email_lower, None)

    # Get or create user
    user = db.query(User).filter(User.email == email_lower).first()

    if not user:
        user = User(
            email=email_lower,
            auth_method='otp',
            email_verified=True
        )
        db.add(user)
        db.commit()
        db.refresh(user)
        print(f"[INFO] New user created via OTP: {email_lower}")

        # Enrich contact data from Apollo.io and update Brevo + Monday.com
        try:
            await _sync_new_user_integrations(email_lower)
        except Exception as e:
            # Don't fail user creation if integrations fail
            print(f"[WARNING] Failed to enrich/update contact for {email_lower}: {str(e)}")
    else:
        user.email_verified = True
        if user.auth_method != 'otp':
            user.auth_method = 'otp'
        db.commit()
        print(f"[INFO] User verified via OTP: {email_lower}")

    return user
197
+
backend/app/schemas.py CHANGED
@@ -1,26 +1,26 @@
1
- from pydantic import BaseModel
2
- from typing import Dict, Optional
3
- from datetime import datetime
4
-
5
-
6
- class ExtractionStage(BaseModel):
7
- time: int
8
- status: str
9
- variation: str
10
-
11
-
12
- class ExtractionRecordBase(BaseModel):
13
- id: int
14
- fileName: str
15
- fileType: str
16
- fileSize: str
17
- extractedAt: datetime
18
- status: str
19
- confidence: float
20
- fieldsExtracted: int
21
- totalTime: int
22
- stages: Dict[str, ExtractionStage]
23
- errorMessage: Optional[str] = None
24
-
25
- class Config:
26
- orm_mode = True
 
1
+ from pydantic import BaseModel
2
+ from typing import Dict, Optional
3
+ from datetime import datetime
4
+
5
+
6
+ class ExtractionStage(BaseModel):
7
+ time: int
8
+ status: str
9
+ variation: str
10
+
11
+
12
+ class ExtractionRecordBase(BaseModel):
13
+ id: int
14
+ fileName: str
15
+ fileType: str
16
+ fileSize: str
17
+ extractedAt: datetime
18
+ status: str
19
+ confidence: float
20
+ fieldsExtracted: int
21
+ totalTime: int
22
+ stages: Dict[str, ExtractionStage]
23
+ errorMessage: Optional[str] = None
24
+
25
+ class Config:
26
+ from_attributes = True
backend/requirements.txt CHANGED
@@ -1,11 +1,15 @@
1
- fastapi
2
- uvicorn[standard]
3
- python-multipart
4
- pydantic
5
- sqlalchemy
6
- httpx
7
- python-dotenv
8
- pymupdf
9
- pillow
10
- huggingface-hub
11
- openai
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ python-multipart
4
+ pydantic[email]
5
+ sqlalchemy
6
+ httpx
7
+ python-dotenv
8
+ pymupdf
9
+ pillow
10
+ huggingface-hub
11
+ openai
12
+ firebase-admin
13
+ pyjwt
14
+ python-jose[cryptography]
15
+ email-validator
frontend/build-env.sh ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/sh
2
+ # Script to create .env file from environment variables for Vite build
3
+ # This is used in Docker build when environment variables are available
4
+
5
+ # Debug: Check if variables are set (without exposing values)
6
+ echo "Checking environment variables..."
7
+ [ -z "$VITE_FIREBASE_API_KEY" ] && echo "WARNING: VITE_FIREBASE_API_KEY is not set" || echo "✓ VITE_FIREBASE_API_KEY is set"
8
+ [ -z "$VITE_FIREBASE_AUTH_DOMAIN" ] && echo "WARNING: VITE_FIREBASE_AUTH_DOMAIN is not set" || echo "✓ VITE_FIREBASE_AUTH_DOMAIN is set"
9
+ [ -z "$VITE_FIREBASE_PROJECT_ID" ] && echo "WARNING: VITE_FIREBASE_PROJECT_ID is not set" || echo "✓ VITE_FIREBASE_PROJECT_ID is set"
10
+
11
+ cat > .env << EOF
12
+ VITE_FIREBASE_API_KEY=${VITE_FIREBASE_API_KEY:-}
13
+ VITE_FIREBASE_AUTH_DOMAIN=${VITE_FIREBASE_AUTH_DOMAIN:-}
14
+ VITE_FIREBASE_PROJECT_ID=${VITE_FIREBASE_PROJECT_ID:-}
15
+ VITE_FIREBASE_STORAGE_BUCKET=${VITE_FIREBASE_STORAGE_BUCKET:-}
16
+ VITE_FIREBASE_MESSAGING_SENDER_ID=${VITE_FIREBASE_MESSAGING_SENDER_ID:-}
17
+ VITE_FIREBASE_APP_ID=${VITE_FIREBASE_APP_ID:-}
18
+ VITE_API_BASE_URL=${VITE_API_BASE_URL:-}
19
+ EOF
20
+
21
+ echo "Created .env file with environment variables"
22
+
frontend/index.html CHANGED
@@ -1,12 +1,13 @@
1
- <!doctype html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8" />
5
- <title>Document Capture Demo</title>
6
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
- </head>
8
- <body class="bg-[#FAFAFA]">
9
- <div id="root"></div>
10
- <script type="module" src="/src/main.jsx"></script>
11
- </body>
12
- </html>
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <link rel="icon" type="image/png" href="/logo.png" />
6
+ <title>EZOFIS AI - VRP Document Intelligence</title>
7
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
8
+ </head>
9
+ <body class="bg-[#FAFAFA]">
10
+ <div id="root"></div>
11
+ <script type="module" src="/src/main.jsx"></script>
12
+ </body>
13
+ </html>
frontend/package.json CHANGED
@@ -1,25 +1,26 @@
1
- {
2
- "name": "document-capture-demo",
3
- "version": "1.0.0",
4
- "private": true,
5
- "scripts": {
6
- "dev": "vite",
7
- "build": "vite build",
8
- "preview": "vite preview"
9
- },
10
- "dependencies": {
11
- "react": "^18.3.1",
12
- "react-dom": "^18.3.1",
13
- "react-router-dom": "^6.26.2",
14
- "framer-motion": "^11.0.0",
15
- "lucide-react": "^0.471.0",
16
- "pdfjs-dist": "^4.0.379"
17
- },
18
- "devDependencies": {
19
- "@vitejs/plugin-react": "^4.1.0",
20
- "autoprefixer": "^10.4.20",
21
- "postcss": "^8.4.47",
22
- "tailwindcss": "^3.4.14",
23
- "vite": "^5.4.0"
24
- }
25
- }
 
 
1
+ {
2
+ "name": "document-capture-demo",
3
+ "version": "1.0.0",
4
+ "private": true,
5
+ "scripts": {
6
+ "dev": "vite",
7
+ "build": "vite build",
8
+ "preview": "vite preview"
9
+ },
10
+ "dependencies": {
11
+ "react": "^18.3.1",
12
+ "react-dom": "^18.3.1",
13
+ "react-router-dom": "^6.26.2",
14
+ "framer-motion": "^11.0.0",
15
+ "lucide-react": "^0.471.0",
16
+ "pdfjs-dist": "^4.0.379",
17
+ "firebase": "^10.7.1"
18
+ },
19
+ "devDependencies": {
20
+ "@vitejs/plugin-react": "^4.1.0",
21
+ "autoprefixer": "^10.4.20",
22
+ "postcss": "^8.4.47",
23
+ "tailwindcss": "^3.4.14",
24
+ "vite": "^5.4.0"
25
+ }
26
+ }
frontend/postcss.config.cjs CHANGED
@@ -1,6 +1,6 @@
1
- module.exports = {
2
- plugins: {
3
- tailwindcss: {},
4
- autoprefixer: {}
5
- }
6
- };
 
1
+ module.exports = {
2
+ plugins: {
3
+ tailwindcss: {},
4
+ autoprefixer: {}
5
+ }
6
+ };
frontend/src/App.jsx CHANGED
@@ -1,30 +1,106 @@
1
- // frontend/src/App.jsx
2
-
3
- import React from "react";
4
- import { Routes, Route } from "react-router-dom";
5
- import Layout from "./Layout";
6
- import Dashboard from "./pages/Dashboard";
7
- import History from "./pages/History";
8
-
9
- export default function App() {
10
- return (
11
- <Routes>
12
- <Route
13
- path="/"
14
- element={
15
- <Layout currentPageName="Dashboard">
16
- <Dashboard />
17
- </Layout>
18
- }
19
- />
20
- <Route
21
- path="/history"
22
- element={
23
- <Layout currentPageName="History">
24
- <History />
25
- </Layout>
26
- }
27
- />
28
- </Routes>
29
- );
30
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // frontend/src/App.jsx
2
+
3
+ import React, { useEffect } from "react";
4
+ import { Routes, Route, useNavigate, useSearchParams } from "react-router-dom";
5
+ import { AuthProvider, useAuth } from "./contexts/AuthContext";
6
+ import Layout from "./Layout";
7
+ import Dashboard from "./pages/Dashboard";
8
+ import History from "./pages/History";
9
+ import ShareHandler from "./pages/ShareHandler";
10
+ import LoginForm from "./components/auth/LoginForm";
11
+
12
+ // Auth callback handler component
13
+ function AuthCallback() {
14
+ const [searchParams] = useSearchParams();
15
+ const { handleAuthCallback } = useAuth();
16
+ const navigate = useNavigate();
17
+
18
+ useEffect(() => {
19
+ const token = searchParams.get("token");
20
+ if (token) {
21
+ handleAuthCallback(token);
22
+ navigate("/");
23
+ } else {
24
+ navigate("/");
25
+ }
26
+ }, [searchParams, handleAuthCallback, navigate]);
27
+
28
+ return (
29
+ <div className="min-h-screen flex items-center justify-center">
30
+ <div className="text-center">
31
+ <p className="text-slate-600">Completing authentication...</p>
32
+ </div>
33
+ </div>
34
+ );
35
+ }
36
+
37
+ // Protected route wrapper
38
+ function ProtectedRoute({ children }) {
39
+ const { isAuthenticated, loading } = useAuth();
40
+
41
+ if (loading) {
42
+ return (
43
+ <div className="min-h-screen flex items-center justify-center">
44
+ <div className="text-center">
45
+ <div className="h-16 w-16 mx-auto rounded-2xl bg-indigo-100 flex items-center justify-center mb-4 animate-pulse">
46
+ <div className="h-8 w-8 rounded-lg bg-indigo-600"></div>
47
+ </div>
48
+ <p className="text-slate-600">Loading...</p>
49
+ </div>
50
+ </div>
51
+ );
52
+ }
53
+
54
+ if (!isAuthenticated) {
55
+ return <LoginForm />;
56
+ }
57
+
58
+ return children;
59
+ }
60
+
61
+ function AppRoutes() {
62
+ return (
63
+ <Routes>
64
+ <Route
65
+ path="/auth/callback"
66
+ element={<AuthCallback />}
67
+ />
68
+ <Route
69
+ path="/share/:token"
70
+ element={
71
+ <ProtectedRoute>
72
+ <ShareHandler />
73
+ </ProtectedRoute>
74
+ }
75
+ />
76
+ <Route
77
+ path="/"
78
+ element={
79
+ <ProtectedRoute>
80
+ <Layout currentPageName="Dashboard">
81
+ <Dashboard />
82
+ </Layout>
83
+ </ProtectedRoute>
84
+ }
85
+ />
86
+ <Route
87
+ path="/history"
88
+ element={
89
+ <ProtectedRoute>
90
+ <Layout currentPageName="History">
91
+ <History />
92
+ </Layout>
93
+ </ProtectedRoute>
94
+ }
95
+ />
96
+ </Routes>
97
+ );
98
+ }
99
+
100
+ export default function App() {
101
+ return (
102
+ <AuthProvider>
103
+ <AppRoutes />
104
+ </AuthProvider>
105
+ );
106
+ }
frontend/src/Layout.jsx CHANGED
@@ -1,143 +1,179 @@
1
- // frontend/src/Layout.jsx
2
-
3
- import React, { useState } from "react";
4
- import { Link } from "react-router-dom";
5
- import { createPageUrl } from "./utils";
6
- import {
7
- LayoutDashboard,
8
- History as HistoryIcon,
9
- ChevronLeft,
10
- Sparkles,
11
- } from "lucide-react";
12
- import { cn } from "@/lib/utils";
13
-
14
- // Import logo - Vite will process this and handle the path correctly
15
- // For production, the logo should be in frontend/public/logo.png
16
- // Vite will copy it to dist/logo.png during build
17
- const logoPath = "/logo.png";
18
-
19
- export default function Layout({ children, currentPageName }) {
20
- const [collapsed, setCollapsed] = useState(false);
21
-
22
- const navItems = [
23
- { name: "Dashboard", icon: LayoutDashboard, page: "Dashboard" },
24
- { name: "History", icon: HistoryIcon, page: "History" },
25
- ];
26
-
27
- return (
28
- <div className="min-h-screen bg-[#FAFAFA] flex">
29
- {/* Sidebar */}
30
- <aside
31
- className={cn(
32
- "fixed left-0 top-0 h-screen bg-white border-r border-slate-200/80 z-50 transition-all duration-300 ease-out flex flex-col",
33
- collapsed ? "w-[72px]" : "w-[260px]"
34
- )}
35
- >
36
- {/* Logo */}
37
- <div
38
- className={cn(
39
- "h-16 flex items-center border-b border-slate-100 px-4",
40
- collapsed ? "justify-center" : "justify-between"
41
- )}
42
- >
43
- <Link to={createPageUrl("Dashboard")} className="flex items-center gap-3">
44
- <div className="h-9 w-9 flex items-center justify-center flex-shrink-0">
45
- <img
46
- src={logoPath}
47
- alt="EZOFIS AI Logo"
48
- className="h-full w-full object-contain"
49
- onError={(e) => {
50
- // Fallback: hide image and show placeholder if logo not found
51
- e.target.style.display = 'none';
52
- }}
53
- />
54
- </div>
55
- {!collapsed && (
56
- <div className="flex flex-col">
57
- <span className="font-semibold text-slate-900 tracking-tight">EZOFIS AI</span>
58
- <span className="text-[10px] text-slate-400 font-medium tracking-wide uppercase">
59
- Agentic Extract
60
- </span>
61
- </div>
62
- )}
63
- </Link>
64
- {!collapsed && (
65
- <button
66
- onClick={() => setCollapsed(true)}
67
- className="h-7 w-7 rounded-lg hover:bg-slate-100 flex items-center justify-center text-slate-400 hover:text-slate-600 transition-colors"
68
- >
69
- <ChevronLeft className="h-4 w-4" />
70
- </button>
71
- )}
72
- </div>
73
-
74
- {/* Navigation */}
75
- <nav className="flex-1 p-3 space-y-1">
76
- {navItems.map((item) => {
77
- const isActive = currentPageName === item.page;
78
- return (
79
- <Link
80
- key={item.name}
81
- to={createPageUrl(item.page)}
82
- className={cn(
83
- "flex items-center gap-3 px-3 py-2.5 rounded-xl transition-all duration-200 group",
84
- isActive
85
- ? "bg-gradient-to-r from-indigo-50 to-violet-50 text-indigo-600"
86
- : "text-slate-500 hover:bg-slate-50 hover:text-slate-700"
87
- )}
88
- >
89
- <item.icon
90
- className={cn(
91
- "h-5 w-5 flex-shrink-0",
92
- isActive ? "text-indigo-600" : "text-slate-400 group-hover:text-slate-600"
93
- )}
94
- />
95
- {!collapsed && (
96
- <span className="font-medium text-sm">{item.name}</span>
97
- )}
98
- </Link>
99
- );
100
- })}
101
- </nav>
102
-
103
- {/* Collapse Toggle (when collapsed) */}
104
- {collapsed && (
105
- <button
106
- onClick={() => setCollapsed(false)}
107
- className="m-3 h-10 rounded-xl bg-slate-50 hover:bg-slate-100 flex items-center justify-center text-slate-400 hover:text-slate-600 transition-colors"
108
- >
109
- <ChevronLeft className="h-4 w-4 rotate-180" />
110
- </button>
111
- )}
112
-
113
- {/* Pro Badge */}
114
- {!collapsed && (
115
- <div className="p-3">
116
- <div className="p-4 rounded-2xl bg-gradient-to-br from-slate-900 to-slate-800 text-white">
117
- <div className="flex items-center gap-2 mb-2">
118
- <Sparkles className="h-4 w-4 text-amber-400" />
119
- <span className="text-xs font-semibold tracking-wide">DEPLOY THIS AGENT</span>
120
- </div>
121
- <p className="text-xs text-slate-400 mb-3">
122
- Unlock batch extractions &amp; API access
123
- </p>
124
- <button className="w-full py-2 px-3 rounded-lg bg-white text-slate-900 text-sm font-semibold hover:bg-slate-100 transition-colors">
125
- Talk to us
126
- </button>
127
- </div>
128
- </div>
129
- )}
130
- </aside>
131
-
132
- {/* Main Content */}
133
- <main
134
- className={cn(
135
- "flex-1 transition-all duration-300",
136
- collapsed ? "ml-[72px]" : "ml-[260px]"
137
- )}
138
- >
139
- {children}
140
- </main>
141
- </div>
142
- );
143
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // frontend/src/Layout.jsx
2
+
3
+ import React, { useState } from "react";
4
+ import { Link } from "react-router-dom";
5
+ import { createPageUrl } from "./utils";
6
+ import {
7
+ LayoutDashboard,
8
+ History as HistoryIcon,
9
+ ChevronLeft,
10
+ Sparkles,
11
+ LogOut,
12
+ User,
13
+ } from "lucide-react";
14
+ import { cn } from "@/lib/utils";
15
+ import { useAuth } from "./contexts/AuthContext";
16
+
17
+ // Import logo - Vite will process this and handle the path correctly
18
+ // For production, the logo should be in frontend/public/logo.png
19
+ // Vite will copy it to dist/logo.png during build
20
+ const logoPath = "/logo.png";
21
+
22
+ export default function Layout({ children, currentPageName }) {
23
+ const [collapsed, setCollapsed] = useState(false);
24
+ const { user, logout } = useAuth();
25
+
26
+ const navItems = [
27
+ { name: "Dashboard", icon: LayoutDashboard, page: "Dashboard" },
28
+ { name: "History", icon: HistoryIcon, page: "History" },
29
+ ];
30
+
31
+ return (
32
+ <div className="min-h-screen bg-[#FAFAFA] flex">
33
+ {/* Sidebar */}
34
+ <aside
35
+ className={cn(
36
+ "fixed left-0 top-0 h-screen bg-white border-r border-slate-200/80 z-50 transition-all duration-300 ease-out flex flex-col",
37
+ collapsed ? "w-[72px]" : "w-[260px]"
38
+ )}
39
+ >
40
+ {/* Logo */}
41
+ <div
42
+ className={cn(
43
+ "h-16 flex items-center border-b border-slate-100 px-4",
44
+ collapsed ? "justify-center" : "justify-between"
45
+ )}
46
+ >
47
+ <Link to={createPageUrl("Dashboard")} className="flex items-center gap-3">
48
+ <div className="h-9 w-9 flex items-center justify-center flex-shrink-0">
49
+ <img
50
+ src={logoPath}
51
+ alt="EZOFIS AI Logo"
52
+ className="h-full w-full object-contain"
53
+ onError={(e) => {
54
+ // Fallback: hide image and show placeholder if logo not found
55
+ e.target.style.display = 'none';
56
+ }}
57
+ />
58
+ </div>
59
+ {!collapsed && (
60
+ <div className="flex flex-col">
61
+ <span className="font-semibold text-slate-900 tracking-tight">EZOFIS AI</span>
62
+ <span className="text-[10px] text-slate-400 font-medium tracking-wide uppercase">
63
+ VRP Intelligence
64
+ </span>
65
+ </div>
66
+ )}
67
+ </Link>
68
+ {!collapsed && (
69
+ <button
70
+ onClick={() => setCollapsed(true)}
71
+ className="h-7 w-7 rounded-lg hover:bg-slate-100 flex items-center justify-center text-slate-400 hover:text-slate-600 transition-colors"
72
+ >
73
+ <ChevronLeft className="h-4 w-4" />
74
+ </button>
75
+ )}
76
+ </div>
77
+
78
+ {/* Navigation */}
79
+ <nav className="flex-1 p-3 space-y-1">
80
+ {navItems.map((item) => {
81
+ const isActive = currentPageName === item.page;
82
+ return (
83
+ <Link
84
+ key={item.name}
85
+ to={createPageUrl(item.page)}
86
+ className={cn(
87
+ "flex items-center gap-3 px-3 py-2.5 rounded-xl transition-all duration-200 group",
88
+ isActive
89
+ ? "bg-gradient-to-r from-indigo-50 to-violet-50 text-indigo-600"
90
+ : "text-slate-500 hover:bg-slate-50 hover:text-slate-700"
91
+ )}
92
+ >
93
+ <item.icon
94
+ className={cn(
95
+ "h-5 w-5 flex-shrink-0",
96
+ isActive ? "text-indigo-600" : "text-slate-400 group-hover:text-slate-600"
97
+ )}
98
+ />
99
+ {!collapsed && (
100
+ <span className="font-medium text-sm">{item.name}</span>
101
+ )}
102
+ </Link>
103
+ );
104
+ })}
105
+ </nav>
106
+
107
+ {/* Collapse Toggle (when collapsed) */}
108
+ {collapsed && (
109
+ <button
110
+ onClick={() => setCollapsed(false)}
111
+ className="m-3 h-10 rounded-xl bg-slate-50 hover:bg-slate-100 flex items-center justify-center text-slate-400 hover:text-slate-600 transition-colors"
112
+ >
113
+ <ChevronLeft className="h-4 w-4 rotate-180" />
114
+ </button>
115
+ )}
116
+
117
+ {/* Pro Badge */}
118
+ {!collapsed && (
119
+ <div className="p-3">
120
+ <div className="p-4 rounded-2xl bg-gradient-to-br from-slate-900 to-slate-800 text-white">
121
+ <div className="flex items-center gap-2 mb-2">
122
+ <Sparkles className="h-4 w-4 text-amber-400" />
123
+ <span className="text-xs font-semibold tracking-wide">DEPLOY CUSTOM AGENT</span>
124
+ </div>
125
+ <p className="text-xs text-slate-400 mb-3">
126
+ Batch extractions, custom model, field mapping, complex lineitems, tables, workflows, &amp; API access
127
+ </p>
128
+ <button className="w-full py-2 px-3 rounded-lg bg-white text-slate-900 text-sm font-semibold hover:bg-slate-100 transition-colors">
129
+ Book a Custom Demo
130
+ </button>
131
+ </div>
132
+ </div>
133
+ )}
134
+
135
+ {/* User Profile */}
136
+ {!collapsed && user && (
137
+ <div className="p-3 border-t border-slate-200">
138
+ <div className="flex items-center gap-3 p-3 rounded-xl bg-slate-50 hover:bg-slate-100 transition-colors">
139
+ {user.picture ? (
140
+ <img
141
+ src={user.picture}
142
+ alt={user.name || user.email}
143
+ className="h-10 w-10 rounded-lg object-cover"
144
+ />
145
+ ) : (
146
+ <div className="h-10 w-10 rounded-lg bg-indigo-100 flex items-center justify-center">
147
+ <User className="h-5 w-5 text-indigo-600" />
148
+ </div>
149
+ )}
150
+ <div className="flex-1 min-w-0">
151
+ <p className="text-sm font-medium text-slate-900 truncate">
152
+ {user.name || "User"}
153
+ </p>
154
+ <p className="text-xs text-slate-500 truncate">{user.email}</p>
155
+ </div>
156
+ </div>
157
+ <button
158
+ onClick={logout}
159
+ className="mt-2 w-full flex items-center gap-2 px-3 py-2 rounded-xl text-sm text-slate-600 hover:bg-red-50 hover:text-red-600 transition-colors"
160
+ >
161
+ <LogOut className="h-4 w-4" />
162
+ <span>Sign Out</span>
163
+ </button>
164
+ </div>
165
+ )}
166
+ </aside>
167
+
168
+ {/* Main Content */}
169
+ <main
170
+ className={cn(
171
+ "flex-1 transition-all duration-300",
172
+ collapsed ? "ml-[72px]" : "ml-[260px]"
173
+ )}
174
+ >
175
+ {children}
176
+ </main>
177
+ </div>
178
+ );
179
+ }
frontend/src/components/ErrorBoundary.jsx CHANGED
@@ -1,72 +1,72 @@
1
- import React from "react";
2
-
3
- class ErrorBoundary extends React.Component {
4
- constructor(props) {
5
- super(props);
6
- this.state = { hasError: false, error: null };
7
- }
8
-
9
- static getDerivedStateFromError(error) {
10
- return { hasError: true, error };
11
- }
12
-
13
- componentDidCatch(error, errorInfo) {
14
- console.error("Error caught by boundary:", error, errorInfo);
15
- }
16
-
17
- render() {
18
- if (this.state.hasError) {
19
- return (
20
- <div className="min-h-screen bg-[#FAFAFA] flex items-center justify-center p-8">
21
- <div className="max-w-md w-full bg-white rounded-2xl border border-red-200 p-8 shadow-lg">
22
- <div className="text-center">
23
- <div className="h-16 w-16 mx-auto rounded-full bg-red-100 flex items-center justify-center mb-4">
24
- <svg
25
- className="h-8 w-8 text-red-600"
26
- fill="none"
27
- viewBox="0 0 24 24"
28
- stroke="currentColor"
29
- >
30
- <path
31
- strokeLinecap="round"
32
- strokeLinejoin="round"
33
- strokeWidth={2}
34
- d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"
35
- />
36
- </svg>
37
- </div>
38
- <h2 className="text-xl font-bold text-slate-900 mb-2">
39
- Something went wrong
40
- </h2>
41
- <p className="text-sm text-slate-600 mb-6">
42
- The application encountered an error. Please refresh the page or contact support if the problem persists.
43
- </p>
44
- <button
45
- onClick={() => window.location.reload()}
46
- className="px-6 py-2 bg-indigo-600 text-white rounded-lg font-semibold hover:bg-indigo-700 transition-colors"
47
- >
48
- Refresh Page
49
- </button>
50
- {process.env.NODE_ENV === "development" && this.state.error && (
51
- <details className="mt-6 text-left">
52
- <summary className="text-sm text-slate-500 cursor-pointer mb-2">
53
- Error Details (Development Only)
54
- </summary>
55
- <pre className="text-xs bg-slate-100 p-4 rounded-lg overflow-auto max-h-64">
56
- {this.state.error.toString()}
57
- {this.state.error.stack}
58
- </pre>
59
- </details>
60
- )}
61
- </div>
62
- </div>
63
- </div>
64
- );
65
- }
66
-
67
- return this.props.children;
68
- }
69
- }
70
-
71
- export default ErrorBoundary;
72
-
 
1
+ import React from "react";
2
+
3
+ class ErrorBoundary extends React.Component {
4
+ constructor(props) {
5
+ super(props);
6
+ this.state = { hasError: false, error: null };
7
+ }
8
+
9
+ static getDerivedStateFromError(error) {
10
+ return { hasError: true, error };
11
+ }
12
+
13
+ componentDidCatch(error, errorInfo) {
14
+ console.error("Error caught by boundary:", error, errorInfo);
15
+ }
16
+
17
+ render() {
18
+ if (this.state.hasError) {
19
+ return (
20
+ <div className="min-h-screen bg-[#FAFAFA] flex items-center justify-center p-8">
21
+ <div className="max-w-md w-full bg-white rounded-2xl border border-red-200 p-8 shadow-lg">
22
+ <div className="text-center">
23
+ <div className="h-16 w-16 mx-auto rounded-full bg-red-100 flex items-center justify-center mb-4">
24
+ <svg
25
+ className="h-8 w-8 text-red-600"
26
+ fill="none"
27
+ viewBox="0 0 24 24"
28
+ stroke="currentColor"
29
+ >
30
+ <path
31
+ strokeLinecap="round"
32
+ strokeLinejoin="round"
33
+ strokeWidth={2}
34
+ d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"
35
+ />
36
+ </svg>
37
+ </div>
38
+ <h2 className="text-xl font-bold text-slate-900 mb-2">
39
+ Something went wrong
40
+ </h2>
41
+ <p className="text-sm text-slate-600 mb-6">
42
+ The application encountered an error. Please refresh the page or contact support if the problem persists.
43
+ </p>
44
+ <button
45
+ onClick={() => window.location.reload()}
46
+ className="px-6 py-2 bg-indigo-600 text-white rounded-lg font-semibold hover:bg-indigo-700 transition-colors"
47
+ >
48
+ Refresh Page
49
+ </button>
50
+ {process.env.NODE_ENV === "development" && this.state.error && (
51
+ <details className="mt-6 text-left">
52
+ <summary className="text-sm text-slate-500 cursor-pointer mb-2">
53
+ Error Details (Development Only)
54
+ </summary>
55
+ <pre className="text-xs bg-slate-100 p-4 rounded-lg overflow-auto max-h-64">
56
+ {this.state.error.toString()}
57
+ {this.state.error.stack}
58
+ </pre>
59
+ </details>
60
+ )}
61
+ </div>
62
+ </div>
63
+ </div>
64
+ );
65
+ }
66
+
67
+ return this.props.children;
68
+ }
69
+ }
70
+
71
+ export default ErrorBoundary;
72
+
frontend/src/components/ExportButtons.jsx CHANGED
@@ -1,320 +1,692 @@
1
- import React, { useState } from "react";
2
- import { motion, AnimatePresence } from "framer-motion";
3
- import {
4
- Download,
5
- Braces,
6
- FileCode2,
7
- Check,
8
- Share2,
9
- FileJson,
10
- Copy,
11
- Mail,
12
- Link2,
13
- } from "lucide-react";
14
- import { Button } from "@/components/ui/button";
15
- import {
16
- DropdownMenu,
17
- DropdownMenuContent,
18
- DropdownMenuItem,
19
- DropdownMenuSeparator,
20
- DropdownMenuTrigger,
21
- } from "@/components/ui/dropdown-menu";
22
- import { cn } from "@/lib/utils";
23
-
24
- // Helper functions from ExtractionOutput
25
- function prepareFieldsForOutput(fields, format = "json") {
26
- if (!fields || typeof fields !== "object") {
27
- return fields;
28
- }
29
-
30
- const output = { ...fields };
31
-
32
- // Remove full_text from top-level if pages array exists (to avoid duplication)
33
- if (output.pages && Array.isArray(output.pages) && output.pages.length > 0) {
34
- delete output.full_text;
35
-
36
- // Clean up each page: remove full_text from page.fields (it duplicates page.text)
37
- output.pages = output.pages.map(page => {
38
- const cleanedPage = { ...page };
39
- if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
40
- const cleanedFields = { ...cleanedPage.fields };
41
- // Remove full_text from page fields (duplicates page.text)
42
- delete cleanedFields.full_text;
43
- cleanedPage.fields = cleanedFields;
44
- }
45
- return cleanedPage;
46
- });
47
- }
48
-
49
- // For JSON and XML: restructure pages into separate top-level fields (page_1, page_2, etc.)
50
- if ((format === "json" || format === "xml") && output.pages && Array.isArray(output.pages)) {
51
- // Get top-level field keys (these are merged from all pages - avoid duplicating in page fields)
52
- const topLevelKeys = new Set(Object.keys(output).filter(k => k !== "pages" && k !== "full_text"));
53
-
54
- output.pages.forEach((page, idx) => {
55
- const pageNum = page.page_number || idx + 1;
56
- const pageFields = page.fields || {};
57
-
58
- // Remove duplicate fields from page.fields:
59
- // 1. Remove full_text (duplicates page.text)
60
- // 2. Remove fields that match top-level fields (already shown at root)
61
- const cleanedPageFields = {};
62
- for (const [key, value] of Object.entries(pageFields)) {
63
- // Skip full_text and fields that match top-level exactly
64
- if (key !== "full_text" && (!topLevelKeys.has(key) || (value !== output[key]))) {
65
- cleanedPageFields[key] = value;
66
- }
67
- }
68
-
69
- const pageObj = {
70
- text: page.text || "",
71
- confidence: page.confidence || 0,
72
- doc_type: page.doc_type || "other"
73
- };
74
-
75
- // Only add fields if there are unique page-specific fields
76
- if (Object.keys(cleanedPageFields).length > 0) {
77
- pageObj.fields = cleanedPageFields;
78
- }
79
-
80
- output[`page_${pageNum}`] = pageObj;
81
- });
82
- // Remove pages array - we now have page_1, page_2, etc. as separate fields
83
- delete output.pages;
84
- }
85
-
86
- return output;
87
- }
88
-
89
- function escapeXML(str) {
90
- return str
91
- .replace(/&/g, "&amp;")
92
- .replace(/</g, "&lt;")
93
- .replace(/>/g, "&gt;")
94
- .replace(/"/g, "&quot;")
95
- .replace(/'/g, "&apos;");
96
- }
97
-
98
- function objectToXML(obj, rootName = "extraction") {
99
- // Prepare fields - remove full_text if pages exist
100
- const preparedObj = prepareFieldsForOutput(obj, "xml");
101
-
102
- let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;
103
-
104
- const convert = (obj, indent = " ") => {
105
- for (const [key, value] of Object.entries(obj)) {
106
- if (value === null || value === undefined) continue;
107
-
108
- // Skip full_text if pages exist (already handled in prepareFieldsForOutput)
109
- if (key === "full_text" && obj.pages && Array.isArray(obj.pages) && obj.pages.length > 0) {
110
- continue;
111
- }
112
-
113
- if (Array.isArray(value)) {
114
- value.forEach((item) => {
115
- xml += `${indent}<${key}>\n`;
116
- if (typeof item === "object") {
117
- convert(item, indent + " ");
118
- } else {
119
- xml += `${indent} ${escapeXML(String(item))}\n`;
120
- }
121
- xml += `${indent}</${key}>\n`;
122
- });
123
- } else if (typeof value === "object") {
124
- xml += `${indent}<${key}>\n`;
125
- convert(value, indent + " ");
126
- xml += `${indent}</${key}>\n`;
127
- } else {
128
- xml += `${indent}<${key}>${escapeXML(String(value))}</${key}>\n`;
129
- }
130
- }
131
- };
132
-
133
- convert(preparedObj);
134
- xml += `</${rootName}>`;
135
- return xml;
136
- }
137
-
138
- export default function ExportButtons({ isComplete, extractionResult }) {
139
- const [downloading, setDownloading] = useState(null);
140
- const [copied, setCopied] = useState(false);
141
-
142
- const handleDownload = (format) => {
143
- if (!extractionResult || !extractionResult.fields) {
144
- console.error("No extraction data available");
145
- return;
146
- }
147
-
148
- setDownloading(format);
149
-
150
- try {
151
- const fields = extractionResult.fields;
152
- let content = "";
153
- let filename = "";
154
- let mimeType = "";
155
-
156
- if (format === "json") {
157
- const preparedFields = prepareFieldsForOutput(fields, "json");
158
- content = JSON.stringify(preparedFields, null, 2);
159
- filename = `extraction_${new Date().toISOString().split('T')[0]}.json`;
160
- mimeType = "application/json";
161
- } else if (format === "xml") {
162
- content = objectToXML(fields);
163
- filename = `extraction_${new Date().toISOString().split('T')[0]}.xml`;
164
- mimeType = "application/xml";
165
- }
166
-
167
- // Create blob and download
168
- const blob = new Blob([content], { type: mimeType });
169
- const url = URL.createObjectURL(blob);
170
- const link = document.createElement("a");
171
- link.href = url;
172
- link.download = filename;
173
- document.body.appendChild(link);
174
- link.click();
175
- document.body.removeChild(link);
176
- URL.revokeObjectURL(url);
177
-
178
- setDownloading(null);
179
- } catch (error) {
180
- console.error("Download error:", error);
181
- setDownloading(null);
182
- }
183
- };
184
-
185
- const handleCopyLink = () => {
186
- setCopied(true);
187
- setTimeout(() => setCopied(false), 2000);
188
- };
189
-
190
- if (!isComplete) return null;
191
-
192
- return (
193
- <motion.div
194
- initial={{ opacity: 0, y: 20 }}
195
- animate={{ opacity: 1, y: 0 }}
196
- className="flex items-center gap-3"
197
- >
198
- {/* JSON Download */}
199
- <Button
200
- onClick={() => handleDownload("json")}
201
- disabled={downloading === "json"}
202
- className={cn(
203
- "h-11 px-5 rounded-xl font-semibold transition-all duration-200",
204
- "bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700",
205
- "shadow-lg shadow-indigo-500/25 hover:shadow-xl hover:shadow-indigo-500/30",
206
- "text-white"
207
- )}
208
- >
209
- <AnimatePresence mode="wait">
210
- {downloading === "json" ? (
211
- <motion.div
212
- key="loading"
213
- initial={{ opacity: 0, scale: 0.8 }}
214
- animate={{ opacity: 1, scale: 1 }}
215
- exit={{ opacity: 0, scale: 0.8 }}
216
- className="flex items-center gap-2"
217
- >
218
- <motion.div
219
- animate={{ rotate: 360 }}
220
- transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
221
- >
222
- <Download className="h-4 w-4" />
223
- </motion.div>
224
- Downloading...
225
- </motion.div>
226
- ) : (
227
- <motion.div
228
- key="default"
229
- initial={{ opacity: 0, scale: 0.8 }}
230
- animate={{ opacity: 1, scale: 1 }}
231
- exit={{ opacity: 0, scale: 0.8 }}
232
- className="flex items-center gap-2"
233
- >
234
- <Braces className="h-4 w-4" />
235
- Download JSON
236
- </motion.div>
237
- )}
238
- </AnimatePresence>
239
- </Button>
240
-
241
- {/* XML Download */}
242
- <Button
243
- onClick={() => handleDownload("xml")}
244
- disabled={downloading === "xml"}
245
- variant="outline"
246
- className={cn(
247
- "h-11 px-5 rounded-xl font-semibold transition-all duration-200",
248
- "border-2 border-slate-200 hover:border-slate-300",
249
- "hover:bg-slate-50"
250
- )}
251
- >
252
- <AnimatePresence mode="wait">
253
- {downloading === "xml" ? (
254
- <motion.div
255
- key="loading"
256
- initial={{ opacity: 0, scale: 0.8 }}
257
- animate={{ opacity: 1, scale: 1 }}
258
- exit={{ opacity: 0, scale: 0.8 }}
259
- className="flex items-center gap-2"
260
- >
261
- <motion.div
262
- animate={{ rotate: 360 }}
263
- transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
264
- >
265
- <Download className="h-4 w-4" />
266
- </motion.div>
267
- Downloading...
268
- </motion.div>
269
- ) : (
270
- <motion.div
271
- key="default"
272
- initial={{ opacity: 0, scale: 0.8 }}
273
- animate={{ opacity: 1, scale: 1 }}
274
- exit={{ opacity: 0, scale: 0.8 }}
275
- className="flex items-center gap-2"
276
- >
277
- <FileCode2 className="h-4 w-4" />
278
- Download XML
279
- </motion.div>
280
- )}
281
- </AnimatePresence>
282
- </Button>
283
-
284
- {/* More Options Dropdown */}
285
- <DropdownMenu>
286
- <DropdownMenuTrigger asChild>
287
- <Button variant="ghost" className="h-11 w-11 rounded-xl">
288
- <Share2 className="h-4 w-4" />
289
- </Button>
290
- </DropdownMenuTrigger>
291
- <DropdownMenuContent align="end" className="w-48 rounded-xl p-2">
292
- <DropdownMenuItem
293
- className="rounded-lg cursor-pointer"
294
- onClick={handleCopyLink}
295
- >
296
- {copied ? (
297
- <Check className="h-4 w-4 mr-2 text-emerald-500" />
298
- ) : (
299
- <Link2 className="h-4 w-4 mr-2" />
300
- )}
301
- {copied ? "Link copied!" : "Copy share link"}
302
- </DropdownMenuItem>
303
- <DropdownMenuItem className="rounded-lg cursor-pointer">
304
- <Copy className="h-4 w-4 mr-2" />
305
- Copy to clipboard
306
- </DropdownMenuItem>
307
- <DropdownMenuSeparator />
308
- <DropdownMenuItem className="rounded-lg cursor-pointer">
309
- <Mail className="h-4 w-4 mr-2" />
310
- Send via email
311
- </DropdownMenuItem>
312
- <DropdownMenuItem className="rounded-lg cursor-pointer">
313
- <FileJson className="h-4 w-4 mr-2" />
314
- Export to Google Sheets
315
- </DropdownMenuItem>
316
- </DropdownMenuContent>
317
- </DropdownMenu>
318
- </motion.div>
319
- );
320
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState } from "react";
2
+ import { motion, AnimatePresence } from "framer-motion";
3
+ import {
4
+ Download,
5
+ Braces,
6
+ FileCode2,
7
+ Check,
8
+ Share2,
9
+ FileText,
10
+ Link2,
11
+ Mail,
12
+ } from "lucide-react";
13
+ import { Button } from "@/components/ui/button";
14
+ import {
15
+ DropdownMenu,
16
+ DropdownMenuContent,
17
+ DropdownMenuItem,
18
+ DropdownMenuSeparator,
19
+ DropdownMenuTrigger,
20
+ } from "@/components/ui/dropdown-menu";
21
+ import { cn } from "@/lib/utils";
22
+ import ShareModal from "@/components/ShareModal";
23
+ import ShareLinkModal from "@/components/ShareLinkModal";
24
+ import { shareExtraction, createShareLink } from "@/services/api";
25
+
26
+ // Helper functions from ExtractionOutput
27
+ function prepareFieldsForOutput(fields, format = "json") {
28
+ if (!fields || typeof fields !== "object") {
29
+ return fields;
30
+ }
31
+
32
+ const output = { ...fields };
33
+
34
+ // Extract Fields from root level if it exists
35
+ const rootFields = output.Fields;
36
+ // Remove Fields from output temporarily (will be added back at top)
37
+ delete output.Fields;
38
+
39
+ // Remove full_text from top-level if pages array exists (to avoid duplication)
40
+ if (output.pages && Array.isArray(output.pages) && output.pages.length > 0) {
41
+ delete output.full_text;
42
+
43
+ // Clean up each page: remove full_text from page.fields (it duplicates page.text)
44
+ output.pages = output.pages.map(page => {
45
+ const cleanedPage = { ...page };
46
+ if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
47
+ const cleanedFields = { ...cleanedPage.fields };
48
+ // Remove full_text from page fields (duplicates page.text)
49
+ delete cleanedFields.full_text;
50
+ cleanedPage.fields = cleanedFields;
51
+ }
52
+ return cleanedPage;
53
+ });
54
+ }
55
+
56
+ // For JSON and XML: restructure pages into separate top-level fields (page_1, page_2, etc.)
57
+ if ((format === "json" || format === "xml") && output.pages && Array.isArray(output.pages)) {
58
+ // Get top-level field keys (these are merged from all pages - avoid duplicating in page fields)
59
+ const topLevelKeys = new Set(Object.keys(output).filter(k => k !== "pages" && k !== "full_text" && k !== "Fields"));
60
+
61
+ output.pages.forEach((page, idx) => {
62
+ const pageNum = page.page_number || idx + 1;
63
+ const pageFields = page.fields || {};
64
+
65
+ // Remove duplicate fields from page.fields:
66
+ // 1. Remove full_text (duplicates page.text)
67
+ // 2. Remove fields that match top-level fields (already shown at root)
68
+ const cleanedPageFields = {};
69
+ for (const [key, value] of Object.entries(pageFields)) {
70
+ // Skip full_text and fields that match top-level exactly
71
+ if (key !== "full_text" && (!topLevelKeys.has(key) || (value !== output[key]))) {
72
+ cleanedPageFields[key] = value;
73
+ }
74
+ }
75
+
76
+ const pageObj = {
77
+ text: page.text || "",
78
+ confidence: page.confidence || 0,
79
+ doc_type: page.doc_type || "other"
80
+ };
81
+
82
+ // Add table and footer_notes if they exist
83
+ if (page.table && Array.isArray(page.table) && page.table.length > 0) {
84
+ pageObj.table = page.table;
85
+ }
86
+ if (page.footer_notes && Array.isArray(page.footer_notes) && page.footer_notes.length > 0) {
87
+ pageObj.footer_notes = page.footer_notes;
88
+ }
89
+
90
+ // Only add fields if there are unique page-specific fields
91
+ if (Object.keys(cleanedPageFields).length > 0) {
92
+ pageObj.fields = cleanedPageFields;
93
+ }
94
+
95
+ output[`page_${pageNum}`] = pageObj;
96
+ });
97
+ // Remove pages array - we now have page_1, page_2, etc. as separate fields
98
+ delete output.pages;
99
+ }
100
+
101
+ // Handle page_X structure (from backend) - remove Fields from page objects if they exist
102
+ if (output && typeof output === "object") {
103
+ const pageKeys = Object.keys(output).filter(k => k.startsWith("page_"));
104
+ for (const pageKey of pageKeys) {
105
+ const pageData = output[pageKey];
106
+ if (pageData && typeof pageData === "object") {
107
+ // Remove Fields from page objects (it's now at root level)
108
+ delete pageData.Fields;
109
+ delete pageData.metadata;
110
+ }
111
+ }
112
+ }
113
+
114
+ // Rebuild output with Fields at the top (only if it exists and is not empty)
115
+ const finalOutput = {};
116
+ if (rootFields && typeof rootFields === "object" && Object.keys(rootFields).length > 0) {
117
+ finalOutput.Fields = rootFields;
118
+ }
119
+
120
+ // Add all other keys
121
+ Object.keys(output).forEach(key => {
122
+ finalOutput[key] = output[key];
123
+ });
124
+
125
+ return finalOutput;
126
+ }
127
+
128
/**
 * Escape the five XML special characters (`&`, `<`, `>`, `"`, `'`) in a string
 * so it can be placed inside element content.
 *
 * @param {string} str - Text to escape; callers are expected to pass a string.
 * @returns {string} The escaped text.
 */
function escapeXML(str) {
  const entityByChar = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };

  // One pass over the input; each special character maps to its entity.
  return str.replace(/[&<>"']/g, (char) => entityByChar[char]);
}
136
+
137
/**
 * Serialise an extraction payload to an XML document string.
 *
 * The payload is first normalised with `prepareFieldsForOutput(obj, "xml")`.
 * Nested objects become nested elements, each array item is emitted as a
 * repeated element named after the array's key, and `null`/`undefined` values
 * (including array items) are skipped.
 *
 * Keys are converted to well-formed element names: runs of characters that are
 * not letters, digits, `_`, `.` or `-` become `_`, and a name that does not
 * start with a letter or `_` is prefixed with `_`. Keys that are already valid
 * names are emitted unchanged.
 *
 * @param {*} obj - Extraction payload (normally the `fields` object).
 * @param {string} [rootName="extraction"] - Name of the document's root element.
 * @returns {string} The XML document, without a trailing newline.
 */
function objectToXML(obj, rootName = "extraction") {
  const prepared = prepareFieldsForOutput(obj, "xml");

  // Field labels routinely contain spaces or start with digits, and nested
  // arrays expose numeric index keys; none of those are legal element names.
  const toElementName = (rawName) => {
    const cleaned = String(rawName)
      .trim()
      .replace(/[^\p{L}\p{N}_.\-]+/gu, "_");
    return /^[\p{L}_]/u.test(cleaned) ? cleaned : `_${cleaned}`;
  };

  const rootTag = toElementName(rootName);
  const lines = [`<?xml version="1.0" encoding="UTF-8"?>`, `<${rootTag}>`];

  const appendNode = (node, indent) => {
    for (const [key, value] of Object.entries(node)) {
      if (value === null || value === undefined) continue;

      // Skip full_text when a sibling pages array already carries the text.
      if (key === "full_text" && Array.isArray(node.pages) && node.pages.length > 0) {
        continue;
      }

      const tag = toElementName(key);

      if (Array.isArray(value)) {
        for (const item of value) {
          // typeof null === "object": guard before recursing into the item.
          if (item === null || item === undefined) continue;

          lines.push(`${indent}<${tag}>`);
          if (typeof item === "object") {
            appendNode(item, `${indent} `);
          } else {
            lines.push(`${indent} ${escapeXML(String(item))}`);
          }
          lines.push(`${indent}</${tag}>`);
        }
      } else if (typeof value === "object") {
        lines.push(`${indent}<${tag}>`);
        appendNode(value, `${indent} `);
        lines.push(`${indent}</${tag}>`);
      } else {
        lines.push(`${indent}<${tag}>${escapeXML(String(value))}</${tag}>`);
      }
    }
  };

  if (prepared !== null && prepared !== undefined) {
    if (typeof prepared === "object") {
      appendNode(prepared, " ");
    } else {
      // A bare primitive payload becomes the root element's text content.
      lines.push(` ${escapeXML(String(prepared))}`);
    }
  }

  lines.push(`</${rootTag}>`);
  return lines.join("\n");
}
176
+
177
+ export default function ExportButtons({ isComplete, extractionResult }) {
178
+ const [downloading, setDownloading] = useState(null);
179
+ const [copied, setCopied] = useState(false);
180
+ const [isShareModalOpen, setIsShareModalOpen] = useState(false);
181
+ const [isShareLinkModalOpen, setIsShareLinkModalOpen] = useState(false);
182
+ const [shareLink, setShareLink] = useState("");
183
+ const [isGeneratingLink, setIsGeneratingLink] = useState(false);
184
+
185
+ // Helper function to extract text from fields (same as in ExtractionOutput)
186
+ const extractTextFromFields = (fields) => {
187
+ if (!fields || typeof fields !== "object") {
188
+ return "";
189
+ }
190
+
191
+ // Check for page_X structure first (preferred format)
192
+ const pageKeys = Object.keys(fields).filter(key => key.startsWith("page_"));
193
+ if (pageKeys.length > 0) {
194
+ // Get text from first page (or combine all pages)
195
+ const pageTexts = pageKeys.map(key => {
196
+ const page = fields[key];
197
+ if (page && page.text) {
198
+ return page.text;
199
+ }
200
+ return "";
201
+ }).filter(text => text);
202
+
203
+ if (pageTexts.length > 0) {
204
+ return pageTexts.join("\n\n");
205
+ }
206
+ }
207
+
208
+ // Fallback to full_text
209
+ if (fields.full_text) {
210
+ return fields.full_text;
211
+ }
212
+
213
+ return "";
214
+ };
215
+
216
+ // Helper function to escape HTML
217
+ const escapeHtml = (text) => {
218
+ if (!text) return '';
219
+ const div = document.createElement('div');
220
+ div.textContent = text;
221
+ return div.innerHTML;
222
+ };
223
+
224
+ // Helper function to convert pipe-separated tables to HTML tables
225
+ const convertPipeTablesToHTML = (text) => {
226
+ if (!text) return text;
227
+
228
+ const lines = text.split('\n');
229
+ const result = [];
230
+ let i = 0;
231
+
232
+ while (i < lines.length) {
233
+ const line = lines[i];
234
+
235
+ // Check if this line looks like a table row (has multiple pipes)
236
+ if (line.includes('|') && line.split('|').length >= 3) {
237
+ // Check if it's a separator line (only |, -, :, spaces)
238
+ const isSeparator = /^[\s|\-:]+$/.test(line.trim());
239
+
240
+ if (!isSeparator) {
241
+ // Start of a table - collect all table rows
242
+ const tableRows = [];
243
+ let j = i;
244
+
245
+ // Collect header row
246
+ const headerLine = lines[j];
247
+ const headerCells = headerLine.split('|').map(cell => cell.trim()).filter(cell => cell || cell === '');
248
+ // Remove empty cells at start/end
249
+ if (headerCells.length > 0 && !headerCells[0]) headerCells.shift();
250
+ if (headerCells.length > 0 && !headerCells[headerCells.length - 1]) headerCells.pop();
251
+
252
+ if (headerCells.length >= 2) {
253
+ tableRows.push(headerCells);
254
+ j++;
255
+
256
+ // Skip separator line if present
257
+ if (j < lines.length && /^[\s|\-:]+$/.test(lines[j].trim())) {
258
+ j++;
259
+ }
260
+
261
+ // Collect data rows
262
+ while (j < lines.length) {
263
+ const rowLine = lines[j];
264
+ if (!rowLine.trim()) break; // Empty line ends table
265
+
266
+ // Check if it's still a table row
267
+ if (rowLine.includes('|') && rowLine.split('|').length >= 2) {
268
+ const isRowSeparator = /^[\s|\-:]+$/.test(rowLine.trim());
269
+ if (!isRowSeparator) {
270
+ const rowCells = rowLine.split('|').map(cell => cell.trim());
271
+ // Remove empty cells at start/end
272
+ if (rowCells.length > 0 && !rowCells[0]) rowCells.shift();
273
+ if (rowCells.length > 0 && !rowCells[rowCells.length - 1]) rowCells.pop();
274
+ tableRows.push(rowCells);
275
+ j++;
276
+ } else {
277
+ j++;
278
+ }
279
+ } else {
280
+ break; // Not a table row anymore
281
+ }
282
+ }
283
+
284
+ // Convert to HTML table
285
+ if (tableRows.length > 0) {
286
+ let htmlTable = '<table class="border-collapse border border-gray-300 w-full my-4">\n<thead>\n<tr>';
287
+
288
+ // Header row
289
+ tableRows[0].forEach(cell => {
290
+ htmlTable += `<th class="border border-gray-300 px-4 py-2 bg-gray-100 font-semibold text-left">${escapeHtml(cell)}</th>`;
291
+ });
292
+ htmlTable += '</tr>\n</thead>\n<tbody>\n';
293
+
294
+ // Data rows
295
+ for (let rowIdx = 1; rowIdx < tableRows.length; rowIdx++) {
296
+ htmlTable += '<tr>';
297
+ tableRows[rowIdx].forEach((cell, colIdx) => {
298
+ // Use header cell count to ensure alignment
299
+ const cellContent = cell || '';
300
+ htmlTable += `<td class="border border-gray-300 px-4 py-2">${escapeHtml(cellContent)}</td>`;
301
+ });
302
+ htmlTable += '</tr>\n';
303
+ }
304
+
305
+ htmlTable += '</tbody>\n</table>';
306
+ result.push(htmlTable);
307
+ i = j;
308
+ continue;
309
+ }
310
+ }
311
+ }
312
+ }
313
+
314
+ // Not a table row, add as-is
315
+ result.push(line);
316
+ i++;
317
+ }
318
+
319
+ return result.join('\n');
320
+ };
321
+
322
+ // Helper function to render markdown to HTML (same as in ExtractionOutput)
323
+ const renderMarkdownToHTML = (text) => {
324
+ if (!text) return "";
325
+
326
+ let html = text;
327
+
328
+ // FIRST: Convert pipe-separated tables to HTML tables
329
+ html = convertPipeTablesToHTML(html);
330
+
331
+ // Convert LaTeX-style superscripts/subscripts FIRST
332
+ html = html.replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, '<sup>$1</sup>');
333
+ html = html.replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, '<sup>$1</sup>');
334
+ html = html.replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, '<sub>$1</sub>');
335
+ html = html.replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, '<sub>$1</sub>');
336
+
337
+ // Protect HTML table blocks
338
+ const htmlBlocks = [];
339
+ let htmlBlockIndex = 0;
340
+
341
+ html = html.replace(/<table[\s\S]*?<\/table>/gi, (match) => {
342
+ const placeholder = `__HTML_BLOCK_${htmlBlockIndex}__`;
343
+ htmlBlocks[htmlBlockIndex] = match;
344
+ htmlBlockIndex++;
345
+ return placeholder;
346
+ });
347
+
348
+ // Convert markdown headers
349
+ html = html.replace(/^### (.*$)/gim, '<h3>$1</h3>');
350
+ html = html.replace(/^## (.*$)/gim, '<h2>$1</h2>');
351
+ html = html.replace(/^# (.*$)/gim, '<h1>$1</h1>');
352
+
353
+ // Convert markdown bold/italic
354
+ html = html.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
355
+ html = html.replace(/\*(.*?)\*/g, '<em>$1</em>');
356
+
357
+ // Convert markdown links
358
+ html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>');
359
+
360
+ // Process line breaks
361
+ const parts = html.split(/(__HTML_BLOCK_\d+__)/);
362
+ const processedParts = parts.map((part) => {
363
+ if (part.match(/^__HTML_BLOCK_\d+__$/)) {
364
+ const blockIndex = parseInt(part.match(/\d+/)[0]);
365
+ return htmlBlocks[blockIndex];
366
+ } else {
367
+ let processed = part;
368
+ processed = processed.replace(/\n\n+/g, '</p><p>');
369
+ processed = processed.replace(/([^\n>])\n([^\n<])/g, '$1<br>$2');
370
+ if (processed.trim() && !processed.trim().startsWith('<')) {
371
+ processed = '<p>' + processed + '</p>';
372
+ }
373
+ return processed;
374
+ }
375
+ });
376
+
377
+ html = processedParts.join('');
378
+ html = html.replace(/<p><\/p>/g, '');
379
+ html = html.replace(/<p>\s*<br>\s*<\/p>/g, '');
380
+ html = html.replace(/<p>\s*<\/p>/g, '');
381
+
382
+ return html;
383
+ };
384
+
385
+ const handleDownload = async (format) => {
386
+ if (!extractionResult || !extractionResult.fields) {
387
+ console.error("No extraction data available");
388
+ return;
389
+ }
390
+
391
+ setDownloading(format);
392
+
393
+ try {
394
+ const fields = extractionResult.fields;
395
+ let content = "";
396
+ let filename = "";
397
+ let mimeType = "";
398
+
399
+ if (format === "json") {
400
+ const preparedFields = prepareFieldsForOutput(fields, "json");
401
+ content = JSON.stringify(preparedFields, null, 2);
402
+ filename = `extraction_${new Date().toISOString().split('T')[0]}.json`;
403
+ mimeType = "application/json";
404
+ } else if (format === "xml") {
405
+ content = objectToXML(fields);
406
+ filename = `extraction_${new Date().toISOString().split('T')[0]}.xml`;
407
+ mimeType = "application/xml";
408
+ } else if (format === "docx") {
409
+ // For DOCX, create a Word-compatible HTML document that preserves layout
410
+ // Extract text and convert to HTML (same as text viewer)
411
+ const textContent = extractTextFromFields(fields);
412
+ const htmlContent = renderMarkdownToHTML(textContent);
413
+
414
+ // Create a Word-compatible HTML document with proper MIME type
415
+ // Word can open HTML files with .docx extension if we use the right MIME type
416
+ const wordHTML = `<!DOCTYPE html>
417
+ <html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns="http://www.w3.org/TR/REC-html40">
418
+ <head>
419
+ <meta charset="UTF-8">
420
+ <meta name="ProgId" content="Word.Document">
421
+ <meta name="Generator" content="Microsoft Word">
422
+ <meta name="Originator" content="Microsoft Word">
423
+ <!--[if gte mso 9]><xml>
424
+ <w:WordDocument>
425
+ <w:View>Print</w:View>
426
+ <w:Zoom>100</w:Zoom>
427
+ <w:DoNotOptimizeForBrowser/>
428
+ </w:WordDocument>
429
+ </xml><![endif]-->
430
+ <title>Document Extraction</title>
431
+ <style>
432
+ @page {
433
+ size: 8.5in 11in;
434
+ margin: 1in;
435
+ }
436
+ body {
437
+ font-family: 'Calibri', 'Arial', sans-serif;
438
+ font-size: 11pt;
439
+ line-height: 1.6;
440
+ margin: 0;
441
+ color: #333;
442
+ }
443
+ h1 {
444
+ font-size: 18pt;
445
+ font-weight: bold;
446
+ color: #0f172a;
447
+ margin-top: 24pt;
448
+ margin-bottom: 12pt;
449
+ page-break-after: avoid;
450
+ }
451
+ h2 {
452
+ font-size: 16pt;
453
+ font-weight: 600;
454
+ color: #0f172a;
455
+ margin-top: 20pt;
456
+ margin-bottom: 10pt;
457
+ page-break-after: avoid;
458
+ }
459
+ h3 {
460
+ font-size: 14pt;
461
+ font-weight: 600;
462
+ color: #1e293b;
463
+ margin-top: 16pt;
464
+ margin-bottom: 8pt;
465
+ page-break-after: avoid;
466
+ }
467
+ p {
468
+ margin-top: 6pt;
469
+ margin-bottom: 6pt;
470
+ }
471
+ table {
472
+ width: 100%;
473
+ border-collapse: collapse;
474
+ margin: 12pt 0;
475
+ font-size: 10pt;
476
+ page-break-inside: avoid;
477
+ }
478
+ table th {
479
+ background-color: #f8fafc;
480
+ border: 1pt solid #cbd5e1;
481
+ padding: 6pt;
482
+ text-align: left;
483
+ font-weight: 600;
484
+ color: #0f172a;
485
+ }
486
+ table td {
487
+ border: 1pt solid #cbd5e1;
488
+ padding: 6pt;
489
+ color: #334155;
490
+ }
491
+ table tr:nth-child(even) {
492
+ background-color: #f8fafc;
493
+ }
494
+ sup {
495
+ font-size: 0.75em;
496
+ vertical-align: super;
497
+ line-height: 0;
498
+ }
499
+ sub {
500
+ font-size: 0.75em;
501
+ vertical-align: sub;
502
+ line-height: 0;
503
+ }
504
+ strong {
505
+ font-weight: 600;
506
+ }
507
+ em {
508
+ font-style: italic;
509
+ }
510
+ a {
511
+ color: #4f46e5;
512
+ text-decoration: underline;
513
+ }
514
+ </style>
515
+ </head>
516
+ <body>
517
+ ${htmlContent}
518
+ </body>
519
+ </html>`;
520
+
521
+ content = wordHTML;
522
+ filename = `extraction_${new Date().toISOString().split('T')[0]}.doc`;
523
+ mimeType = "application/msword";
524
+ }
525
+
526
+ // Create blob and download
527
+ const blob = new Blob([content], { type: mimeType });
528
+ const url = URL.createObjectURL(blob);
529
+ const link = document.createElement("a");
530
+ link.href = url;
531
+ link.download = filename;
532
+ document.body.appendChild(link);
533
+ link.click();
534
+ document.body.removeChild(link);
535
+ URL.revokeObjectURL(url);
536
+
537
+ setDownloading(null);
538
+ } catch (error) {
539
+ console.error("Download error:", error);
540
+ setDownloading(null);
541
+ }
542
+ };
543
+
544
+ const handleCopyLink = async () => {
545
+ if (!extractionResult?.id) return;
546
+
547
+ setIsGeneratingLink(true);
548
+ setIsShareLinkModalOpen(true);
549
+ setShareLink("");
550
+
551
+ try {
552
+ const result = await createShareLink(extractionResult.id);
553
+ if (result.success && result.share_link) {
554
+ setShareLink(result.share_link);
555
+ } else {
556
+ throw new Error("Failed to generate share link");
557
+ }
558
+ } catch (err) {
559
+ console.error("Failed to create share link:", err);
560
+ setShareLink("");
561
+ // Still show modal but with error state
562
+ } finally {
563
+ setIsGeneratingLink(false);
564
+ }
565
+ };
566
+
567
+ const handleShare = async (extractionId, recipientEmail) => {
568
+ await shareExtraction(extractionId, recipientEmail);
569
+ };
570
+
571
+ if (!isComplete) return null;
572
+
573
+ return (
574
+ <motion.div
575
+ initial={{ opacity: 0, y: 20 }}
576
+ animate={{ opacity: 1, y: 0 }}
577
+ className="flex items-center gap-3"
578
+ >
579
+ {/* Export Options Dropdown */}
580
+ <DropdownMenu>
581
+ <DropdownMenuTrigger asChild>
582
+ <Button
583
+ variant="ghost"
584
+ className="h-11 w-11 rounded-xl hover:bg-slate-100"
585
+ disabled={downloading !== null}
586
+ >
587
+ {downloading ? (
588
+ <motion.div
589
+ animate={{ rotate: 360 }}
590
+ transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
591
+ >
592
+ <Download className="h-4 w-4" />
593
+ </motion.div>
594
+ ) : (
595
+ <Share2 className="h-4 w-4" />
596
+ )}
597
+ </Button>
598
+ </DropdownMenuTrigger>
599
+ <DropdownMenuContent align="end" className="w-56 rounded-xl p-2">
600
+ <DropdownMenuItem
601
+ className="rounded-lg cursor-pointer"
602
+ onClick={() => setIsShareModalOpen(true)}
603
+ >
604
+ <Mail className="h-4 w-4 mr-2 text-indigo-600" />
605
+ Share output
606
+ </DropdownMenuItem>
607
+ <DropdownMenuItem
608
+ className="rounded-lg cursor-pointer"
609
+ onClick={handleCopyLink}
610
+ >
611
+ <Link2 className="h-4 w-4 mr-2 text-indigo-600" />
612
+ Copy share link
613
+ </DropdownMenuItem>
614
+ <DropdownMenuSeparator />
615
+ <DropdownMenuItem
616
+ className="rounded-lg cursor-pointer"
617
+ onClick={() => handleDownload("docx")}
618
+ disabled={downloading === "docx"}
619
+ >
620
+ {downloading === "docx" ? (
621
+ <motion.div
622
+ animate={{ rotate: 360 }}
623
+ transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
624
+ className="h-4 w-4 mr-2"
625
+ >
626
+ <Download className="h-4 w-4" />
627
+ </motion.div>
628
+ ) : (
629
+ <FileText className="h-4 w-4 mr-2 text-blue-600" />
630
+ )}
631
+ Download Docx
632
+ </DropdownMenuItem>
633
+ <DropdownMenuItem
634
+ className="rounded-lg cursor-pointer"
635
+ onClick={() => handleDownload("json")}
636
+ disabled={downloading === "json"}
637
+ >
638
+ {downloading === "json" ? (
639
+ <motion.div
640
+ animate={{ rotate: 360 }}
641
+ transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
642
+ className="h-4 w-4 mr-2"
643
+ >
644
+ <Download className="h-4 w-4" />
645
+ </motion.div>
646
+ ) : (
647
+ <Braces className="h-4 w-4 mr-2 text-indigo-600" />
648
+ )}
649
+ Download JSON
650
+ </DropdownMenuItem>
651
+ <DropdownMenuItem
652
+ className="rounded-lg cursor-pointer"
653
+ onClick={() => handleDownload("xml")}
654
+ disabled={downloading === "xml"}
655
+ >
656
+ {downloading === "xml" ? (
657
+ <motion.div
658
+ animate={{ rotate: 360 }}
659
+ transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
660
+ className="h-4 w-4 mr-2"
661
+ >
662
+ <Download className="h-4 w-4" />
663
+ </motion.div>
664
+ ) : (
665
+ <FileCode2 className="h-4 w-4 mr-2 text-slate-600" />
666
+ )}
667
+ Download XML
668
+ </DropdownMenuItem>
669
+ </DropdownMenuContent>
670
+ </DropdownMenu>
671
+
672
+ {/* Share Modal */}
673
+ <ShareModal
674
+ isOpen={isShareModalOpen}
675
+ onClose={() => setIsShareModalOpen(false)}
676
+ onShare={handleShare}
677
+ extractionId={extractionResult?.id}
678
+ />
679
+
680
+ {/* Share Link Modal */}
681
+ <ShareLinkModal
682
+ isOpen={isShareLinkModalOpen}
683
+ onClose={() => {
684
+ setIsShareLinkModalOpen(false);
685
+ setShareLink("");
686
+ }}
687
+ shareLink={shareLink}
688
+ isLoading={isGeneratingLink}
689
+ />
690
+ </motion.div>
691
+ );
692
+ }
frontend/src/components/ShareLinkModal.jsx ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState, useEffect } from "react";
2
+ import { motion, AnimatePresence } from "framer-motion";
3
+ import { X, Copy, Check, Loader2 } from "lucide-react";
4
+ import { Button } from "@/components/ui/button";
5
+ import { Input } from "@/components/ui/input";
6
+
7
+ export default function ShareLinkModal({ isOpen, onClose, shareLink, isLoading }) {
8
+ const [copied, setCopied] = useState(false);
9
+
10
+ useEffect(() => {
11
+ if (!isOpen) {
12
+ setCopied(false);
13
+ }
14
+ }, [isOpen]);
15
+
16
+ const handleCopy = async () => {
17
+ if (!shareLink) return;
18
+
19
+ try {
20
+ await navigator.clipboard.writeText(shareLink);
21
+ setCopied(true);
22
+ setTimeout(() => setCopied(false), 2000);
23
+ } catch (err) {
24
+ // Fallback for older browsers
25
+ const textArea = document.createElement("textarea");
26
+ textArea.value = shareLink;
27
+ textArea.style.position = "fixed";
28
+ textArea.style.opacity = "0";
29
+ document.body.appendChild(textArea);
30
+ textArea.select();
31
+ try {
32
+ document.execCommand("copy");
33
+ setCopied(true);
34
+ setTimeout(() => setCopied(false), 2000);
35
+ } catch (fallbackErr) {
36
+ console.error("Failed to copy:", fallbackErr);
37
+ }
38
+ document.body.removeChild(textArea);
39
+ }
40
+ };
41
+
42
+ if (!isOpen) return null;
43
+
44
+ return (
45
+ <AnimatePresence>
46
+ <div className="fixed inset-0 z-50 flex items-center justify-center">
47
+ {/* Backdrop */}
48
+ <motion.div
49
+ initial={{ opacity: 0 }}
50
+ animate={{ opacity: 1 }}
51
+ exit={{ opacity: 0 }}
52
+ className="absolute inset-0 bg-black/50 backdrop-blur-sm"
53
+ onClick={onClose}
54
+ />
55
+
56
+ {/* Modal */}
57
+ <motion.div
58
+ initial={{ opacity: 0, scale: 0.95, y: 20 }}
59
+ animate={{ opacity: 1, scale: 1, y: 0 }}
60
+ exit={{ opacity: 0, scale: 0.95, y: 20 }}
61
+ className="relative z-10 w-full max-w-md mx-4 bg-white rounded-2xl shadow-2xl overflow-hidden"
62
+ onClick={(e) => e.stopPropagation()}
63
+ >
64
+ {/* Header */}
65
+ <div className="px-6 py-4 border-b border-slate-200 flex items-center justify-between">
66
+ <h2 className="text-xl font-semibold text-slate-900">Copy Share Link</h2>
67
+ <button
68
+ onClick={onClose}
69
+ disabled={isLoading}
70
+ className="p-2 rounded-lg hover:bg-slate-100 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
71
+ >
72
+ <X className="h-5 w-5 text-slate-500" />
73
+ </button>
74
+ </div>
75
+
76
+ {/* Content */}
77
+ <div className="px-6 py-6">
78
+ {isLoading ? (
79
+ <div className="text-center py-8">
80
+ <Loader2 className="h-8 w-8 mx-auto mb-4 text-indigo-600 animate-spin" />
81
+ <p className="text-sm text-slate-600">Generating share link...</p>
82
+ </div>
83
+ ) : shareLink ? (
84
+ <div className="space-y-4">
85
+ <div>
86
+ <label className="block text-sm font-medium text-slate-700 mb-2">
87
+ Share Link
88
+ </label>
89
+ <div className="flex gap-2">
90
+ <Input
91
+ type="text"
92
+ value={shareLink}
93
+ readOnly
94
+ className="flex-1 h-12 rounded-xl border-slate-200 bg-slate-50 text-sm font-mono"
95
+ />
96
+ <Button
97
+ onClick={handleCopy}
98
+ className="h-12 px-4 rounded-xl bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700"
99
+ >
100
+ {copied ? (
101
+ <>
102
+ <Check className="h-4 w-4 mr-2" />
103
+ Copied!
104
+ </>
105
+ ) : (
106
+ <>
107
+ <Copy className="h-4 w-4 mr-2" />
108
+ Copy
109
+ </>
110
+ )}
111
+ </Button>
112
+ </div>
113
+ </div>
114
+ <p className="text-xs text-slate-500">
115
+ Share this link with anyone you want to give access to this extraction. They'll need to sign in to view it.
116
+ </p>
117
+ </div>
118
+ ) : (
119
+ <div className="text-center py-8">
120
+ <p className="text-sm text-slate-600">No share link available</p>
121
+ </div>
122
+ )}
123
+
124
+ <div className="pt-4 mt-6 border-t border-slate-200">
125
+ <Button
126
+ type="button"
127
+ variant="outline"
128
+ onClick={onClose}
129
+ disabled={isLoading}
130
+ className="w-full h-11 rounded-xl"
131
+ >
132
+ Close
133
+ </Button>
134
+ </div>
135
+ </div>
136
+ </motion.div>
137
+ </div>
138
+ </AnimatePresence>
139
+ );
140
+ }
141
+
frontend/src/components/ShareModal.jsx ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState } from "react";
2
+ import { motion, AnimatePresence } from "framer-motion";
3
+ import { X, Mail, Send, Loader2 } from "lucide-react";
4
+ import { Button } from "@/components/ui/button";
5
+ import { Input } from "@/components/ui/input";
6
+
7
+ export default function ShareModal({ isOpen, onClose, onShare, extractionId }) {
8
+ const [email, setEmail] = useState("");
9
+ const [isLoading, setIsLoading] = useState(false);
10
+ const [error, setError] = useState("");
11
+ const [success, setSuccess] = useState(false);
12
+ const [successMessage, setSuccessMessage] = useState("");
13
+
14
+ const handleSubmit = async (e) => {
15
+ e.preventDefault();
16
+ setError("");
17
+ setSuccess(false);
18
+
19
+ // Parse and validate multiple emails (comma or semicolon separated)
20
+ if (!email.trim()) {
21
+ setError("Please enter at least one recipient email address");
22
+ return;
23
+ }
24
+
25
+ // Split by comma or semicolon, trim each email, and filter out empty strings
26
+ const emailList = email
27
+ .split(/[,;]/)
28
+ .map((e) => e.trim())
29
+ .filter((e) => e.length > 0);
30
+
31
+ if (emailList.length === 0) {
32
+ setError("Please enter at least one recipient email address");
33
+ return;
34
+ }
35
+
36
+ // Validate each email
37
+ const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
38
+ const invalidEmails = emailList.filter((e) => !emailRegex.test(e));
39
+
40
+ if (invalidEmails.length > 0) {
41
+ setError(`Invalid email address(es): ${invalidEmails.join(", ")}`);
42
+ return;
43
+ }
44
+
45
+ setIsLoading(true);
46
+ try {
47
+ const result = await onShare(extractionId, emailList);
48
+ setSuccessMessage(result?.message || `Successfully shared with ${emailList.length} recipient(s)`);
49
+ setSuccess(true);
50
+ setEmail("");
51
+ // Close modal after 2 seconds
52
+ setTimeout(() => {
53
+ setSuccess(false);
54
+ setSuccessMessage("");
55
+ onClose();
56
+ }, 2000);
57
+ } catch (err) {
58
+ setError(err.message || "Failed to share extraction. Please try again.");
59
+ } finally {
60
+ setIsLoading(false);
61
+ }
62
+ };
63
+
64
+ const handleClose = () => {
65
+ if (!isLoading) {
66
+ setEmail("");
67
+ setError("");
68
+ setSuccess(false);
69
+ onClose();
70
+ }
71
+ };
72
+
73
+ if (!isOpen) return null;
74
+
75
+ return (
76
+ <AnimatePresence>
77
+ <div className="fixed inset-0 z-50 flex items-center justify-center">
78
+ {/* Backdrop */}
79
+ <motion.div
80
+ initial={{ opacity: 0 }}
81
+ animate={{ opacity: 1 }}
82
+ exit={{ opacity: 0 }}
83
+ className="absolute inset-0 bg-black/50 backdrop-blur-sm"
84
+ onClick={handleClose}
85
+ />
86
+
87
+ {/* Modal */}
88
+ <motion.div
89
+ initial={{ opacity: 0, scale: 0.95, y: 20 }}
90
+ animate={{ opacity: 1, scale: 1, y: 0 }}
91
+ exit={{ opacity: 0, scale: 0.95, y: 20 }}
92
+ className="relative z-10 w-full max-w-md mx-4 bg-white rounded-2xl shadow-2xl overflow-hidden"
93
+ onClick={(e) => e.stopPropagation()}
94
+ >
95
+ {/* Header */}
96
+ <div className="px-6 py-4 border-b border-slate-200 flex items-center justify-between">
97
+ <h2 className="text-xl font-semibold text-slate-900">Share Output</h2>
98
+ <button
99
+ onClick={handleClose}
100
+ disabled={isLoading}
101
+ className="p-2 rounded-lg hover:bg-slate-100 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
102
+ >
103
+ <X className="h-5 w-5 text-slate-500" />
104
+ </button>
105
+ </div>
106
+
107
+ {/* Content */}
108
+ <div className="px-6 py-6">
109
+ {success ? (
110
+ <motion.div
111
+ initial={{ opacity: 0, scale: 0.9 }}
112
+ animate={{ opacity: 1, scale: 1 }}
113
+ className="text-center py-8"
114
+ >
115
+ <div className="w-16 h-16 mx-auto mb-4 rounded-full bg-emerald-100 flex items-center justify-center">
116
+ <Send className="h-8 w-8 text-emerald-600" />
117
+ </div>
118
+ <h3 className="text-lg font-semibold text-slate-900 mb-2">
119
+ Share Sent Successfully!
120
+ </h3>
121
+ <p className="text-sm text-slate-600">
122
+ {successMessage || "The recipient(s) will receive an email with a link to view the extraction."}
123
+ </p>
124
+ </motion.div>
125
+ ) : (
126
+ <form onSubmit={handleSubmit} className="space-y-4">
127
+ <div>
128
+ <label
129
+ htmlFor="recipient-email"
130
+ className="block text-sm font-medium text-slate-700 mb-2"
131
+ >
132
+ Recipient Email(s)
133
+ </label>
134
+ <p className="text-xs text-slate-500 mb-2">
135
+ Separate multiple emails with commas or semicolons
136
+ </p>
137
+ <div className="relative">
138
+ <Mail className="absolute left-3 top-1/2 -translate-y-1/2 h-5 w-5 text-slate-400" />
139
+ <Input
140
+ id="recipient-email"
141
+ type="text"
142
+ value={email}
143
+ onChange={(e) => setEmail(e.target.value)}
144
+ placeholder="Enter email addresses (comma or semicolon separated)"
145
+ className="pl-10 h-12 rounded-xl border-slate-200 focus:border-indigo-500 focus:ring-indigo-500"
146
+ disabled={isLoading}
147
+ autoFocus
148
+ />
149
+ </div>
150
+ {error && (
151
+ <motion.p
152
+ initial={{ opacity: 0, y: -10 }}
153
+ animate={{ opacity: 1, y: 0 }}
154
+ className="mt-2 text-sm text-red-600"
155
+ >
156
+ {error}
157
+ </motion.p>
158
+ )}
159
+ </div>
160
+
161
+ <div className="pt-4 flex gap-3">
162
+ <Button
163
+ type="button"
164
+ variant="outline"
165
+ onClick={handleClose}
166
+ disabled={isLoading}
167
+ className="flex-1 h-11 rounded-xl"
168
+ >
169
+ Cancel
170
+ </Button>
171
+ <Button
172
+ type="submit"
173
+ disabled={isLoading || !email.trim()}
174
+ className="flex-1 h-11 rounded-xl bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700"
175
+ >
176
+ {isLoading ? (
177
+ <>
178
+ <Loader2 className="h-4 w-4 mr-2 animate-spin" />
179
+ Sending...
180
+ </>
181
+ ) : (
182
+ <>
183
+ <Send className="h-4 w-4 mr-2" />
184
+ Send
185
+ </>
186
+ )}
187
+ </Button>
188
+ </div>
189
+ </form>
190
+ )}
191
+ </div>
192
+ </motion.div>
193
+ </div>
194
+ </AnimatePresence>
195
+ );
196
+ }
197
+
frontend/src/components/auth/LoginForm.jsx ADDED
@@ -0,0 +1,512 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState } from "react";
2
+ import { motion } from "framer-motion";
3
+ import { Button } from "@/components/ui/button";
4
+ import { Input } from "@/components/ui/input";
5
+ import { Separator } from "@/components/ui/separator";
6
+ import {
7
+ Zap,
8
+ Target,
9
+ Upload,
10
+ CheckCircle2,
11
+ ArrowRight,
12
+ Mail,
13
+ Sparkles,
14
+ Shield,
15
+ Globe,
16
+ AlertCircle,
17
+ Loader2,
18
+ } from "lucide-react";
19
+ import { useAuth } from "@/contexts/AuthContext";
20
+
21
+ export default function LoginForm() {
22
+ const { firebaseLogin, requestOTP, verifyOTP } = useAuth();
23
+ const [email, setEmail] = useState("");
24
+ const [showOtp, setShowOtp] = useState(false);
25
+ const [otp, setOtp] = useState(["", "", "", "", "", ""]);
26
+ const [loading, setLoading] = useState(false);
27
+ const [error, setError] = useState("");
28
+
29
// Business email validation
// Consumer mailbox providers that are not accepted for sign-in.
const PERSONAL_EMAIL_DOMAINS = [
  "gmail.com",
  "yahoo.com",
  "hotmail.com",
  "outlook.com",
  "aol.com",
  "icloud.com",
  "mail.com",
  "protonmail.com",
  "yandex.com",
  "zoho.com",
  "gmx.com",
  "live.com",
  "msn.com",
];

/**
 * Decides whether an address belongs to a non-consumer ("business") domain.
 *
 * The address is trimmed and lower-cased before inspection, and the domain is
 * taken from the text after the LAST "@", so an input such as
 * "x@acme.io@gmail.com" is judged by "gmail.com" rather than "acme.io".
 *
 * @param {string} email - Address typed by the user.
 * @returns {boolean} true when the address has a non-empty local part and a
 *   non-empty domain that is not listed in PERSONAL_EMAIL_DOMAINS; false for
 *   anything else, including non-string input.
 */
const isBusinessEmail = (email) => {
  if (typeof email !== "string") return false;

  const normalized = email.trim().toLowerCase();
  const atIndex = normalized.lastIndexOf("@");

  // Reject a missing "@", an empty local part ("@acme.io") and an empty
  // domain ("user@"); the latter previously slipped through as "business".
  if (atIndex <= 0 || atIndex === normalized.length - 1) return false;

  const domain = normalized.slice(atIndex + 1);
  return !PERSONAL_EMAIL_DOMAINS.includes(domain);
};
51
+
52
// Runs the firebaseLogin() sign-in call. The loading flag is raised for the
// duration of the call and always lowered afterwards; a failure is reported
// through setError using the thrown error's message when it has one.
const handleGoogleLogin = async () => {
  setLoading(true);
  setError("");

  try {
    await firebaseLogin();
  } catch (loginFailure) {
    const message = loginFailure.message || "Failed to sign in with Google";
    setError(message);
  } finally {
    setLoading(false);
  }
};
63
+
64
// Submit handler for the email form: validates the typed address and, when it
// passes, requests an OTP for it and switches the view to the OTP step.
const handleEmailSubmit = async (e) => {
  e.preventDefault();
  setLoading(true);
  setError("");

  // Work out the first validation failure (if any) before touching the network.
  let validationMessage = null;
  if (!email) {
    validationMessage = "Please enter your email address";
  } else if (!isBusinessEmail(email)) {
    validationMessage =
      "Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, etc.) are not permitted.";
  }

  if (validationMessage !== null) {
    setError(validationMessage);
    setLoading(false);
    return;
  }

  try {
    await requestOTP(email);
    setShowOtp(true);
  } catch (requestFailure) {
    setError(requestFailure.message || "Failed to send OTP");
  } finally {
    setLoading(false);
  }
};
90
+
91
// Change handler for one OTP box. Accepts only an empty string or a single
// digit; anything else is ignored. After storing a digit in boxes 0-4, focus
// moves to the following box.
const handleOtpChange = (index, value) => {
  const isEmptyOrSingleDigit = value.length <= 1 && /^\d*$/.test(value);
  if (!isEmptyOrSingleDigit) {
    return;
  }

  const updatedOtp = otp.slice();
  updatedOtp[index] = value;
  setOtp(updatedOtp);
  setError("");

  // Auto-advance only when a digit was entered (not when the box was cleared).
  const shouldAdvance = value && index < 5;
  if (shouldAdvance) {
    document.getElementById(`otp-${index + 1}`)?.focus();
  }
};
105
+
106
// Paste handler for the OTP boxes. The default paste is always suppressed;
// the clipboard text is reduced to its first six digits, which are written
// into the boxes starting at `startIndex` (digits that would fall past the
// sixth box are dropped). Focus then moves to the box after the last filled
// one, capped at the final box.
const handleOtpPaste = (e, startIndex = 0) => {
  e.preventDefault();

  const pastedDigits = e.clipboardData
    .getData("text")
    .replace(/\D/g, "")
    .slice(0, 6);

  if (pastedDigits.length === 0) {
    return;
  }

  const filledOtp = otp.slice();
  let slot = startIndex;
  for (const digit of pastedDigits) {
    if (slot >= 6) break;
    filledOtp[slot] = digit;
    slot += 1;
  }
  setOtp(filledOtp);
  setError("");

  const focusIndex = Math.min(startIndex + pastedDigits.length, 5);
  document.getElementById(`otp-${focusIndex}`)?.focus();
};
127
+
128
// Key handler for one OTP box: pressing Backspace in an already-empty box
// (other than the first) moves focus back to the previous box.
const handleOtpKeyDown = (index, e) => {
  const isBackspaceOnEmptyBox = e.key === "Backspace" && !otp[index];
  const hasPreviousBox = index > 0;

  if (!(isBackspaceOnEmptyBox && hasPreviousBox)) {
    return;
  }

  document.getElementById(`otp-${index - 1}`)?.focus();
};
134
+
135
// Submit handler for the OTP form: joins the six boxes into one code, rejects
// it locally unless it is exactly six characters long, and otherwise passes
// it to verifyOTP together with the email. A failed verification shows the
// error and empties all six boxes.
const handleOtpVerify = async (e) => {
  e.preventDefault();
  setLoading(true);
  setError("");

  const code = otp.join("");
  const isComplete = code.length === 6;
  if (!isComplete) {
    setError("Please enter a valid 6-digit OTP");
    setLoading(false);
    return;
  }

  try {
    await verifyOTP(email, code);
    // Nothing else to do here on success; per the original author's note the
    // redirect is expected to be handled by AuthContext (not visible here).
  } catch (verifyFailure) {
    setError(verifyFailure.message || "Invalid OTP. Please try again.");
    setOtp(Array(6).fill(""));
  } finally {
    setLoading(false);
  }
};
157
+
158
// Marketing highlights rendered in the left showcase panel and, title-only,
// under the form on small screens. `title` doubles as the React key, so the
// titles must stay unique.
const features = [
  {
    icon: Zap,
    title: "Lightning Fast",
    description: "Process documents in seconds and get outputs for ERP ingestion",
    color: "text-amber-500",
    bg: "bg-amber-50",
  },
  {
    icon: Target,
    // Kept in line with the "99.8% Accuracy" badge shown in the preview card
    // on the same screen; the previous "100% Accuracy" contradicted it.
    title: "99.8% Accuracy",
    description: "Industry-leading extraction with Visual Reasoning Processor",
    color: "text-emerald-500",
    bg: "bg-emerald-50",
  },
  {
    icon: Globe,
    title: "Any Format, Any Language",
    description: "PDF, images, scanned docs — multi-lingual support included",
    color: "text-blue-500",
    bg: "bg-blue-50",
  },
];

// File-type chips shown inside the preview card; `color` is the Tailwind
// background class applied to each chip.
const supportedFormats = [
  { ext: "PDF", color: "bg-red-500" },
  { ext: "PNG", color: "bg-blue-500" },
  { ext: "JPG", color: "bg-green-500" },
  { ext: "TIFF", color: "bg-purple-500" },
];
188
+
189
+ return (
190
+ <div className="min-h-screen bg-gradient-to-br from-slate-50 via-white to-blue-50 flex">
191
+ {/* Left Side - Product Showcase */}
192
+ <div className="hidden lg:flex lg:w-[56%] flex-col justify-between p-8 relative overflow-hidden">
193
+ {/* Background Elements */}
194
+ <div className="absolute top-0 right-0 w-96 h-96 bg-blue-100/40 rounded-full blur-3xl -translate-y-1/2 translate-x-1/2" />
195
+ <div className="absolute bottom-0 left-0 w-80 h-80 bg-emerald-100/40 rounded-full blur-3xl translate-y-1/2 -translate-x-1/2" />
196
+
197
+ {/* Logo & Brand */}
198
+ <motion.div
199
+ initial={{ opacity: 0, y: -20 }}
200
+ animate={{ opacity: 1, y: 0 }}
201
+ className="relative z-10 mb-6"
202
+ >
203
+ <div className="flex items-center gap-3">
204
+ <div className="h-12 w-12 flex items-center justify-center flex-shrink-0">
205
+ <img
206
+ src="/logo.png"
207
+ alt="EZOFIS AI Logo"
208
+ className="h-full w-full object-contain"
209
+ onError={(e) => {
210
+ // Fallback: hide image if logo not found
211
+ e.target.style.display = 'none';
212
+ }}
213
+ />
214
+ </div>
215
+ <div>
216
+ <h1 className="text-2xl font-bold text-slate-900 tracking-tight">EZOFISOCR</h1>
217
+ <p className="text-sm text-slate-500 font-medium">VRP Intelligence</p>
218
+ </div>
219
+ </div>
220
+ </motion.div>
221
+
222
+ {/* Main Content */}
223
+ <motion.div
224
+ initial={{ opacity: 0, y: 20 }}
225
+ animate={{ opacity: 1, y: 0 }}
226
+ transition={{ delay: 0.1 }}
227
+ className="relative z-10 space-y-5 flex-1 flex flex-col justify-center ml-24 xl:ml-36"
228
+ >
229
+ <div className="space-y-3">
230
+ <h2 className="text-3xl xl:text-4xl font-bold text-slate-900 leading-tight">
231
+ Pure Agentic
232
+ <span className="block text-transparent bg-clip-text bg-gradient-to-r from-blue-600 to-indigo-600">
233
+ Document Intelligence
234
+ </span>
235
+ </h2>
236
+ <p className="text-base text-slate-600 max-w-lg leading-relaxed">
237
+ Deterministic, layout-aware extraction (without LLM) using our proprietary{" "}
238
+ <span className="font-semibold text-slate-800">Visual Reasoning Processor (VRP)</span>
239
+ </p>
240
+ </div>
241
+
242
+ {/* Product Preview Card */}
243
+ <motion.div
244
+ initial={{ opacity: 0, scale: 0.95 }}
245
+ animate={{ opacity: 1, scale: 1 }}
246
+ transition={{ delay: 0.3 }}
247
+ className="bg-white rounded-2xl border border-slate-200/80 shadow-xl shadow-slate-200/50 p-4 max-w-lg"
248
+ >
249
+ <div className="border-2 border-dashed border-slate-200 rounded-xl p-5 text-center bg-slate-50/50">
250
+ <div className="w-12 h-12 rounded-full bg-slate-100 flex items-center justify-center mx-auto mb-3">
251
+ <Upload className="w-5 h-5 text-slate-400" />
252
+ </div>
253
+ <p className="text-slate-700 font-medium mb-1 text-sm">Drop a document to extract data</p>
254
+ <p className="text-xs text-slate-400">Invoices, purchase orders, delivery notes, receipts, and operational documents</p>
255
+
256
+ <div className="flex items-center justify-center gap-2 mt-3">
257
+ {supportedFormats.map((format, i) => (
258
+ <span key={i} className={`${format.color} text-white text-xs font-bold px-2 py-1 rounded`}>
259
+ {format.ext}
260
+ </span>
261
+ ))}
262
+ </div>
263
+ </div>
264
+
265
+ <div className="flex items-center justify-between mt-3 pt-3 border-t border-slate-100">
266
+ <div className="flex items-center gap-2">
267
+ <div className="w-2 h-2 rounded-full bg-emerald-500 animate-pulse" />
268
+ <span className="text-xs text-slate-600">Ready to extract</span>
269
+ </div>
270
+ <div className="flex items-center gap-1 text-emerald-600">
271
+ <CheckCircle2 className="w-3.5 h-3.5" />
272
+ <span className="text-xs font-semibold">99.8% Accuracy</span>
273
+ </div>
274
+ </div>
275
+ </motion.div>
276
+
277
+ {/* Features */}
278
+ <div className="grid gap-3">
279
+ {features.map((feature, index) => (
280
+ <motion.div
281
+ key={feature.title}
282
+ initial={{ opacity: 0, x: -20 }}
283
+ animate={{ opacity: 1, x: 0 }}
284
+ transition={{ delay: 0.4 + index * 0.1 }}
285
+ className="flex items-start gap-3 group"
286
+ >
287
+ <div
288
+ className={`w-9 h-9 rounded-xl ${feature.bg} flex items-center justify-center flex-shrink-0 group-hover:scale-110 transition-transform`}
289
+ >
290
+ <feature.icon className={`w-4 h-4 ${feature.color}`} />
291
+ </div>
292
+ <div>
293
+ <h3 className="font-semibold text-slate-900 text-sm">{feature.title}</h3>
294
+ <p className="text-xs text-slate-500">{feature.description}</p>
295
+ </div>
296
+ </motion.div>
297
+ ))}
298
+ </div>
299
+ </motion.div>
300
+
301
+ {/* Trust Badge */}
302
+ <motion.div
303
+ initial={{ opacity: 0 }}
304
+ animate={{ opacity: 1 }}
305
+ transition={{ delay: 0.6 }}
306
+ className="relative z-10 flex items-center gap-3 text-xs text-slate-500 mt-6"
307
+ >
308
+ <Shield className="w-4 h-4" />
309
+ <span>Enterprise-grade security • SOC 2 Compliant • GDPR Ready</span>
310
+ </motion.div>
311
+ </div>
312
+
313
+ {/* Right Side - Sign In Form */}
314
+ <div className="w-full lg:w-[44%] flex items-center justify-center p-6 sm:p-10">
315
+ <motion.div
316
+ initial={{ opacity: 0, y: 20 }}
317
+ animate={{ opacity: 1, y: 0 }}
318
+ transition={{ delay: 0.2 }}
319
+ className="w-full max-w-md"
320
+ >
321
+ {/* Mobile Logo */}
322
+ <div className="lg:hidden flex items-center justify-center gap-3 mb-8">
323
+ <div className="h-12 w-12 flex items-center justify-center flex-shrink-0">
324
+ <img
325
+ src="/logo.png"
326
+ alt="EZOFIS AI Logo"
327
+ className="h-full w-full object-contain"
328
+ onError={(e) => {
329
+ // Fallback: hide image if logo not found
330
+ e.target.style.display = 'none';
331
+ }}
332
+ />
333
+ </div>
334
+ <div>
335
+ <h1 className="text-2xl font-bold text-slate-900 tracking-tight">EZOFISOCR</h1>
336
+ <p className="text-sm text-slate-500 font-medium">VRP Intelligence</p>
337
+ </div>
338
+ </div>
339
+
340
+ <div className="bg-white rounded-3xl border border-slate-200/80 shadow-2xl shadow-slate-200/50 p-8 sm:p-10">
341
+ <div className="text-center mb-8">
342
+ <h2 className="text-2xl font-bold text-slate-900 mb-2">
343
+ {showOtp ? "Enter verification code" : "Secure Access"}
344
+ </h2>
345
+ <p className="text-slate-500">
346
+ {showOtp ? `We sent a code to ${email}` : "Access your document intelligence workspace"}
347
+ </p>
348
+ </div>
349
+
350
+ {/* Error Message */}
351
+ {error && (
352
+ <motion.div
353
+ initial={{ opacity: 0, y: -10 }}
354
+ animate={{ opacity: 1, y: 0 }}
355
+ className="mb-6 p-3 bg-red-50 border border-red-200 rounded-xl flex items-start gap-2 text-sm text-red-700"
356
+ >
357
+ <AlertCircle className="h-4 w-4 flex-shrink-0 mt-0.5" />
358
+ <p>{error}</p>
359
+ </motion.div>
360
+ )}
361
+
362
+ {!showOtp ? (
363
+ <>
364
+ {/* Google Sign In */}
365
+ <Button
366
+ onClick={handleGoogleLogin}
367
+ disabled={loading}
368
+ variant="outline"
369
+ className="w-full h-12 text-base font-medium border-slate-200 hover:bg-slate-50 hover:border-slate-300 transition-all group"
370
+ >
371
+ {loading ? (
372
+ <Loader2 className="w-5 h-5 mr-3 animate-spin" />
373
+ ) : (
374
+ <svg className="w-5 h-5 mr-3" viewBox="0 0 24 24">
375
+ <path fill="#4285F4" d="M22.56 12.25c0-.78-.07-1.53-.2-2.25H12v4.26h5.92c-.26 1.37-1.04 2.53-2.21 3.31v2.77h3.57c2.08-1.92 3.28-4.74 3.28-8.09z" />
376
+ <path fill="#34A853" d="M12 23c2.97 0 5.46-.98 7.28-2.66l-3.57-2.77c-.98.66-2.23 1.06-3.71 1.06-2.86 0-5.29-1.93-6.16-4.53H2.18v2.84C3.99 20.53 7.7 23 12 23z" />
377
+ <path fill="#FBBC05" d="M5.84 14.09c-.22-.66-.35-1.36-.35-2.09s.13-1.43.35-2.09V7.07H2.18C1.43 8.55 1 10.22 1 12s.43 3.45 1.18 4.93l2.85-2.22.81-.62z" />
378
+ <path fill="#EA4335" d="M12 5.38c1.62 0 3.06.56 4.21 1.64l3.15-3.15C17.45 2.09 14.97 1 12 1 7.7 1 3.99 3.47 2.18 7.07l3.66 2.84c.87-2.6 3.3-4.53 6.16-4.53z" />
379
+ </svg>
380
+ )}
381
+ Continue with Google
382
+ <ArrowRight className="w-4 h-4 ml-auto opacity-0 -translate-x-2 group-hover:opacity-100 group-hover:translate-x-0 transition-all" />
383
+ </Button>
384
+
385
+ <div className="relative my-8">
386
+ <Separator />
387
+ <span className="absolute left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2 bg-white px-4 text-sm text-slate-400">
388
+ or continue with email
389
+ </span>
390
+ </div>
391
+
392
+ {/* Email Input */}
393
+ <form onSubmit={handleEmailSubmit} className="space-y-4">
394
+ <div className="relative">
395
+ <Mail className="absolute left-4 top-1/2 -translate-y-1/2 w-5 h-5 text-slate-400" />
396
+ <Input
397
+ type="email"
398
+ placeholder="name@company.com"
399
+ value={email}
400
+ onChange={(e) => {
401
+ setEmail(e.target.value);
402
+ setError("");
403
+ }}
404
+ className="h-12 pl-12 text-base border-slate-200 focus:border-blue-500 focus:ring-blue-500"
405
+ />
406
+ </div>
407
+ <Button
408
+ type="submit"
409
+ disabled={loading}
410
+ className="w-full h-12 text-base font-medium bg-gradient-to-r from-blue-600 to-indigo-600 hover:from-blue-700 hover:to-indigo-700 shadow-lg shadow-blue-500/25 transition-all"
411
+ >
412
+ {loading ? (
413
+ <>
414
+ <Loader2 className="w-4 h-4 mr-2 animate-spin" />
415
+ Sending...
416
+ </>
417
+ ) : (
418
+ <>
419
+ Continue with Email
420
+ <ArrowRight className="w-4 h-4 ml-2" />
421
+ </>
422
+ )}
423
+ </Button>
424
+ </form>
425
+ </>
426
+ ) : (
427
+ /* OTP Input */
428
+ <form onSubmit={handleOtpVerify} className="space-y-6">
429
+ <div className="flex justify-center gap-2">
430
+ {otp.map((digit, index) => (
431
+ <Input
432
+ key={index}
433
+ id={`otp-${index}`}
434
+ type="text"
435
+ inputMode="numeric"
436
+ maxLength={1}
437
+ value={digit}
438
+ onChange={(e) => handleOtpChange(index, e.target.value)}
439
+ onKeyDown={(e) => handleOtpKeyDown(index, e)}
440
+ onPaste={(e) => handleOtpPaste(e, index)}
441
+ className="w-12 h-14 text-center text-xl font-semibold border-slate-200 focus:border-blue-500 focus:ring-blue-500"
442
+ />
443
+ ))}
444
+ </div>
445
+
446
+ <Button
447
+ type="submit"
448
+ disabled={loading || otp.join("").length !== 6}
449
+ className="w-full h-12 text-base font-medium bg-gradient-to-r from-blue-600 to-indigo-600 hover:from-blue-700 hover:to-indigo-700 shadow-lg shadow-blue-500/25"
450
+ >
451
+ {loading ? (
452
+ <>
453
+ <Loader2 className="w-4 h-4 mr-2 animate-spin" />
454
+ Verifying...
455
+ </>
456
+ ) : (
457
+ <>
458
+ Verify & Sign In
459
+ <ArrowRight className="w-4 h-4 ml-2" />
460
+ </>
461
+ )}
462
+ </Button>
463
+
464
+ <button
465
+ type="button"
466
+ onClick={() => {
467
+ setShowOtp(false);
468
+ setOtp(["", "", "", "", "", ""]);
469
+ setError("");
470
+ }}
471
+ className="w-full text-sm text-slate-500 hover:text-slate-700 transition-colors"
472
+ >
473
+ ← Back to sign in options
474
+ </button>
475
+ </form>
476
+ )}
477
+
478
+ {/* Notice */}
479
+ <div className="mt-8 pt-6 border-t border-slate-100">
480
+ <div className="flex items-start gap-2 text-xs text-slate-400 mb-4">
481
+ <Shield className="w-4 h-4 flex-shrink-0 mt-0.5" />
482
+ <span>Only business email addresses are allowed</span>
483
+ </div>
484
+ <p className="text-xs text-slate-400 text-center leading-relaxed">
485
+ By signing in, you agree to our{" "}
486
+ <a href="#" className="text-blue-600 hover:underline">
487
+ Terms of Service
488
+ </a>{" "}
489
+ and{" "}
490
+ <a href="#" className="text-blue-600 hover:underline">
491
+ Privacy Policy
492
+ </a>
493
+ </p>
494
+ </div>
495
+ </div>
496
+
497
+ {/* Mobile Features */}
498
+ <div className="lg:hidden mt-8 space-y-4">
499
+ {features.map((feature) => (
500
+ <div key={feature.title} className="flex items-center gap-3 text-sm">
501
+ <div className={`w-8 h-8 rounded-lg ${feature.bg} flex items-center justify-center`}>
502
+ <feature.icon className={`w-4 h-4 ${feature.color}`} />
503
+ </div>
504
+ <span className="text-slate-600">{feature.title}</span>
505
+ </div>
506
+ ))}
507
+ </div>
508
+ </motion.div>
509
+ </div>
510
+ </div>
511
+ );
512
+ }
frontend/src/components/ocr/DocumentPreview.jsx CHANGED
@@ -1,236 +1,229 @@
1
- import React, { useState, useEffect, useRef } from "react";
2
- import { motion } from "framer-motion";
3
- import { FileText, ZoomIn, ZoomOut, RotateCw, Maximize2 } from "lucide-react";
4
- import { Button } from "@/components/ui/button";
5
-
6
- export default function DocumentPreview({ file, isProcessing }) {
7
- const [previewUrls, setPreviewUrls] = useState([]);
8
- const [zoom, setZoom] = useState(100);
9
- const [rotation, setRotation] = useState(0);
10
- const objectUrlsRef = useRef([]);
11
-
12
- useEffect(() => {
13
- if (!file) {
14
- // Cleanup previous URLs
15
- objectUrlsRef.current.forEach((url) => {
16
- if (url && url.startsWith("blob:")) {
17
- URL.revokeObjectURL(url);
18
- }
19
- });
20
- objectUrlsRef.current = [];
21
- setPreviewUrls([]);
22
- return;
23
- }
24
-
25
- const loadPreview = async () => {
26
- const urls = [];
27
- const newObjectUrls = [];
28
-
29
- // Check if it's a PDF
30
- if (file.type === "application/pdf" || file.name?.toLowerCase().endsWith(".pdf")) {
31
- try {
32
- // Use pdf.js to render PDF pages
33
- const pdfjsLib = await import("pdfjs-dist");
34
-
35
- // Configure worker - use jsdelivr CDN which is more reliable
36
- // This will use the same version as the installed package
37
- const version = pdfjsLib.version || "4.0.379";
38
- pdfjsLib.GlobalWorkerOptions.workerSrc = `https://cdn.jsdelivr.net/npm/pdfjs-dist@${version}/build/pdf.worker.min.mjs`;
39
-
40
- const arrayBuffer = await file.arrayBuffer();
41
- const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;
42
- const numPages = pdf.numPages;
43
-
44
- for (let pageNum = 1; pageNum <= numPages; pageNum++) {
45
- const page = await pdf.getPage(pageNum);
46
- const viewport = page.getViewport({ scale: 2.0 });
47
-
48
- const canvas = document.createElement("canvas");
49
- const context = canvas.getContext("2d");
50
- canvas.height = viewport.height;
51
- canvas.width = viewport.width;
52
-
53
- await page.render({
54
- canvasContext: context,
55
- viewport: viewport,
56
- }).promise;
57
-
58
- urls.push(canvas.toDataURL("image/jpeg", 0.95));
59
- }
60
- } catch (error) {
61
- console.error("Error loading PDF:", error);
62
- // Fallback: show error message
63
- urls.push(null);
64
- }
65
- } else {
66
- // For images, create object URL
67
- const url = URL.createObjectURL(file);
68
- urls.push(url);
69
- newObjectUrls.push(url);
70
- }
71
-
72
- // Cleanup old object URLs
73
- objectUrlsRef.current.forEach((url) => {
74
- if (url && url.startsWith("blob:")) {
75
- URL.revokeObjectURL(url);
76
- }
77
- });
78
- objectUrlsRef.current = newObjectUrls;
79
- setPreviewUrls(urls);
80
- };
81
-
82
- loadPreview();
83
-
84
- // Cleanup function - revoke object URLs when component unmounts or file changes
85
- return () => {
86
- objectUrlsRef.current.forEach((url) => {
87
- if (url && url.startsWith("blob:")) {
88
- URL.revokeObjectURL(url);
89
- }
90
- });
91
- objectUrlsRef.current = [];
92
- };
93
- }, [file]);
94
-
95
- return (
96
- <div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden">
97
- {/* Header */}
98
- <div className="flex items-center justify-between px-5 py-4 border-b border-slate-100">
99
- <div className="flex items-center gap-3">
100
- <div className="h-8 w-8 rounded-lg bg-indigo-50 flex items-center justify-center">
101
- <FileText className="h-4 w-4 text-indigo-600" />
102
- </div>
103
- <div>
104
- <h3 className="font-semibold text-slate-800 text-sm">Document Preview</h3>
105
- <p className="text-xs text-slate-400">{file?.name || "No file selected"}</p>
106
- </div>
107
- </div>
108
-
109
- {file && (
110
- <div className="flex items-center gap-1">
111
- <Button
112
- variant="ghost"
113
- size="icon"
114
- className="h-8 w-8 text-slate-400 hover:text-slate-600"
115
- onClick={() => setZoom(Math.max(50, zoom - 25))}
116
- >
117
- <ZoomOut className="h-4 w-4" />
118
- </Button>
119
- <span className="text-xs text-slate-500 w-12 text-center">{zoom}%</span>
120
- <Button
121
- variant="ghost"
122
- size="icon"
123
- className="h-8 w-8 text-slate-400 hover:text-slate-600"
124
- onClick={() => setZoom(Math.min(200, zoom + 25))}
125
- >
126
- <ZoomIn className="h-4 w-4" />
127
- </Button>
128
- <div className="w-px h-4 bg-slate-200 mx-2" />
129
- <Button
130
- variant="ghost"
131
- size="icon"
132
- className="h-8 w-8 text-slate-400 hover:text-slate-600"
133
- onClick={() => setRotation((rotation + 90) % 360)}
134
- >
135
- <RotateCw className="h-4 w-4" />
136
- </Button>
137
- <Button
138
- variant="ghost"
139
- size="icon"
140
- className="h-8 w-8 text-slate-400 hover:text-slate-600"
141
- onClick={() => {
142
- setZoom(100);
143
- setRotation(0);
144
- }}
145
- >
146
- <Maximize2 className="h-4 w-4" />
147
- </Button>
148
- </div>
149
- )}
150
- </div>
151
-
152
- {/* Preview Area */}
153
- <div className="flex-1 p-6 bg-slate-50/50 overflow-auto">
154
- {!file ? (
155
- <div className="h-full flex items-center justify-center">
156
- <div className="text-center">
157
- <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
158
- <FileText className="h-10 w-10 text-slate-300" />
159
- </div>
160
- <p className="text-slate-400 text-sm">Upload a document to preview</p>
161
- </div>
162
- </div>
163
- ) : previewUrls.length === 0 ? (
164
- <div className="h-full flex items-center justify-center">
165
- <div className="text-center">
166
- <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
167
- <FileText className="h-10 w-10 text-slate-300" />
168
- </div>
169
- <p className="text-slate-400 text-sm">Loading preview...</p>
170
- </div>
171
- </div>
172
- ) : (
173
- <div className="space-y-4">
174
- {previewUrls.map((url, index) => (
175
- <motion.div
176
- key={index}
177
- initial={{ opacity: 0, y: 20 }}
178
- animate={{ opacity: 1, y: 0 }}
179
- transition={{ delay: index * 0.1 }}
180
- className="relative bg-white rounded-xl shadow-sm border border-slate-200 overflow-hidden flex items-center justify-center"
181
- style={{
182
- minHeight: "400px",
183
- }}
184
- >
185
- {url ? (
186
- <img
187
- src={url}
188
- alt={`Page ${index + 1}`}
189
- className="w-full h-auto"
190
- style={{
191
- transform: `scale(${zoom / 100}) rotate(${rotation}deg)`,
192
- maxWidth: "100%",
193
- objectFit: "contain",
194
- transition: "transform 0.2s ease",
195
- }}
196
- />
197
- ) : (
198
- <div className="p-8 text-center">
199
- <p className="text-slate-400 text-sm">Unable to load preview</p>
200
- </div>
201
- )}
202
-
203
- {/* Processing overlay */}
204
- {isProcessing && (
205
- <motion.div
206
- initial={{ opacity: 0 }}
207
- animate={{ opacity: 1 }}
208
- className="absolute inset-0 bg-indigo-600/5 backdrop-blur-[1px] pointer-events-none"
209
- >
210
- <motion.div
211
- initial={{ top: 0 }}
212
- animate={{ top: "100%" }}
213
- transition={{
214
- duration: 2,
215
- repeat: Infinity,
216
- ease: "linear",
217
- }}
218
- className="absolute left-0 right-0 h-1 bg-gradient-to-r from-transparent via-indigo-500 to-transparent"
219
- />
220
- </motion.div>
221
- )}
222
-
223
- {/* Page number */}
224
- {previewUrls.length > 1 && (
225
- <div className="absolute bottom-3 right-3 text-xs text-slate-400 bg-white/90 px-2 py-1 rounded">
226
- Page {index + 1}
227
- </div>
228
- )}
229
- </motion.div>
230
- ))}
231
- </div>
232
- )}
233
- </div>
234
- </div>
235
- );
236
- }
 
1
+ import React, { useState, useEffect, useRef } from "react";
2
+ import { motion } from "framer-motion";
3
+ import { FileText, ZoomIn, ZoomOut, RotateCw } from "lucide-react";
4
+ import { Button } from "@/components/ui/button";
5
+
6
+ export default function DocumentPreview({ file, isProcessing, isFromHistory = false }) {
7
+ const [previewUrls, setPreviewUrls] = useState([]);
8
+ const [zoom, setZoom] = useState(100);
9
+ const [rotation, setRotation] = useState(0);
10
+ const objectUrlsRef = useRef([]);
11
+
12
// Rebuilds the list of preview image URLs whenever `file` changes.
// - PDFs are rendered page by page with pdf.js into JPEG data URLs.
// - Anything else is previewed through a blob: object URL, which is tracked
//   in objectUrlsRef so it can be revoked later.
// A `null` entry in previewUrls means "could not build a preview".
useEffect(() => {
  // Revokes every tracked blob: URL and empties the tracking list.
  const releaseObjectUrls = () => {
    objectUrlsRef.current.forEach((url) => {
      if (url && url.startsWith("blob:")) {
        URL.revokeObjectURL(url);
      }
    });
    objectUrlsRef.current = [];
  };

  if (!file) {
    // Cleanup previous URLs
    releaseObjectUrls();
    setPreviewUrls([]);
    return;
  }

  // Set by the cleanup function. PDF rendering awaits several times, so
  // without this flag a render started for a previous file could finish
  // after a newer file was selected, revoke the newer file's object URL and
  // replace its preview with stale pages.
  let cancelled = false;

  const loadPreview = async () => {
    const urls = [];
    const newObjectUrls = [];

    // Check if it's a PDF
    if (file.type === "application/pdf" || file.name?.toLowerCase().endsWith(".pdf")) {
      try {
        // Use pdf.js to render PDF pages
        const pdfjsLib = await import("pdfjs-dist");

        // Configure worker - load it from the jsdelivr CDN, pinned to the
        // version reported by the installed package (with a fixed fallback).
        const version = pdfjsLib.version || "4.0.379";
        pdfjsLib.GlobalWorkerOptions.workerSrc = `https://cdn.jsdelivr.net/npm/pdfjs-dist@${version}/build/pdf.worker.min.mjs`;

        const arrayBuffer = await file.arrayBuffer();
        const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;
        const numPages = pdf.numPages;

        for (let pageNum = 1; pageNum <= numPages; pageNum++) {
          // Stop rendering pages nobody will see.
          if (cancelled) return;

          const page = await pdf.getPage(pageNum);
          const viewport = page.getViewport({ scale: 2.0 });

          const canvas = document.createElement("canvas");
          const context = canvas.getContext("2d");
          canvas.height = viewport.height;
          canvas.width = viewport.width;

          await page.render({
            canvasContext: context,
            viewport: viewport,
          }).promise;

          urls.push(canvas.toDataURL("image/jpeg", 0.95));
        }
      } catch (error) {
        console.error("Error loading PDF:", error);
        // Fallback: show error message
        urls.push(null);
      }
    } else {
      // For images, create object URL
      try {
        const url = URL.createObjectURL(file);
        urls.push(url);
        newObjectUrls.push(url);
      } catch (error) {
        // createObjectURL throws when `file` is not a Blob/File. Mirror the
        // PDF branch's fallback instead of leaving an unhandled rejection and
        // a preview stuck on "Loading preview...".
        console.error("Error creating preview URL:", error);
        urls.push(null);
      }
    }

    if (cancelled) {
      // The effect was torn down while loading: discard this result.
      newObjectUrls.forEach((url) => URL.revokeObjectURL(url));
      return;
    }

    // Cleanup old object URLs, then publish the new ones
    releaseObjectUrls();
    objectUrlsRef.current = newObjectUrls;
    setPreviewUrls(urls);
  };

  loadPreview();

  // Cleanup function - runs when the component unmounts or `file` changes
  return () => {
    cancelled = true;
    releaseObjectUrls();
  };
}, [file]);
94
+
95
+ return (
96
+ <div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden">
97
+ {/* Header */}
98
+ <div className="flex items-center justify-between px-5 py-4 border-b border-slate-100">
99
+ <div className="flex items-center gap-3">
100
+ <div className="h-8 w-8 rounded-lg bg-indigo-50 flex items-center justify-center">
101
+ <FileText className="h-4 w-4 text-indigo-600" />
102
+ </div>
103
+ <div>
104
+ <h3 className="font-semibold text-slate-800 text-sm">Document Preview</h3>
105
+ <p className="text-xs text-slate-400">{file?.name || "No file selected"}</p>
106
+ </div>
107
+ </div>
108
+
109
+ {file && (
110
+ <div className="flex items-center gap-1">
111
+ <Button
112
+ variant="ghost"
113
+ size="icon"
114
+ className="h-8 w-8 text-slate-400 hover:text-slate-600"
115
+ onClick={() => setZoom(Math.max(50, zoom - 25))}
116
+ >
117
+ <ZoomOut className="h-4 w-4" />
118
+ </Button>
119
+ <span className="text-xs text-slate-500 w-12 text-center">{zoom}%</span>
120
+ <Button
121
+ variant="ghost"
122
+ size="icon"
123
+ className="h-8 w-8 text-slate-400 hover:text-slate-600"
124
+ onClick={() => setZoom(Math.min(200, zoom + 25))}
125
+ >
126
+ <ZoomIn className="h-4 w-4" />
127
+ </Button>
128
+ <div className="w-px h-4 bg-slate-200 mx-2" />
129
+ <Button
130
+ variant="ghost"
131
+ size="icon"
132
+ className="h-8 w-8 text-slate-400 hover:text-slate-600"
133
+ onClick={() => setRotation((rotation + 90) % 360)}
134
+ >
135
+ <RotateCw className="h-4 w-4" />
136
+ </Button>
137
+ </div>
138
+ )}
139
+ </div>
140
+
141
+ {/* Preview Area */}
142
+ <div className="flex-1 p-6 bg-slate-50/50 overflow-auto">
143
+ {!file ? (
144
+ <div className="h-full flex items-center justify-center">
145
+ <div className="text-center">
146
+ <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
147
+ <FileText className="h-10 w-10 text-slate-300" />
148
+ </div>
149
+ <p className="text-slate-400 text-sm">Upload a document to preview</p>
150
+ </div>
151
+ </div>
152
+ ) : previewUrls.length === 0 ? (
153
+ <div className="h-full flex items-center justify-center">
154
+ <div className="text-center">
155
+ <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
156
+ <FileText className="h-10 w-10 text-slate-300" />
157
+ </div>
158
+ <p className="text-slate-400 text-sm">Loading preview...</p>
159
+ </div>
160
+ </div>
161
+ ) : (
162
+ <div className="space-y-4">
163
+ {previewUrls.map((url, index) => (
164
+ <motion.div
165
+ key={index}
166
+ initial={{ opacity: 0, y: 20 }}
167
+ animate={{ opacity: 1, y: 0 }}
168
+ transition={{ delay: index * 0.1 }}
169
+ className="relative bg-white rounded-xl shadow-sm border border-slate-200 overflow-hidden flex items-center justify-center"
170
+ style={{
171
+ minHeight: "400px",
172
+ }}
173
+ >
174
+ {url ? (
175
+ <img
176
+ src={url}
177
+ alt={`Page ${index + 1}`}
178
+ className="w-full h-auto"
179
+ style={{
180
+ transform: `scale(${zoom / 100}) rotate(${rotation}deg)`,
181
+ maxWidth: "100%",
182
+ objectFit: "contain",
183
+ transition: "transform 0.2s ease",
184
+ }}
185
+ />
186
+ ) : (
187
+ <div className="p-8 text-center">
188
+ <p className="text-slate-400 text-sm">
189
+ {isFromHistory
190
+ ? "Original document not available for historical extractions"
191
+ : "Unable to load preview"}
192
+ </p>
193
+ </div>
194
+ )}
195
+
196
+ {/* Processing overlay */}
197
+ {isProcessing && (
198
+ <motion.div
199
+ initial={{ opacity: 0 }}
200
+ animate={{ opacity: 1 }}
201
+ className="absolute inset-0 bg-indigo-600/5 backdrop-blur-[1px] pointer-events-none"
202
+ >
203
+ <motion.div
204
+ initial={{ top: 0 }}
205
+ animate={{ top: "100%" }}
206
+ transition={{
207
+ duration: 2,
208
+ repeat: Infinity,
209
+ ease: "linear",
210
+ }}
211
+ className="absolute left-0 right-0 h-1 bg-gradient-to-r from-transparent via-indigo-500 to-transparent"
212
+ />
213
+ </motion.div>
214
+ )}
215
+
216
+ {/* Page number */}
217
+ {previewUrls.length > 1 && (
218
+ <div className="absolute bottom-3 right-3 text-xs text-slate-400 bg-white/90 px-2 py-1 rounded">
219
+ Page {index + 1}
220
+ </div>
221
+ )}
222
+ </motion.div>
223
+ ))}
224
+ </div>
225
+ )}
226
+ </div>
227
+ </div>
228
+ );
229
+ }
 
 
 
 
 
 
 
frontend/src/components/ocr/ExtractionOutput.jsx CHANGED
@@ -1,639 +1,1201 @@
1
- import React, { useState, useEffect } from "react";
2
- import { motion, AnimatePresence } from "framer-motion";
3
- import {
4
- Code2,
5
- Copy,
6
- Check,
7
- Braces,
8
- FileCode2,
9
- FileText,
10
- Sparkles,
11
- ChevronDown,
12
- } from "lucide-react";
13
- import { Button } from "@/components/ui/button";
14
- import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
15
- import { cn } from "@/lib/utils";
16
-
17
- // Mock extracted data
18
- const mockData = {
19
- document: {
20
- type: "Invoice",
21
- confidence: 0.98,
22
- },
23
- vendor: {
24
- name: "Acme Corporation",
25
- address: "123 Business Ave, Suite 400",
26
- city: "San Francisco",
27
- state: "CA",
28
- zip: "94102",
29
- phone: "+1 (555) 123-4567",
30
- },
31
- invoice: {
32
- number: "INV-2024-0847",
33
- date: "2024-01-15",
34
- due_date: "2024-02-14",
35
- po_number: "PO-9823",
36
- },
37
- items: [
38
- { description: "Professional Services", quantity: 40, unit_price: 150.0, total: 6000.0 },
39
- { description: "Software License", quantity: 5, unit_price: 299.99, total: 1499.95 },
40
- { description: "Support Package", quantity: 1, unit_price: 500.0, total: 500.0 },
41
- ],
42
- totals: {
43
- subtotal: 7999.95,
44
- tax_rate: 0.0875,
45
- tax_amount: 699.99,
46
- total: 8699.94,
47
- },
48
- };
49
-
50
- const mockXML = `<?xml version="1.0" encoding="UTF-8"?>
51
- <extraction>
52
- <document type="Invoice" confidence="0.98"/>
53
- <vendor>
54
- <name>Acme Corporation</name>
55
- <address>123 Business Ave, Suite 400</address>
56
- <city>San Francisco</city>
57
- <state>CA</state>
58
- <zip>94102</zip>
59
- </vendor>
60
- <invoice>
61
- <number>INV-2024-0847</number>
62
- <date>2024-01-15</date>
63
- <due_date>2024-02-14</due_date>
64
- </invoice>
65
- <items>
66
- <item>
67
- <description>Professional Services</description>
68
- <quantity>40</quantity>
69
- <total>6000.00</total>
70
- </item>
71
- </items>
72
- <totals>
73
- <subtotal>7999.95</subtotal>
74
- <tax>699.99</tax>
75
- <total>8699.94</total>
76
- </totals>
77
- </extraction>`;
78
-
79
- const mockText = `INVOICE
80
-
81
- ACME CORPORATION
82
- 123 Business Ave, Suite 400
83
- San Francisco, CA 94102
84
- Phone: +1 (555) 123-4567
85
-
86
- Invoice Number: INV-2024-0847
87
- Invoice Date: January 15, 2024
88
- Due Date: February 14, 2024
89
- PO Number: PO-9823
90
-
91
- BILL TO:
92
- Customer Name
93
- 456 Client Street
94
- New York, NY 10001
95
-
96
- ITEMS:
97
- ─────────────────────────────────────────────────────────
98
- Description Qty Unit Price Total
99
- ─────────────────────────────────────────────────────────
100
- Professional Services 40 $150.00 $6,000.00
101
- Software License 5 $299.99 $1,499.95
102
- Support Package 1 $500.00 $500.00
103
- ─────────────────────────────────────────────────────────
104
-
105
- Subtotal: $7,999.95
106
- Tax (8.75%): $699.99
107
- ─────────────────────────
108
- TOTAL: $8,699.94
109
-
110
- Payment Terms: Net 30
111
- Thank you for your business!`;
112
-
113
- // Helper function to convert object to XML
114
- // Prepare fields for JSON/XML output - remove duplicates and restructure
115
- function prepareFieldsForOutput(fields, format = "json") {
116
- if (!fields || typeof fields !== "object") {
117
- return fields;
118
- }
119
-
120
- const output = { ...fields };
121
-
122
- // Remove full_text from top-level if pages array exists (to avoid duplication)
123
- if (output.pages && Array.isArray(output.pages) && output.pages.length > 0) {
124
- delete output.full_text;
125
-
126
- // Clean up each page: remove full_text from page.fields (it duplicates page.text)
127
- output.pages = output.pages.map(page => {
128
- const cleanedPage = { ...page };
129
- if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
130
- const cleanedFields = { ...cleanedPage.fields };
131
- // Remove full_text from page fields (duplicates page.text)
132
- delete cleanedFields.full_text;
133
- cleanedPage.fields = cleanedFields;
134
- }
135
- return cleanedPage;
136
- });
137
- }
138
-
139
- // For JSON and XML: restructure pages into separate top-level fields (page_1, page_2, etc.)
140
- if ((format === "json" || format === "xml") && output.pages && Array.isArray(output.pages)) {
141
- // Get top-level field keys (these are merged from all pages - avoid duplicating in page fields)
142
- const topLevelKeys = new Set(Object.keys(output).filter(k => k !== "pages" && k !== "full_text"));
143
-
144
- output.pages.forEach((page, idx) => {
145
- const pageNum = page.page_number || idx + 1;
146
- const pageFields = page.fields || {};
147
-
148
- // Remove duplicate fields from page.fields:
149
- // 1. Remove full_text (duplicates page.text)
150
- // 2. Remove fields that match top-level fields (already shown at root)
151
- const cleanedPageFields = {};
152
- for (const [key, value] of Object.entries(pageFields)) {
153
- // Skip full_text and fields that match top-level exactly
154
- if (key !== "full_text" && (!topLevelKeys.has(key) || (value !== output[key]))) {
155
- cleanedPageFields[key] = value;
156
- }
157
- }
158
-
159
- const pageObj = {
160
- text: page.text || "",
161
- confidence: page.confidence || 0,
162
- doc_type: page.doc_type || "other"
163
- };
164
-
165
- // Only add fields if there are unique page-specific fields
166
- if (Object.keys(cleanedPageFields).length > 0) {
167
- pageObj.fields = cleanedPageFields;
168
- }
169
-
170
- output[`page_${pageNum}`] = pageObj;
171
- });
172
- // Remove pages array - we now have page_1, page_2, etc. as separate fields
173
- delete output.pages;
174
- }
175
-
176
- return output;
177
- }
178
-
179
- function objectToXML(obj, rootName = "extraction") {
180
- // Prepare fields - remove full_text if pages exist
181
- const preparedObj = prepareFieldsForOutput(obj, "xml");
182
-
183
- let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;
184
-
185
- const convert = (obj, indent = " ") => {
186
- for (const [key, value] of Object.entries(obj)) {
187
- if (value === null || value === undefined) continue;
188
-
189
- // Skip full_text if pages exist (already handled in prepareFieldsForOutput)
190
- if (key === "full_text" && obj.pages && Array.isArray(obj.pages) && obj.pages.length > 0) {
191
- continue;
192
- }
193
-
194
- if (Array.isArray(value)) {
195
- value.forEach((item) => {
196
- xml += `${indent}<${key}>\n`;
197
- if (typeof item === "object") {
198
- convert(item, indent + " ");
199
- } else {
200
- xml += `${indent} ${escapeXML(String(item))}\n`;
201
- }
202
- xml += `${indent}</${key}>\n`;
203
- });
204
- } else if (typeof value === "object") {
205
- xml += `${indent}<${key}>\n`;
206
- convert(value, indent + " ");
207
- xml += `${indent}</${key}>\n`;
208
- } else {
209
- xml += `${indent}<${key}>${escapeXML(String(value))}</${key}>\n`;
210
- }
211
- }
212
- };
213
-
214
- convert(preparedObj);
215
- xml += `</${rootName}>`;
216
- return xml;
217
- }
218
-
219
- function escapeXML(str) {
220
- return str
221
- .replace(/&/g, "&amp;")
222
- .replace(/</g, "&lt;")
223
- .replace(/>/g, "&gt;")
224
- .replace(/"/g, "&quot;")
225
- .replace(/'/g, "&apos;");
226
- }
227
-
228
- // Helper function to format fields as readable text
229
- function fieldsToText(fields) {
230
- if (!fields || typeof fields !== "object") {
231
- return "No data extracted.";
232
- }
233
-
234
- // If full_text exists, show it prominently first
235
- if (fields.full_text) {
236
- let text = "=== FULL EXTRACTED TEXT ===\n\n";
237
- text += fields.full_text;
238
-
239
- // Don't show pages array separately if full_text already contains page markers
240
- // (full_text from backend already includes "=== PAGE 1 ===" etc.)
241
- const hasPageMarkers = fields.full_text.includes("=== PAGE") || fields.full_text.includes("--- Page");
242
-
243
- // Only show pages array if full_text doesn't already have page breakdown
244
- if (!hasPageMarkers && fields.pages && Array.isArray(fields.pages)) {
245
- text += "\n\n=== TEXT BY PAGE ===\n\n";
246
- fields.pages.forEach((page, idx) => {
247
- text += `--- Page ${page.page_number || idx + 1} ---\n`;
248
- text += page.text || "";
249
- text += "\n\n";
250
- });
251
- }
252
-
253
- // Then show other structured fields
254
- const otherFields = { ...fields };
255
- delete otherFields.full_text;
256
- delete otherFields.pages;
257
-
258
- if (Object.keys(otherFields).length > 0) {
259
- text += "\n\n=== STRUCTURED FIELDS ===\n\n";
260
- const formatValue = (key, value, indent = "") => {
261
- if (Array.isArray(value)) {
262
- text += `${indent}${key}:\n`;
263
- value.forEach((item, idx) => {
264
- if (typeof item === "object") {
265
- text += `${indent} Item ${idx + 1}:\n`;
266
- Object.entries(item).forEach(([k, v]) => formatValue(k, v, indent + " "));
267
- } else {
268
- text += `${indent} - ${item}\n`;
269
- }
270
- });
271
- } else if (typeof value === "object" && value !== null) {
272
- text += `${indent}${key}:\n`;
273
- Object.entries(value).forEach(([k, v]) => formatValue(k, v, indent + " "));
274
- } else {
275
- text += `${indent}${key}: ${value}\n`;
276
- }
277
- };
278
-
279
- Object.entries(otherFields).forEach(([key, value]) => {
280
- formatValue(key, value);
281
- text += "\n";
282
- });
283
- }
284
-
285
- return text.trim();
286
- }
287
-
288
- // Fallback: format all fields normally
289
- let text = "";
290
- const formatValue = (key, value, indent = "") => {
291
- if (Array.isArray(value)) {
292
- text += `${indent}${key}:\n`;
293
- value.forEach((item, idx) => {
294
- if (typeof item === "object") {
295
- text += `${indent} Item ${idx + 1}:\n`;
296
- Object.entries(item).forEach(([k, v]) => formatValue(k, v, indent + " "));
297
- } else {
298
- text += `${indent} - ${item}\n`;
299
- }
300
- });
301
- } else if (typeof value === "object" && value !== null) {
302
- text += `${indent}${key}:\n`;
303
- Object.entries(value).forEach(([k, v]) => formatValue(k, v, indent + " "));
304
- } else {
305
- text += `${indent}${key}: ${value}\n`;
306
- }
307
- };
308
-
309
- Object.entries(fields).forEach(([key, value]) => {
310
- formatValue(key, value);
311
- text += "\n";
312
- });
313
-
314
- return text.trim() || "No data extracted.";
315
- }
316
-
317
- export default function ExtractionOutput({ hasFile, isProcessing, isComplete, extractionResult }) {
318
- const [activeTab, setActiveTab] = useState("json");
319
- const [copied, setCopied] = useState(false);
320
-
321
- // Get fields from extraction result, default to empty object
322
- const fields = extractionResult?.fields || {};
323
- const confidence = extractionResult?.confidence || 0;
324
- const fieldsExtracted = extractionResult?.fieldsExtracted || 0;
325
- const totalTime = extractionResult?.totalTime || 0;
326
-
327
- // Initialize expanded sections based on available fields
328
- const [expandedSections, setExpandedSections] = useState(() =>
329
- Object.keys(fields).slice(0, 5) // Expand first 5 sections by default
330
- );
331
-
332
- const handleCopy = () => {
333
- let content = "";
334
- if (activeTab === "json") {
335
- const preparedFields = prepareFieldsForOutput(fields, "json");
336
- content = JSON.stringify(preparedFields, null, 2);
337
- } else if (activeTab === "xml") {
338
- content = objectToXML(fields);
339
- } else {
340
- content = fieldsToText(fields);
341
- }
342
-
343
- navigator.clipboard.writeText(content);
344
- setCopied(true);
345
- setTimeout(() => setCopied(false), 2000);
346
- };
347
-
348
- // Get prepared fields for display
349
- const preparedFields = React.useMemo(() => {
350
- return prepareFieldsForOutput(fields, "json");
351
- }, [fields]);
352
-
353
- // Update expanded sections when fields change
354
- React.useEffect(() => {
355
- if (extractionResult?.fields) {
356
- setExpandedSections(Object.keys(extractionResult.fields).slice(0, 5));
357
- }
358
- }, [extractionResult]);
359
-
360
- const toggleSection = (section) => {
361
- setExpandedSections((prev) =>
362
- prev.includes(section) ? prev.filter((s) => s !== section) : [...prev, section]
363
- );
364
- };
365
-
366
- const renderValue = (value) => {
367
- if (typeof value === "number") {
368
- return <span className="text-amber-600">{value}</span>;
369
- }
370
- if (typeof value === "string") {
371
- return <span className="text-emerald-600">"{value}"</span>;
372
- }
373
- return String(value);
374
- };
375
-
376
- const renderSection = (key, value, level = 0) => {
377
- const isExpanded = expandedSections.includes(key);
378
- const isObject = typeof value === "object" && value !== null;
379
- const isArray = Array.isArray(value);
380
-
381
- if (!isObject) {
382
- return (
383
- <div
384
- key={key}
385
- className="flex items-start gap-2 py-1"
386
- style={{ paddingLeft: level * 16 }}
387
- >
388
- <span className="text-violet-500">"{key}"</span>
389
- <span className="text-slate-400">:</span>
390
- {renderValue(value)}
391
- </div>
392
- );
393
- }
394
-
395
- return (
396
- <div key={key}>
397
- <button
398
- onClick={() => toggleSection(key)}
399
- className="flex items-center gap-2 py-1 hover:bg-slate-50 w-full text-left rounded"
400
- style={{ paddingLeft: level * 16 }}
401
- >
402
- <ChevronDown
403
- className={cn(
404
- "h-3 w-3 text-slate-400 transition-transform",
405
- !isExpanded && "-rotate-90"
406
- )}
407
- />
408
- <span className="text-violet-500">"{key}"</span>
409
- <span className="text-slate-400">:</span>
410
- <span className="text-slate-400">{isArray ? "[" : "{"}</span>
411
- {!isExpanded && (
412
- <span className="text-slate-300 text-xs">
413
- {isArray ? `${value.length} items` : `${Object.keys(value).length} fields`}
414
- </span>
415
- )}
416
- </button>
417
- <AnimatePresence>
418
- {isExpanded && (
419
- <motion.div
420
- initial={{ height: 0, opacity: 0 }}
421
- animate={{ height: "auto", opacity: 1 }}
422
- exit={{ height: 0, opacity: 0 }}
423
- transition={{ duration: 0.2 }}
424
- className="overflow-hidden"
425
- >
426
- {isArray ? (
427
- value.map((item, idx) => (
428
- <div key={idx} className="border-l border-slate-100 ml-4">
429
- {Object.entries(item).map(([k, v]) => renderSection(k, v, level + 2))}
430
- {idx < value.length - 1 && <div className="h-2" />}
431
- </div>
432
- ))
433
- ) : (
434
- Object.entries(value).map(([k, v]) => renderSection(k, v, level + 1))
435
- )}
436
- <div style={{ paddingLeft: level * 16 }} className="text-slate-400">
437
- {isArray ? "]" : "}"}
438
- </div>
439
- </motion.div>
440
- )}
441
- </AnimatePresence>
442
- </div>
443
- );
444
- };
445
-
446
- return (
447
- <div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden">
448
- {/* Header */}
449
- <div className="flex items-center justify-between px-5 py-4 border-b border-slate-100">
450
- <div className="flex items-center gap-3">
451
- <div className="h-8 w-8 rounded-lg bg-emerald-50 flex items-center justify-center">
452
- <Code2 className="h-4 w-4 text-emerald-600" />
453
- </div>
454
- <div>
455
- <h3 className="font-semibold text-slate-800 text-sm">Extracted Data</h3>
456
- <p className="text-xs text-slate-400">
457
- {isComplete
458
- ? `${fieldsExtracted} field${fieldsExtracted !== 1 ? 's' : ''} extracted`
459
- : "Waiting for extraction"}
460
- </p>
461
- </div>
462
- </div>
463
-
464
- {isComplete && (
465
- <div className="flex items-center gap-2">
466
- <Tabs value={activeTab} onValueChange={setActiveTab}>
467
- <TabsList className="h-8 bg-slate-100 p-0.5">
468
- <TabsTrigger value="text" className="h-7 text-xs gap-1.5">
469
- <FileText className="h-3 w-3" />
470
- Text
471
- </TabsTrigger>
472
- <TabsTrigger value="json" className="h-7 text-xs gap-1.5">
473
- <Braces className="h-3 w-3" />
474
- JSON
475
- </TabsTrigger>
476
- <TabsTrigger value="xml" className="h-7 text-xs gap-1.5">
477
- <FileCode2 className="h-3 w-3" />
478
- XML
479
- </TabsTrigger>
480
- </TabsList>
481
- </Tabs>
482
- <Button
483
- variant="ghost"
484
- size="sm"
485
- onClick={handleCopy}
486
- className="h-8 text-xs gap-1.5"
487
- >
488
- {copied ? (
489
- <>
490
- <Check className="h-3 w-3 text-emerald-500" />
491
- Copied
492
- </>
493
- ) : (
494
- <>
495
- <Copy className="h-3 w-3" />
496
- Copy
497
- </>
498
- )}
499
- </Button>
500
- </div>
501
- )}
502
- </div>
503
-
504
- {/* Output Area */}
505
- <div className="flex-1 overflow-auto">
506
- {!hasFile ? (
507
- <div className="h-full flex items-center justify-center p-6">
508
- <div className="text-center">
509
- <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
510
- <Code2 className="h-10 w-10 text-slate-300" />
511
- </div>
512
- <p className="text-slate-400 text-sm">Extracted data will appear here</p>
513
- </div>
514
- </div>
515
- ) : isProcessing ? (
516
- <div className="h-full flex items-center justify-center p-6">
517
- <div className="text-center">
518
- <motion.div
519
- animate={{ rotate: 360 }}
520
- transition={{ duration: 2, repeat: Infinity, ease: "linear" }}
521
- className="h-16 w-16 mx-auto rounded-2xl bg-gradient-to-br from-indigo-100 to-violet-100 flex items-center justify-center mb-4"
522
- >
523
- <Sparkles className="h-8 w-8 text-indigo-500" />
524
- </motion.div>
525
- <p className="text-slate-700 font-medium mb-1">Extracting data...</p>
526
- <p className="text-slate-400 text-sm">Analyzing document structure</p>
527
-
528
- <div className="mt-6 flex items-center justify-center gap-1">
529
- {[0, 1, 2].map((i) => (
530
- <motion.div
531
- key={i}
532
- animate={{ scale: [1, 1.2, 1] }}
533
- transition={{
534
- duration: 0.6,
535
- repeat: Infinity,
536
- delay: i * 0.2,
537
- }}
538
- className="h-2 w-2 rounded-full bg-indigo-400"
539
- />
540
- ))}
541
- </div>
542
- </div>
543
- </div>
544
- ) : isComplete && Object.keys(fields).length === 0 ? (
545
- <div className="h-full flex items-center justify-center p-6">
546
- <div className="text-center">
547
- <div className="h-20 w-20 mx-auto rounded-2xl bg-amber-100 flex items-center justify-center mb-4">
548
- <Code2 className="h-10 w-10 text-amber-600" />
549
- </div>
550
- <p className="text-slate-600 font-medium mb-1">No data extracted</p>
551
- <p className="text-slate-400 text-sm">The document may not contain extractable fields</p>
552
- </div>
553
- </div>
554
- ) : (
555
- <div className="p-4 font-mono text-sm">
556
- {activeTab === "text" ? (
557
- <pre className="text-sm text-slate-700 whitespace-pre-wrap leading-relaxed">
558
- {fieldsToText(fields)}
559
- </pre>
560
- ) : activeTab === "json" ? (
561
- <div className="space-y-1">
562
- <span className="text-slate-400">{"{"}</span>
563
- {Object.keys(preparedFields).length > 0 ? (
564
- Object.entries(preparedFields).map(([key, value]) =>
565
- renderSection(key, value, 1)
566
- )
567
- ) : (
568
- <div className="pl-4 text-slate-400 italic">No fields extracted</div>
569
- )}
570
- <span className="text-slate-400">{"}"}</span>
571
- </div>
572
- ) : (
573
- <pre className="text-sm text-slate-600 whitespace-pre-wrap">
574
- {objectToXML(fields).split("\n").map((line, i) => (
575
- <div key={i} className="hover:bg-slate-50 px-2 -mx-2 rounded">
576
- {line.includes("<") ? (
577
- <>
578
- {line.split(/(<\/?[\w\s=".-]+>)/g).map((part, j) => {
579
- if (part.startsWith("</")) {
580
- return (
581
- <span key={j} className="text-rose-500">
582
- {part}
583
- </span>
584
- );
585
- }
586
- if (part.startsWith("<")) {
587
- return (
588
- <span key={j} className="text-indigo-500">
589
- {part}
590
- </span>
591
- );
592
- }
593
- return (
594
- <span key={j} className="text-slate-700">
595
- {part}
596
- </span>
597
- );
598
- })}
599
- </>
600
- ) : (
601
- line
602
- )}
603
- </div>
604
- ))}
605
- </pre>
606
- )}
607
- </div>
608
- )}
609
- </div>
610
-
611
- {/* Confidence Footer */}
612
- {isComplete && extractionResult && (
613
- <div className="px-5 py-3 border-t border-slate-100 bg-slate-50/50">
614
- <div className="flex items-center justify-between text-xs">
615
- <div className="flex items-center gap-4">
616
- <div className="flex items-center gap-1.5">
617
- <div className={cn(
618
- "h-2 w-2 rounded-full",
619
- confidence >= 90 ? "bg-emerald-500" : confidence >= 70 ? "bg-amber-500" : "bg-red-500"
620
- )} />
621
- <span className="text-slate-500">Confidence:</span>
622
- <span className="font-semibold text-slate-700">
623
- {confidence > 0 ? `${confidence.toFixed(1)}%` : "N/A"}
624
- </span>
625
- </div>
626
- <div className="flex items-center gap-1.5">
627
- <span className="text-slate-500">Fields:</span>
628
- <span className="font-semibold text-slate-700">{fieldsExtracted}</span>
629
- </div>
630
- </div>
631
- <span className="text-slate-400">
632
- Processed in {totalTime >= 1000 ? `${(totalTime / 1000).toFixed(1)}s` : `${totalTime}ms`}
633
- </span>
634
- </div>
635
- </div>
636
- )}
637
- </div>
638
- );
639
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState, useEffect, useRef } from "react";
2
+ import { motion, AnimatePresence } from "framer-motion";
3
+ import {
4
+ Code2,
5
+ Copy,
6
+ Check,
7
+ Braces,
8
+ FileCode2,
9
+ FileText,
10
+ Sparkles,
11
+ ChevronDown,
12
+ Upload,
13
+ } from "lucide-react";
14
+ import { Button } from "@/components/ui/button";
15
+ import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
16
+ import { cn } from "@/lib/utils";
17
+
18
/**
 * Convert pipe-separated (markdown-style) tables embedded in plain text into
 * HTML <table> markup. Lines that are not part of a table are returned as-is.
 *
 * A table starts at a non-separator line that contains at least two pipes and
 * yields at least two header cells. One optional separator line (made only of
 * `|`, `-`, `:` and whitespace) may follow the header. The table then extends
 * over every following line that contains a pipe, and ends at the first blank
 * line or line without a pipe. Separator lines inside the body are skipped.
 *
 * Cell text is passed through `escapeHtml`. Body rows that are shorter than the
 * header are padded with empty cells so columns stay aligned; longer rows keep
 * their extra cells so no data is dropped.
 *
 * @param {string} text - Raw text, possibly containing pipe tables.
 * @returns {string} The text with pipe tables replaced by HTML tables. Falsy
 *   input is returned unchanged.
 */
function convertPipeTablesToHTML(text) {
  if (!text) return text;

  const TABLE_OPEN = '<table class="border-collapse border border-gray-300 w-full my-4">\n<thead>\n<tr>';
  const TH_CLASS = 'border border-gray-300 px-4 py-2 bg-gray-100 font-semibold text-left';
  const TD_CLASS = 'border border-gray-300 px-4 py-2';

  // A separator row consists solely of pipes, dashes, colons and whitespace.
  const isSeparatorRow = (line) => /^[\s|\-:]+$/.test(line.trim());

  // Split a row into trimmed cells, dropping the single empty cell produced by
  // a leading and/or trailing pipe.
  const splitRow = (line) => {
    const cells = line.split('|').map((cell) => cell.trim());
    if (cells.length > 0 && !cells[0]) cells.shift();
    if (cells.length > 0 && !cells[cells.length - 1]) cells.pop();
    return cells;
  };

  const renderCells = (tag, className, cells) =>
    cells
      .map((cell) => `<${tag} class="${className}">${escapeHtml(cell || '')}</${tag}>`)
      .join('');

  const lines = text.split('\n');
  const result = [];
  let i = 0;

  while (i < lines.length) {
    const line = lines[i];

    // A header candidate needs at least two pipes and must not be a separator.
    const isHeaderCandidate = line.split('|').length >= 3 && !isSeparatorRow(line);
    const headerCells = isHeaderCandidate ? splitRow(line) : [];

    if (headerCells.length < 2) {
      // Not a table row, keep the line untouched.
      result.push(line);
      i++;
      continue;
    }

    let j = i + 1;

    // Skip the separator line directly below the header, if present.
    if (j < lines.length && isSeparatorRow(lines[j])) {
      j++;
    }

    // Collect body rows until a blank line or a line without any pipe.
    const bodyRows = [];
    while (j < lines.length && lines[j].trim() && lines[j].includes('|')) {
      if (!isSeparatorRow(lines[j])) {
        const cells = splitRow(lines[j]);
        // Pad short rows so every row has at least as many cells as the header.
        while (cells.length < headerCells.length) {
          cells.push('');
        }
        bodyRows.push(cells);
      }
      j++;
    }

    const headerHtml = renderCells('th', TH_CLASS, headerCells);
    const bodyHtml = bodyRows
      .map((cells) => `<tr>${renderCells('td', TD_CLASS, cells)}</tr>\n`)
      .join('');

    result.push(`${TABLE_OPEN}${headerHtml}</tr>\n</thead>\n<tbody>\n${bodyHtml}</tbody>\n</table>`);
    i = j;
  }

  return result.join('\n');
}
115
+
116
/**
 * Escape text for safe inclusion in HTML element content or attribute values.
 *
 * Implemented with plain string replacement instead of a temporary DOM node,
 * so it also works where `document` is unavailable (server-side rendering,
 * workers, unit tests) and does not allocate an element per call.
 *
 * @param {string} text - Untrusted text.
 * @returns {string} The text with `&`, `<`, `>`, `"` and `'` replaced by
 *   entities; an empty string for any falsy input.
 */
function escapeHtml(text) {
  if (!text) return '';

  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };

  return String(text).replace(/[&<>"']/g, (ch) => entities[ch]);
}
123
+
124
/**
 * Convert extracted text that mixes lightweight markdown, pipe tables,
 * LaTeX-style super/subscripts and raw HTML into an HTML string.
 *
 * Processing order:
 *   1. Pipe tables are converted to <table> markup (`convertPipeTablesToHTML`).
 *   2. `$^{x}$` / `$^x$` become <sup>, `$_{x}$` / `$_x$` become <sub>. This
 *      runs on the whole string, so table cells are covered as well.
 *   3. <table>…</table> blocks are swapped for placeholders so the markdown
 *      and paragraph rules below cannot alter them.
 *   4. `#`/`##`/`###` headers, `**bold**`, `*italic*` and `[label](url)` links
 *      are converted.
 *   5. Blank lines become paragraph breaks, single newlines become <br>, and
 *      the protected tables are restored.
 *
 * SECURITY NOTE: raw HTML already present in `text` is passed through
 * unchanged by design, so the result is NOT sanitized. Link targets are
 * restricted to an allow-list of URL schemes and attribute-escaped, but the
 * caller must still sanitize the output before injecting it into the DOM if
 * `text` can come from an untrusted document.
 *
 * @param {string} text - Markdown/HTML text.
 * @returns {string} HTML string; an empty string for falsy input.
 */
function renderMarkdownToHTML(text) {
  if (!text) return "";

  // Schemes allowed in markdown link targets; scheme-less (relative) URLs are
  // allowed too. Everything else (javascript:, data:, vbscript:, ...) is dropped.
  const SAFE_LINK_SCHEMES = new Set(['http', 'https', 'mailto', 'tel', 'ftp']);

  // FIRST: convert pipe-separated tables to HTML tables.
  let html = convertPipeTablesToHTML(text);

  // Convert LaTeX-style scripts before tables are protected, so the notation
  // is converted everywhere, including inside table cells.
  html = html.replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, '<sup>$1</sup>');
  html = html.replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, '<sup>$1</sup>');
  html = html.replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, '<sub>$1</sub>');
  html = html.replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, '<sub>$1</sub>');

  // Protect table blocks: markdown and paragraph handling must not touch them.
  const htmlBlocks = [];
  html = html.replace(/<table[\s\S]*?<\/table>/gi, (match) => {
    htmlBlocks.push(match);
    return `__HTML_BLOCK_${htmlBlocks.length - 1}__`;
  });

  // Markdown headers.
  html = html.replace(/^### (.*$)/gim, '<h3>$1</h3>');
  html = html.replace(/^## (.*$)/gim, '<h2>$1</h2>');
  html = html.replace(/^# (.*$)/gim, '<h1>$1</h1>');

  // Markdown bold / italic.
  html = html.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
  html = html.replace(/\*(.*?)\*/g, '<em>$1</em>');

  // Markdown links. The target ends up inside href="...", so it is validated
  // and escaped; a link with a disallowed scheme is reduced to its label.
  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, label, url) => {
    // Browsers ignore whitespace and control characters inside the scheme
    // ("java\tscript:"), so strip them before inspecting it.
    const compactUrl = url.replace(/[\u0000-\u0020\u007f]+/g, '');
    const scheme = /^([a-z][a-z0-9+.-]*):/i.exec(compactUrl);
    if (scheme && !SAFE_LINK_SCHEMES.has(scheme[1].toLowerCase())) {
      return label;
    }
    const href = url
      .replace(/"/g, '&quot;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
    return `<a href="${href}" target="_blank" rel="noopener noreferrer">${label}</a>`;
  });

  // Build paragraphs around the protected blocks and restore the blocks.
  html = html
    .split(/(__HTML_BLOCK_\d+__)/)
    .map((part) => {
      const placeholder = /^__HTML_BLOCK_(\d+)__$/.exec(part);
      if (placeholder) {
        // Fall back to the literal text if the input merely looked like a
        // placeholder and no block with that index was recorded.
        return htmlBlocks[Number.parseInt(placeholder[1], 10)] ?? part;
      }

      // Double line breaks become paragraph breaks.
      let processed = part.replace(/\n\n+/g, '</p><p>');
      // Single line breaks become <br>, unless adjacent to a tag boundary.
      processed = processed.replace(/([^\n>])\n([^\n<])/g, '$1<br>$2');

      // Wrap in a paragraph if there is content that does not start with a tag.
      if (processed.trim() && !processed.trim().startsWith('<')) {
        processed = '<p>' + processed + '</p>';
      }

      return processed;
    })
    .join('');

  // Clean up empty paragraphs.
  html = html.replace(/<p><\/p>/g, '');
  html = html.replace(/<p>\s*<br>\s*<\/p>/g, '');
  html = html.replace(/<p>\s*<\/p>/g, '');

  // Spacing around tables and headers (kept exactly as before).
  html = html.replace(/(<\/table>)\s*(<h[1-3])/g, '$1</p><p>$2');
  html = html.replace(/(<\/h[1-3]>)\s*(<table)/g, '$1<p>$2');
  html = html.replace(/(<\/table>)\s*(<p>)/g, '$1$2');

  return html;
}
229
+
230
+ // Mock extracted data
231
+ const mockData = {
232
+ document: {
233
+ type: "Invoice",
234
+ confidence: 0.98,
235
+ },
236
+ vendor: {
237
+ name: "Acme Corporation",
238
+ address: "123 Business Ave, Suite 400",
239
+ city: "San Francisco",
240
+ state: "CA",
241
+ zip: "94102",
242
+ phone: "+1 (555) 123-4567",
243
+ },
244
+ invoice: {
245
+ number: "INV-2024-0847",
246
+ date: "2024-01-15",
247
+ due_date: "2024-02-14",
248
+ po_number: "PO-9823",
249
+ },
250
+ items: [
251
+ { description: "Professional Services", quantity: 40, unit_price: 150.0, total: 6000.0 },
252
+ { description: "Software License", quantity: 5, unit_price: 299.99, total: 1499.95 },
253
+ { description: "Support Package", quantity: 1, unit_price: 500.0, total: 500.0 },
254
+ ],
255
+ totals: {
256
+ subtotal: 7999.95,
257
+ tax_rate: 0.0875,
258
+ tax_amount: 699.99,
259
+ total: 8699.94,
260
+ },
261
+ };
262
+
263
+ const mockXML = `<?xml version="1.0" encoding="UTF-8"?>
264
+ <extraction>
265
+ <document type="Invoice" confidence="0.98"/>
266
+ <vendor>
267
+ <name>Acme Corporation</name>
268
+ <address>123 Business Ave, Suite 400</address>
269
+ <city>San Francisco</city>
270
+ <state>CA</state>
271
+ <zip>94102</zip>
272
+ </vendor>
273
+ <invoice>
274
+ <number>INV-2024-0847</number>
275
+ <date>2024-01-15</date>
276
+ <due_date>2024-02-14</due_date>
277
+ </invoice>
278
+ <items>
279
+ <item>
280
+ <description>Professional Services</description>
281
+ <quantity>40</quantity>
282
+ <total>6000.00</total>
283
+ </item>
284
+ </items>
285
+ <totals>
286
+ <subtotal>7999.95</subtotal>
287
+ <tax>699.99</tax>
288
+ <total>8699.94</total>
289
+ </totals>
290
+ </extraction>`;
291
+
292
+ const mockText = `INVOICE
293
+
294
+ ACME CORPORATION
295
+ 123 Business Ave, Suite 400
296
+ San Francisco, CA 94102
297
+ Phone: +1 (555) 123-4567
298
+
299
+ Invoice Number: INV-2024-0847
300
+ Invoice Date: January 15, 2024
301
+ Due Date: February 14, 2024
302
+ PO Number: PO-9823
303
+
304
+ BILL TO:
305
+ Customer Name
306
+ 456 Client Street
307
+ New York, NY 10001
308
+
309
+ ITEMS:
310
+ ─────────────────────────────────────────────────────────
311
+ Description Qty Unit Price Total
312
+ ─────────────────────────────────────────────────────────
313
+ Professional Services 40 $150.00 $6,000.00
314
+ Software License 5 $299.99 $1,499.95
315
+ Support Package 1 $500.00 $500.00
316
+ ─────────────────────────────────────────────────────────
317
+
318
+ Subtotal: $7,999.95
319
+ Tax (8.75%): $699.99
320
+ ─────────────────────────
321
+ TOTAL: $8,699.94
322
+
323
+ Payment Terms: Net 30
324
+ Thank you for your business!`;
325
+
326
/**
 * Prepare extracted fields for JSON/XML output: drop duplicated text and
 * restructure per-page data.
 *
 * Steps, in order:
 *  1. The root-level `Fields` entry is pulled out so it can be emitted first.
 *  2. When a non-empty `pages` array exists, top-level `full_text` and each
 *     page's `fields.full_text` are dropped (they duplicate the page text).
 *  3. For the "json" and "xml" formats, `pages` is replaced by top-level
 *     `page_<n>` entries holding text, confidence, doc_type and — when
 *     present — table, footer_notes and page-specific fields.
 *  4. `Fields` and `metadata` are stripped from every `page_*` object.
 *
 * The input is never mutated: every object that gets modified is copied first.
 *
 * @param {object} fields - Raw extraction fields.
 * @param {string} [format="json"] - Target format; "json" and "xml" trigger
 *   the `page_<n>` restructuring.
 * @returns {object} A new object with `Fields` (if non-empty) as its first
 *   key. Non-object or falsy input is returned unchanged.
 */
function prepareFieldsForOutput(fields, format = "json") {
  if (!fields || typeof fields !== "object") {
    return fields;
  }

  // Shallow copy so top-level additions/removals never reach the caller.
  const output = { ...fields };

  // Set the root-level Fields aside; it is re-inserted at the top below.
  const rootFields = output.Fields;
  delete output.Fields;

  // Remove text that would otherwise be duplicated between root and pages.
  if (Array.isArray(output.pages) && output.pages.length > 0) {
    delete output.full_text;

    output.pages = output.pages.map((page) => {
      const cleanedPage = { ...page };
      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
        const cleanedFields = { ...cleanedPage.fields };
        delete cleanedFields.full_text;
        cleanedPage.fields = cleanedFields;
      }
      return cleanedPage;
    });
  }

  // JSON and XML: turn the pages array into page_1, page_2, ... entries.
  if ((format === "json" || format === "xml") && Array.isArray(output.pages)) {
    // Keys already present at root level; identical page-level copies are skipped.
    const topLevelKeys = new Set(
      Object.keys(output).filter(
        (k) => k !== "pages" && k !== "full_text" && k !== "Fields"
      )
    );

    output.pages.forEach((page, idx) => {
      const pageNum = page.page_number || idx + 1;
      const pageFields = page.fields || {};

      // Keep only fields that are page-specific: not full_text, and not an
      // exact repeat of the same key at root level.
      const cleanedPageFields = {};
      for (const [key, value] of Object.entries(pageFields)) {
        const repeatsRoot = topLevelKeys.has(key) && value === output[key];
        if (key !== "full_text" && !repeatsRoot) {
          cleanedPageFields[key] = value;
        }
      }

      const pageObj = {
        text: page.text || "",
        confidence: page.confidence || 0,
        doc_type: page.doc_type || "other",
      };

      if (Array.isArray(page.table) && page.table.length > 0) {
        pageObj.table = page.table;
      }
      if (Array.isArray(page.footer_notes) && page.footer_notes.length > 0) {
        pageObj.footer_notes = page.footer_notes;
      }
      if (Object.keys(cleanedPageFields).length > 0) {
        pageObj.fields = cleanedPageFields;
      }

      output[`page_${pageNum}`] = pageObj;
    });

    delete output.pages;
  }

  // page_* objects may be the caller's own objects (they are shared by the
  // shallow copy above), so strip Fields/metadata from a copy instead of
  // deleting in place.
  for (const pageKey of Object.keys(output)) {
    if (!pageKey.startsWith("page_")) continue;
    const pageData = output[pageKey];
    if (pageData && typeof pageData === "object" && !Array.isArray(pageData)) {
      const { Fields: _fields, metadata: _metadata, ...rest } = pageData;
      output[pageKey] = rest;
    }
  }

  // Rebuild with Fields first (only when it is a non-empty object).
  const finalOutput = {};
  if (
    rootFields &&
    typeof rootFields === "object" &&
    Object.keys(rootFields).length > 0
  ) {
    finalOutput.Fields = rootFields;
  }
  for (const key of Object.keys(output)) {
    finalOutput[key] = output[key];
  }

  return finalOutput;
}
428
+
429
/**
 * Serialize extraction fields as an XML document string.
 *
 * The input is first normalized with `prepareFieldsForOutput(obj, "xml")`.
 * Each object key becomes an element; arrays repeat the element once per
 * item; `null`/`undefined` values and array items are skipped. Text content
 * is escaped with `escapeXML`.
 *
 * Keys are converted to well-formed element names: characters other than
 * letters, digits, `_`, `.` and `-` become `_`, and a leading `_` is added
 * when the name would not start with a letter or underscore (for example
 * "Invoice Number" -> "Invoice_Number", "1st" -> "_1st").
 *
 * @param {object} obj - Extraction fields.
 * @param {string} [rootName="extraction"] - Name of the root element (used as given).
 * @returns {string} XML document text, without a trailing newline.
 */
function objectToXML(obj, rootName = "extraction") {
  const preparedObj = prepareFieldsForOutput(obj, "xml");

  // Map an arbitrary object key to a name usable as an XML element name.
  const toElementName = (key) => {
    const cleaned = String(key).replace(/[^\p{L}\p{N}_.-]/gu, "_");
    return /^[\p{L}_]/u.test(cleaned) ? cleaned : `_${cleaned}`;
  };

  const parts = [`<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`];

  const convert = (node, indent = " ") => {
    // full_text duplicates the per-page text whenever a pages array is present.
    const skipFullText = Array.isArray(node.pages) && node.pages.length > 0;

    for (const [key, value] of Object.entries(node)) {
      if (value === null || value === undefined) continue;
      if (key === "full_text" && skipFullText) continue;

      const tag = toElementName(key);

      if (Array.isArray(value)) {
        for (const item of value) {
          // typeof null === "object": skip nullish items instead of recursing into them.
          if (item === null || item === undefined) continue;
          parts.push(`${indent}<${tag}>\n`);
          if (typeof item === "object") {
            convert(item, `${indent} `);
          } else {
            parts.push(`${indent} ${escapeXML(String(item))}\n`);
          }
          parts.push(`${indent}</${tag}>\n`);
        }
      } else if (typeof value === "object") {
        parts.push(`${indent}<${tag}>\n`);
        convert(value, `${indent} `);
        parts.push(`${indent}</${tag}>\n`);
      } else {
        parts.push(`${indent}<${tag}>${escapeXML(String(value))}</${tag}>\n`);
      }
    }
  };

  // A non-object input yields an empty root element rather than a TypeError.
  if (preparedObj && typeof preparedObj === "object") {
    convert(preparedObj);
  }

  parts.push(`</${rootName}>`);
  return parts.join("");
}
468
+
469
/**
 * Escape the five XML special characters in a string.
 *
 * @param {string} str - Raw text.
 * @returns {string} Text with &, <, >, " and ' replaced by entity references.
 */
function escapeXML(str) {
  const entityFor = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };
  // Single pass: each special character is replaced exactly once.
  return str.replace(/[&<>"']/g, (ch) => entityFor[ch]);
}
477
+
478
/**
 * Pull the plain extracted text out of a fields object.
 *
 * Text from every `page_*` entry (in key order) is joined with a blank line.
 * When no page carries text, `full_text` is used instead.
 *
 * @param {object} fields - Extraction fields.
 * @returns {string} Combined text, or "" when nothing is available.
 */
function extractTextFromFields(fields) {
  if (!fields || typeof fields !== "object") {
    return "";
  }

  // Preferred source: the page_X structure.
  const pageTexts = [];
  for (const key of Object.keys(fields)) {
    if (!key.startsWith("page_")) continue;
    const page = fields[key];
    if (page && page.text) {
      pageTexts.push(page.text);
    }
  }
  if (pageTexts.length > 0) {
    return pageTexts.join("\n\n");
  }

  // Fallback: the flat full_text field.
  return fields.full_text ? fields.full_text : "";
}
508
+
509
/**
 * Render extraction fields as readable text.
 *
 * When the fields carry extracted text (page text or `full_text`, as
 * returned by `extractTextFromFields`), that text is returned as is.
 * Otherwise every field is printed as an indented `key: value` outline,
 * with one blank line between top-level fields.
 *
 * @param {object} fields - Extraction fields.
 * @returns {string} Text for display; "No data extracted." when there is
 *   nothing to show.
 */
function fieldsToText(fields) {
  if (!fields || typeof fields !== "object") {
    return "No data extracted.";
  }

  const extractedText = extractTextFromFields(fields);
  if (extractedText) {
    return extractedText;
  }

  // Fallback: no text available, so outline the structured fields.
  let text = "";
  const formatValue = (key, value, indent = "") => {
    if (Array.isArray(value)) {
      text += `${indent}${key}:\n`;
      value.forEach((item, idx) => {
        // typeof null === "object": treat null items as scalars, not records.
        if (item !== null && typeof item === "object") {
          text += `${indent} Item ${idx + 1}:\n`;
          Object.entries(item).forEach(([k, v]) => formatValue(k, v, indent + " "));
        } else {
          text += `${indent} - ${item}\n`;
        }
      });
    } else if (typeof value === "object" && value !== null) {
      text += `${indent}${key}:\n`;
      Object.entries(value).forEach(([k, v]) => formatValue(k, v, indent + " "));
    } else {
      text += `${indent}${key}: ${value}\n`;
    }
  };

  Object.entries(fields).forEach(([key, value]) => {
    formatValue(key, value);
    text += "\n";
  });

  return text.trim() || "No data extracted.";
}
598
+
599
+ export default function ExtractionOutput({ hasFile, isProcessing, isComplete, extractionResult, onNewUpload }) {
600
+ const [activeTab, setActiveTab] = useState("json");
601
+ const [copied, setCopied] = useState(false);
602
+ const [statusMessage, setStatusMessage] = useState("Preparing document...");
603
+
604
+ // Get fields from extraction result, default to empty object
605
+ const fields = extractionResult?.fields || {};
606
+ const confidence = extractionResult?.confidence || 0;
607
+ const fieldsExtracted = extractionResult?.fieldsExtracted || 0;
608
+ const totalTime = extractionResult?.totalTime || 0;
609
+
610
+ // Dynamic status messages that rotate during processing
611
+ const statusMessages = [
612
+ "Preparing document...",
613
+ "Converting pages to images...",
614
+ "Visual Reasoning...",
615
+ "Reading text from document...",
616
+ "Identifying document structure...",
617
+ "Extracting tables and data...",
618
+ "Analyzing content...",
619
+ "Processing pages...",
620
+ "Organizing extracted information...",
621
+ "Finalizing results...",
622
+ ];
623
+
624
+ // Rotate status messages during processing
625
+ const messageIndexRef = useRef(0);
626
+
627
+ useEffect(() => {
628
+ if (!isProcessing) {
629
+ setStatusMessage("Analyzing document structure");
630
+ messageIndexRef.current = 0;
631
+ return;
632
+ }
633
+
634
+ setStatusMessage(statusMessages[0]);
635
+ messageIndexRef.current = 0;
636
+
637
+ const interval = setInterval(() => {
638
+ messageIndexRef.current = (messageIndexRef.current + 1) % statusMessages.length;
639
+ setStatusMessage(statusMessages[messageIndexRef.current]);
640
+ }, 2500); // Change message every 2.5 seconds
641
+
642
+ return () => clearInterval(interval);
643
+ }, [isProcessing]);
644
+
645
+ // Initialize expanded sections based on available fields
646
+ const [expandedSections, setExpandedSections] = useState(() =>
647
+ Object.keys(fields).slice(0, 5) // Expand first 5 sections by default
648
+ );
649
+
650
/**
 * Convert an HTML string to plain text while keeping its layout:
 * headings and paragraphs are separated by blank lines, list items become
 * bullets, and tables are printed as padded, pipe-separated columns.
 *
 * The HTML is parsed with DOMParser, which yields an inert document, so the
 * markup cannot load resources or run inline event handlers while it is
 * being converted.
 *
 * @param {string} html - HTML markup.
 * @returns {string} Formatted plain text ("" for empty input).
 */
const htmlToFormattedText = (html) => {
  if (!html) return "";

  const parsedDocument = new DOMParser().parseFromString(String(html), "text/html");

  const tagOf = (node) => node.tagName?.toLowerCase();

  const processChildren = (children) => children.map(processNode).join("");

  // Text of one table cell, with internal whitespace collapsed to single spaces.
  const cellText = (cell) =>
    processChildren(Array.from(cell.childNodes)).trim().replace(/\s+/g, " ");

  const processNode = (node) => {
    if (node.nodeType === Node.TEXT_NODE) {
      return node.textContent;
    }
    if (node.nodeType !== Node.ELEMENT_NODE) {
      return "";
    }

    const children = Array.from(node.childNodes);

    switch (tagOf(node)) {
      case "h1":
      case "h2":
        return "\n\n" + processChildren(children).trim() + "\n\n";
      case "h3":
        return "\n" + processChildren(children).trim() + "\n";
      case "p":
        return processChildren(children) + "\n\n";
      case "br":
        return "\n";
      case "table":
        return "\n" + processTable(node) + "\n\n";
      case "ul":
      case "ol":
        return "\n" + processList(node) + "\n\n";
      case "li":
        // Only reached for list items that are not inside a ul/ol.
        return " • " + processChildren(children).trim() + "\n";
      default:
        // Inline formatting (strong, em, sup, sub, ...) contributes its text only.
        return processChildren(children);
    }
  };

  const processTable = (table) => {
    // table.rows holds this table's own rows; rows of nested tables are
    // rendered when their containing cell is processed, not a second time here.
    const rows = Array.from(table.rows);
    if (rows.length === 0) return "";

    const grid = rows.map((row) => Array.from(row.cells).map(cellText));

    // Column width = longest cell text in that column, at least 10 characters.
    const columnCount = Math.max(...grid.map((cells) => cells.length));
    const columnWidths = new Array(columnCount).fill(10);
    grid.forEach((cells) => {
      cells.forEach((content, colIndex) => {
        columnWidths[colIndex] = Math.max(columnWidths[colIndex], content.length);
      });
    });

    const firstRowHasHeader = Array.from(rows[0].cells).some(
      (cell) => tagOf(cell) === "th"
    );

    let tableText = "";
    grid.forEach((cells, rowIndex) => {
      tableText +=
        cells.map((content, i) => content.padEnd(columnWidths[i])).join(" | ") + "\n";

      // Rule under the header row.
      if (rowIndex === 0 && firstRowHasHeader) {
        tableText += columnWidths.map((w) => "-".repeat(w)).join("-|-") + "\n";
      }
    });

    return tableText;
  };

  const processList = (list) => {
    // Direct <li> children only: items of a nested list are emitted once,
    // as part of their parent item.
    const items = Array.from(list.children).filter((child) => tagOf(child) === "li");
    return items
      .map((item) => " • " + processChildren(Array.from(item.childNodes)).trim())
      .join("\n");
  };

  return processChildren(Array.from(parsedDocument.body.childNodes))
    .replace(/\n{3,}/g, "\n\n") // collapse runs of blank lines
    .trim();
};
770
+
771
/**
 * Copy the content of the active tab to the clipboard.
 *
 * - "json": the prepared fields, pretty-printed.
 * - "xml": the XML serialization of the fields.
 * - otherwise (text tab): the same text the tab displays, converted from its
 *   rendered HTML to layout-preserving plain text.
 *
 * The "Copied" indicator is shown for two seconds, and only after the
 * clipboard write has actually succeeded.
 */
const handleCopy = async () => {
  let content = "";
  if (activeTab === "json") {
    content = JSON.stringify(prepareFieldsForOutput(fields, "json"), null, 2);
  } else if (activeTab === "xml") {
    content = objectToXML(fields);
  } else {
    // Use fieldsToText, like the text tab itself, so the copied text matches
    // what is on screen even when no page text is available.
    const htmlContent = renderMarkdownToHTML(fieldsToText(fields));
    content = htmlToFormattedText(htmlContent);
  }

  try {
    // Throws synchronously when the Clipboard API is unavailable and rejects
    // when the write is denied; both cases land in the catch below.
    await navigator.clipboard.writeText(content);
  } catch (err) {
    console.error("Failed to copy extraction output:", err);
    return;
  }

  setCopied(true);
  setTimeout(() => setCopied(false), 2000);
};
789
+
790
+ // Get prepared fields for display
791
+ const preparedFields = React.useMemo(() => {
792
+ return prepareFieldsForOutput(fields, "json");
793
+ }, [fields]);
794
+
795
+ // Update expanded sections when fields change
796
+ React.useEffect(() => {
797
+ if (extractionResult?.fields) {
798
+ setExpandedSections(Object.keys(extractionResult.fields).slice(0, 5));
799
+ }
800
+ }, [extractionResult]);
801
+
802
// Expand a collapsed section or collapse an expanded one.
const toggleSection = (section) => {
  setExpandedSections((current) => {
    if (current.includes(section)) {
      return current.filter((name) => name !== section);
    }
    return [...current, section];
  });
};
807
+
808
+ const renderValue = (value) => {
809
+ if (typeof value === "number") {
810
+ return <span className="text-amber-600">{value}</span>;
811
+ }
812
+ if (typeof value === "string") {
813
+ return <span className="text-emerald-600">"{value}"</span>;
814
+ }
815
+ return String(value);
816
+ };
817
+
818
+ const renderSection = (key, value, level = 0) => {
819
+ const isExpanded = expandedSections.includes(key);
820
+ const isObject = typeof value === "object" && value !== null;
821
+ const isArray = Array.isArray(value);
822
+
823
+ if (!isObject) {
824
+ return (
825
+ <div
826
+ key={key}
827
+ className="flex items-start gap-2 py-1"
828
+ style={{ paddingLeft: level * 16 }}
829
+ >
830
+ <span className="text-violet-500">"{key}"</span>
831
+ <span className="text-slate-400">:</span>
832
+ {renderValue(value)}
833
+ </div>
834
+ );
835
+ }
836
+
837
+ return (
838
+ <div key={key}>
839
+ <button
840
+ onClick={() => toggleSection(key)}
841
+ className="flex items-center gap-2 py-1 hover:bg-slate-50 w-full text-left rounded"
842
+ style={{ paddingLeft: level * 16 }}
843
+ >
844
+ <ChevronDown
845
+ className={cn(
846
+ "h-3 w-3 text-slate-400 transition-transform",
847
+ !isExpanded && "-rotate-90"
848
+ )}
849
+ />
850
+ <span className="text-violet-500">"{key}"</span>
851
+ <span className="text-slate-400">:</span>
852
+ <span className="text-slate-400">{isArray ? "[" : "{"}</span>
853
+ {!isExpanded && (
854
+ <span className="text-slate-300 text-xs">
855
+ {isArray ? `${value.length} items` : `${Object.keys(value).length} fields`}
856
+ </span>
857
+ )}
858
+ </button>
859
+ <AnimatePresence>
860
+ {isExpanded && (
861
+ <motion.div
862
+ initial={{ height: 0, opacity: 0 }}
863
+ animate={{ height: "auto", opacity: 1 }}
864
+ exit={{ height: 0, opacity: 0 }}
865
+ transition={{ duration: 0.2 }}
866
+ className="overflow-hidden"
867
+ >
868
+ {isArray ? (
869
+ value.map((item, idx) => (
870
+ <div key={idx} className="border-l border-slate-100 ml-4">
871
+ {Object.entries(item).map(([k, v]) => renderSection(k, v, level + 2))}
872
+ {idx < value.length - 1 && <div className="h-2" />}
873
+ </div>
874
+ ))
875
+ ) : (
876
+ Object.entries(value).map(([k, v]) => renderSection(k, v, level + 1))
877
+ )}
878
+ <div style={{ paddingLeft: level * 16 }} className="text-slate-400">
879
+ {isArray ? "]" : "}"}
880
+ </div>
881
+ </motion.div>
882
+ )}
883
+ </AnimatePresence>
884
+ </div>
885
+ );
886
+ };
887
+
888
+ return (
889
+ <div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden">
890
+ {/* Header */}
891
+ <div className="flex items-center justify-between px-5 py-4 border-b border-slate-100">
892
+ <div className="flex items-center gap-3">
893
+ <div className="h-8 w-8 rounded-lg bg-emerald-50 flex items-center justify-center">
894
+ <Code2 className="h-4 w-4 text-emerald-600" />
895
+ </div>
896
+ <div>
897
+ <h3 className="font-semibold text-slate-800 text-sm">Extracted Data</h3>
898
+ <p className="text-xs text-slate-400">
899
+ {isComplete
900
+ ? `${fieldsExtracted} field${fieldsExtracted !== 1 ? 's' : ''} extracted`
901
+ : "Waiting for extraction"}
902
+ </p>
903
+ </div>
904
+ {isComplete && onNewUpload && (
905
+ <Button
906
+ variant="ghost"
907
+ size="sm"
908
+ onClick={onNewUpload}
909
+ className="h-8 ml-auto text-xs gap-1.5 text-indigo-600 hover:text-indigo-700 hover:bg-indigo-50"
910
+ title="Upload new document"
911
+ >
912
+ <Upload className="h-3.5 w-3.5" />
913
+ New
914
+ </Button>
915
+ )}
916
+ </div>
917
+
918
+ {isComplete && (
919
+ <div className="flex items-center gap-2">
920
+ <Tabs value={activeTab} onValueChange={setActiveTab}>
921
+ <TabsList className="h-8 bg-slate-100 p-0.5">
922
+ <TabsTrigger value="text" className="h-7 text-xs gap-1.5">
923
+ <FileText className="h-3 w-3" />
924
+ Text
925
+ </TabsTrigger>
926
+ <TabsTrigger value="json" className="h-7 text-xs gap-1.5">
927
+ <Braces className="h-3 w-3" />
928
+ JSON
929
+ </TabsTrigger>
930
+ <TabsTrigger value="xml" className="h-7 text-xs gap-1.5">
931
+ <FileCode2 className="h-3 w-3" />
932
+ XML
933
+ </TabsTrigger>
934
+ </TabsList>
935
+ </Tabs>
936
+ <Button
937
+ variant="ghost"
938
+ size="sm"
939
+ onClick={handleCopy}
940
+ className="h-8 text-xs gap-1.5"
941
+ >
942
+ {copied ? (
943
+ <>
944
+ <Check className="h-3 w-3 text-emerald-500" />
945
+ Copied
946
+ </>
947
+ ) : (
948
+ <>
949
+ <Copy className="h-3 w-3" />
950
+ Copy
951
+ </>
952
+ )}
953
+ </Button>
954
+ </div>
955
+ )}
956
+ </div>
957
+
958
+ {/* Output Area */}
959
+ <div className="flex-1 overflow-auto">
960
+ {!hasFile ? (
961
+ <div className="h-full flex items-center justify-center p-6">
962
+ <div className="text-center">
963
+ <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
964
+ <Code2 className="h-10 w-10 text-slate-300" />
965
+ </div>
966
+ <p className="text-slate-400 text-sm">Extracted data will appear here</p>
967
+ </div>
968
+ </div>
969
+ ) : isProcessing ? (
970
+ <div className="h-full flex items-center justify-center p-6">
971
+ <div className="text-center">
972
+ <motion.div
973
+ animate={{ rotate: 360 }}
974
+ transition={{ duration: 2, repeat: Infinity, ease: "linear" }}
975
+ className="h-16 w-16 mx-auto rounded-2xl bg-gradient-to-br from-indigo-100 to-violet-100 flex items-center justify-center mb-4"
976
+ >
977
+ <Sparkles className="h-8 w-8 text-indigo-500" />
978
+ </motion.div>
979
+ <p className="text-slate-700 font-medium mb-1">Extracting data...</p>
980
+ <p className="text-slate-400 text-sm">{statusMessage}</p>
981
+
982
+ <div className="mt-6 flex items-center justify-center gap-1">
983
+ {[0, 1, 2].map((i) => (
984
+ <motion.div
985
+ key={i}
986
+ animate={{ scale: [1, 1.2, 1] }}
987
+ transition={{
988
+ duration: 0.6,
989
+ repeat: Infinity,
990
+ delay: i * 0.2,
991
+ }}
992
+ className="h-2 w-2 rounded-full bg-indigo-400"
993
+ />
994
+ ))}
995
+ </div>
996
+ </div>
997
+ </div>
998
+ ) : isComplete && Object.keys(fields).length === 0 ? (
999
+ <div className="h-full flex items-center justify-center p-6">
1000
+ <div className="text-center">
1001
+ <div className="h-20 w-20 mx-auto rounded-2xl bg-amber-100 flex items-center justify-center mb-4">
1002
+ <Code2 className="h-10 w-10 text-amber-600" />
1003
+ </div>
1004
+ <p className="text-slate-600 font-medium mb-1">No data extracted</p>
1005
+ <p className="text-slate-400 text-sm">The document may not contain extractable fields</p>
1006
+ </div>
1007
+ </div>
1008
+ ) : (
1009
+ <div className="p-4 font-mono text-sm">
1010
+ {activeTab === "text" ? (
1011
+ <div
1012
+ className="text-sm text-slate-700 leading-relaxed"
1013
+ style={{
1014
+ fontFamily: 'system-ui, -apple-system, sans-serif'
1015
+ }}
1016
+ >
1017
+ <div
1018
+ className="markdown-content"
1019
+ dangerouslySetInnerHTML={{ __html: renderMarkdownToHTML(fieldsToText(fields)) }}
1020
+ style={{
1021
+ lineHeight: '1.6'
1022
+ }}
1023
+ />
1024
+ <style>{`
1025
+ .markdown-content h1 {
1026
+ font-size: 1.5rem;
1027
+ font-weight: 700;
1028
+ color: #0f172a;
1029
+ margin-top: 1.5rem;
1030
+ margin-bottom: 1rem;
1031
+ line-height: 1.3;
1032
+ }
1033
+ .markdown-content h2 {
1034
+ font-size: 1.25rem;
1035
+ font-weight: 600;
1036
+ color: #0f172a;
1037
+ margin-top: 1.25rem;
1038
+ margin-bottom: 0.75rem;
1039
+ line-height: 1.3;
1040
+ }
1041
+ .markdown-content h3 {
1042
+ font-size: 1.125rem;
1043
+ font-weight: 600;
1044
+ color: #1e293b;
1045
+ margin-top: 1rem;
1046
+ margin-bottom: 0.5rem;
1047
+ line-height: 1.3;
1048
+ }
1049
+ .markdown-content p {
1050
+ margin-top: 0.75rem;
1051
+ margin-bottom: 0.75rem;
1052
+ color: #334155;
1053
+ }
1054
+ .markdown-content table {
1055
+ width: 100%;
1056
+ border-collapse: collapse;
1057
+ margin: 1.5rem 0;
1058
+ font-size: 0.875rem;
1059
+ box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1);
1060
+ }
1061
+ .markdown-content table caption {
1062
+ font-weight: 600;
1063
+ margin-bottom: 0.5rem;
1064
+ text-align: left;
1065
+ }
1066
+ .markdown-content table th {
1067
+ background-color: #f8fafc;
1068
+ border: 1px solid #cbd5e1;
1069
+ padding: 0.75rem;
1070
+ text-align: left;
1071
+ font-weight: 600;
1072
+ color: #0f172a;
1073
+ }
1074
+ .markdown-content table td {
1075
+ border: 1px solid #cbd5e1;
1076
+ padding: 0.75rem;
1077
+ color: #334155;
1078
+ }
1079
+ .markdown-content table tr:nth-child(even) {
1080
+ background-color: #f8fafc;
1081
+ }
1082
+ .markdown-content table tr:hover {
1083
+ background-color: #f1f5f9;
1084
+ }
1085
+ .markdown-content strong {
1086
+ font-weight: 600;
1087
+ color: #0f172a;
1088
+ }
1089
+ .markdown-content em {
1090
+ font-style: italic;
1091
+ }
1092
+ .markdown-content a {
1093
+ color: #4f46e5;
1094
+ text-decoration: underline;
1095
+ }
1096
+ .markdown-content a:hover {
1097
+ color: #4338ca;
1098
+ }
1099
+ .markdown-content sup {
1100
+ font-size: 0.75em;
1101
+ vertical-align: super;
1102
+ line-height: 0;
1103
+ position: relative;
1104
+ top: -0.5em;
1105
+ }
1106
+ .markdown-content sub {
1107
+ font-size: 0.75em;
1108
+ vertical-align: sub;
1109
+ line-height: 0;
1110
+ position: relative;
1111
+ bottom: -0.25em;
1112
+ }
1113
+ .markdown-content ul, .markdown-content ol {
1114
+ margin: 0.75rem 0;
1115
+ padding-left: 1.5rem;
1116
+ }
1117
+ .markdown-content li {
1118
+ margin: 0.25rem 0;
1119
+ }
1120
+ `}</style>
1121
+ </div>
1122
+ ) : activeTab === "json" ? (
1123
+ <div className="space-y-1">
1124
+ <span className="text-slate-400">{"{"}</span>
1125
+ {Object.keys(preparedFields).length > 0 ? (
1126
+ Object.entries(preparedFields).map(([key, value]) =>
1127
+ renderSection(key, value, 1)
1128
+ )
1129
+ ) : (
1130
+ <div className="pl-4 text-slate-400 italic">No fields extracted</div>
1131
+ )}
1132
+ <span className="text-slate-400">{"}"}</span>
1133
+ </div>
1134
+ ) : (
1135
+ <pre className="text-sm text-slate-600 whitespace-pre-wrap">
1136
+ {objectToXML(fields).split("\n").map((line, i) => (
1137
+ <div key={i} className="hover:bg-slate-50 px-2 -mx-2 rounded">
1138
+ {line.includes("<") ? (
1139
+ <>
1140
+ {line.split(/(<\/?[\w\s=".-]+>)/g).map((part, j) => {
1141
+ if (part.startsWith("</")) {
1142
+ return (
1143
+ <span key={j} className="text-rose-500">
1144
+ {part}
1145
+ </span>
1146
+ );
1147
+ }
1148
+ if (part.startsWith("<")) {
1149
+ return (
1150
+ <span key={j} className="text-indigo-500">
1151
+ {part}
1152
+ </span>
1153
+ );
1154
+ }
1155
+ return (
1156
+ <span key={j} className="text-slate-700">
1157
+ {part}
1158
+ </span>
1159
+ );
1160
+ })}
1161
+ </>
1162
+ ) : (
1163
+ line
1164
+ )}
1165
+ </div>
1166
+ ))}
1167
+ </pre>
1168
+ )}
1169
+ </div>
1170
+ )}
1171
+ </div>
1172
+
1173
+ {/* Confidence Footer */}
1174
+ {isComplete && extractionResult && (
1175
+ <div className="px-5 py-3 border-t border-slate-100 bg-slate-50/50">
1176
+ <div className="flex items-center justify-between text-xs">
1177
+ <div className="flex items-center gap-4">
1178
+ <div className="flex items-center gap-1.5">
1179
+ <div className={cn(
1180
+ "h-2 w-2 rounded-full",
1181
+ confidence >= 90 ? "bg-emerald-500" : confidence >= 70 ? "bg-amber-500" : "bg-red-500"
1182
+ )} />
1183
+ <span className="text-slate-500">Confidence:</span>
1184
+ <span className="font-semibold text-slate-700">
1185
+ {confidence > 0 ? `${confidence.toFixed(1)}%` : "N/A"}
1186
+ </span>
1187
+ </div>
1188
+ <div className="flex items-center gap-1.5">
1189
+ <span className="text-slate-500">Fields:</span>
1190
+ <span className="font-semibold text-slate-700">{fieldsExtracted}</span>
1191
+ </div>
1192
+ </div>
1193
+ <span className="text-slate-400">
1194
+ Processed in {totalTime >= 1000 ? `${(totalTime / 1000).toFixed(1)}s` : `${totalTime}ms`}
1195
+ </span>
1196
+ </div>
1197
+ </div>
1198
+ )}
1199
+ </div>
1200
+ );
1201
+ }
frontend/src/components/ocr/ProcessingStatus.jsx CHANGED
@@ -1,111 +1,118 @@
1
- import React from "react";
2
- import { motion } from "framer-motion";
3
- import {
4
- FileSearch,
5
- Cpu,
6
- TableProperties,
7
- CheckCircle2,
8
- Loader2,
9
- } from "lucide-react";
10
- import { cn } from "@/lib/utils";
11
-
12
- const steps = [
13
- { id: "upload", label: "Received", icon: FileSearch },
14
- { id: "analyze", label: "Analysis", icon: Cpu },
15
- { id: "extract", label: "Extraction", icon: TableProperties },
16
- { id: "complete", label: "Done", icon: CheckCircle2 },
17
- ];
18
-
19
- export default function ProcessingStatus({ isProcessing, isComplete }) {
20
- const getCurrentStep = () => {
21
- if (isComplete) return 4;
22
- if (isProcessing) return 2;
23
- return 0;
24
- };
25
-
26
- const currentStep = getCurrentStep();
27
-
28
- if (!isProcessing && !isComplete) return null;
29
-
30
- return (
31
- <motion.div
32
- initial={{ opacity: 0, y: -10 }}
33
- animate={{ opacity: 1, y: 0 }}
34
- className="bg-white rounded-xl border border-slate-200 px-4 py-3"
35
- >
36
- <div className="flex items-center justify-between gap-2">
37
- {steps.map((step, index) => {
38
- const isActive = index + 1 === currentStep;
39
- const isCompleted = index + 1 < currentStep || isComplete;
40
- const Icon = step.icon;
41
-
42
- return (
43
- <React.Fragment key={step.id}>
44
- <div className="flex items-center gap-2">
45
- <motion.div
46
- initial={false}
47
- animate={{
48
- scale: (isActive && !isComplete) ? 1.05 : 1,
49
- backgroundColor: isCompleted
50
- ? "rgb(16 185 129)"
51
- : (isActive && !isComplete)
52
- ? "rgb(99 102 241)"
53
- : "rgb(241 245 249)",
54
- }}
55
- className={cn(
56
- "h-8 w-8 rounded-lg flex items-center justify-center transition-colors",
57
- (isCompleted || isActive) && "shadow-md"
58
- )}
59
- style={{
60
- boxShadow: (isActive && !isComplete)
61
- ? "0 4px 8px -2px rgba(99, 102, 241, 0.3)"
62
- : isCompleted
63
- ? "0 4px 8px -2px rgba(16, 185, 129, 0.3)"
64
- : "none",
65
- }}
66
- >
67
- {(isActive && !isComplete) ? (
68
- <motion.div
69
- animate={{ rotate: 360 }}
70
- transition={{ duration: 1.5, repeat: Infinity, ease: "linear" }}
71
- >
72
- <Loader2 className="h-4 w-4 text-white" />
73
- </motion.div>
74
- ) : isCompleted ? (
75
- <CheckCircle2 className="h-4 w-4 text-white" />
76
- ) : (
77
- <Icon className={cn("h-4 w-4 text-slate-400")} />
78
- )}
79
- </motion.div>
80
- <span
81
- className={cn(
82
- "text-xs font-medium hidden sm:inline",
83
- isActive ? "text-indigo-600" : isCompleted ? "text-emerald-600" : "text-slate-400"
84
- )}
85
- >
86
- {step.label}
87
- </span>
88
- </div>
89
-
90
- {index < steps.length - 1 && (
91
- <div className="flex-1 h-0.5 mx-1 relative overflow-hidden rounded-full bg-slate-100">
92
- <motion.div
93
- initial={{ width: 0 }}
94
- animate={{
95
- width: isCompleted ? "100%" : isActive ? "50%" : "0%",
96
- }}
97
- transition={{ duration: 0.5 }}
98
- className={cn(
99
- "absolute inset-y-0 left-0",
100
- isCompleted ? "bg-emerald-500" : "bg-indigo-500"
101
- )}
102
- />
103
- </div>
104
- )}
105
- </React.Fragment>
106
- );
107
- })}
108
- </div>
109
- </motion.div>
110
- );
111
- }
 
 
 
 
 
 
 
 
1
+ import React from "react";
2
+ import { motion } from "framer-motion";
3
+ import {
4
+ FileSearch,
5
+ Cpu,
6
+ TableProperties,
7
+ CheckCircle2,
8
+ Loader2,
9
+ } from "lucide-react";
10
+ import { cn } from "@/lib/utils";
11
+
12
/**
 * Ordered pipeline stages shown in the status bar.
 * Each entry carries a stable `id` (used as the React key), the visible
 * `label`, and the icon component drawn while the stage is still pending.
 */
const steps = [
  ["upload", "Received", FileSearch],
  ["analyze", "Analysis", Cpu],
  ["extract", "Extraction", TableProperties],
  ["complete", "Done", CheckCircle2],
].map(([id, label, icon]) => ({ id, label, icon }));
18
+
19
+ export default function ProcessingStatus({ isProcessing, isComplete, currentStage }) {
20
+ const getCurrentStep = () => {
21
+ if (isComplete) return 4; // Done
22
+ if (!isProcessing) return 0; // Not started
23
+
24
+ // Use provided currentStage or default based on isProcessing
25
+ if (currentStage === "extraction") return 3; // Extraction
26
+ if (currentStage === "analysis") return 2; // Analysis
27
+ if (currentStage === "received") return 1; // Received
28
+
29
+ // Default: if processing, start at Analysis
30
+ return 2; // Analysis
31
+ };
32
+
33
+ const currentStep = getCurrentStep();
34
+
35
+ if (!isProcessing && !isComplete) return null;
36
+
37
+ return (
38
+ <motion.div
39
+ initial={{ opacity: 0, y: -10 }}
40
+ animate={{ opacity: 1, y: 0 }}
41
+ className="bg-white rounded-xl border border-slate-200 px-4 py-3"
42
+ >
43
+ <div className="flex items-center justify-between gap-2">
44
+ {steps.map((step, index) => {
45
+ const isActive = index + 1 === currentStep;
46
+ const isCompleted = index + 1 < currentStep || isComplete;
47
+ const Icon = step.icon;
48
+
49
+ return (
50
+ <React.Fragment key={step.id}>
51
+ <div className="flex items-center gap-2">
52
+ <motion.div
53
+ initial={false}
54
+ animate={{
55
+ scale: (isActive && !isComplete) ? 1.05 : 1,
56
+ backgroundColor: isCompleted
57
+ ? "rgb(16 185 129)"
58
+ : (isActive && !isComplete)
59
+ ? "rgb(99 102 241)"
60
+ : "rgb(241 245 249)",
61
+ }}
62
+ className={cn(
63
+ "h-8 w-8 rounded-lg flex items-center justify-center transition-colors",
64
+ (isCompleted || isActive) && "shadow-md"
65
+ )}
66
+ style={{
67
+ boxShadow: (isActive && !isComplete)
68
+ ? "0 4px 8px -2px rgba(99, 102, 241, 0.3)"
69
+ : isCompleted
70
+ ? "0 4px 8px -2px rgba(16, 185, 129, 0.3)"
71
+ : "none",
72
+ }}
73
+ >
74
+ {(isActive && !isComplete) ? (
75
+ <motion.div
76
+ animate={{ rotate: 360 }}
77
+ transition={{ duration: 1.5, repeat: Infinity, ease: "linear" }}
78
+ >
79
+ <Loader2 className="h-4 w-4 text-white" />
80
+ </motion.div>
81
+ ) : isCompleted ? (
82
+ <CheckCircle2 className="h-4 w-4 text-white" />
83
+ ) : (
84
+ <Icon className={cn("h-4 w-4 text-slate-400")} />
85
+ )}
86
+ </motion.div>
87
+ <span
88
+ className={cn(
89
+ "text-xs font-medium hidden sm:inline",
90
+ isActive ? "text-indigo-600" : isCompleted ? "text-emerald-600" : "text-slate-400"
91
+ )}
92
+ >
93
+ {step.label}
94
+ </span>
95
+ </div>
96
+
97
+ {index < steps.length - 1 && (
98
+ <div className="flex-1 h-0.5 mx-1 relative overflow-hidden rounded-full bg-slate-100">
99
+ <motion.div
100
+ initial={{ width: 0 }}
101
+ animate={{
102
+ width: isCompleted ? "100%" : isActive ? "50%" : "0%",
103
+ }}
104
+ transition={{ duration: 0.5 }}
105
+ className={cn(
106
+ "absolute inset-y-0 left-0",
107
+ isCompleted ? "bg-emerald-500" : "bg-indigo-500"
108
+ )}
109
+ />
110
+ </div>
111
+ )}
112
+ </React.Fragment>
113
+ );
114
+ })}
115
+ </div>
116
+ </motion.div>
117
+ );
118
+ }
frontend/src/components/ocr/UpgradeModal.jsx ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from "react";
2
+ import { motion } from "framer-motion";
3
+ import { cn } from "@/lib/utils";
4
+ import {
5
+ X,
6
+ Sparkles,
7
+ Zap,
8
+ Shield,
9
+ Cloud,
10
+ BarChart3,
11
+ Bot,
12
+ Globe,
13
+ Lock,
14
+ Rocket,
15
+ Users,
16
+ CheckCircle2,
17
+ ArrowRight
18
+ } from "lucide-react";
19
+ import { Button } from "@/components/ui/button";
20
+
21
/**
 * Upsell cards rendered in the modal's feature grid.
 * `color` selects the accent classes for the icon tile and CTA button;
 * `gradient` is the hover wash interpolated into the card background.
 */
const features = [
  // Card 1: lifting the trial limits
  {
    icon: Zap,
    title: "Production-Scale Processing",
    description: "Remove trial limits and run live AP and operations workflows",
    color: "amber",
    cta: "Explore with a demo",
    gradient: "from-amber-500 to-orange-500",
  },
  // Card 2: customizable agentic pipelines
  {
    icon: Bot,
    title: "Advanced Agentic Processing",
    description: "You can customize your own agentic pipeline with your own data",
    color: "indigo",
    cta: "Talk to Sales",
    gradient: "from-indigo-500 to-violet-500",
  },
  // Card 3: REST API integration
  {
    icon: Cloud,
    title: "API Access",
    description: "Integrate EZOFIS into your workflow with our REST API",
    color: "blue",
    cta: "Talk to a Techie!",
    gradient: "from-blue-500 to-cyan-500",
  },
];
47
+
48
+ export default function UpgradeModal({ open, onClose }) {
49
+ if (!open) return null;
50
+
51
+ return (
52
+ <div className="fixed inset-0 z-50 flex items-center justify-center">
53
+ {/* Backdrop */}
54
+ <motion.div
55
+ initial={{ opacity: 0 }}
56
+ animate={{ opacity: 1 }}
57
+ exit={{ opacity: 0 }}
58
+ className="absolute inset-0 bg-black/50 backdrop-blur-sm"
59
+ onClick={onClose}
60
+ />
61
+
62
+ {/* Modal */}
63
+ <motion.div
64
+ initial={{ opacity: 0, scale: 0.95, y: 20 }}
65
+ animate={{ opacity: 1, scale: 1, y: 0 }}
66
+ exit={{ opacity: 0, scale: 0.95, y: 20 }}
67
+ className="relative z-10 w-full max-w-6xl max-h-[90vh] mx-4 bg-white rounded-2xl shadow-2xl overflow-hidden flex flex-col"
68
+ onClick={(e) => e.stopPropagation()}
69
+ >
70
+ {/* Header */}
71
+ <div className="sticky top-0 bg-gradient-to-r from-indigo-600 via-violet-600 to-purple-600 text-white px-8 py-6 z-10">
72
+ <button
73
+ onClick={onClose}
74
+ className="absolute right-6 top-6 h-8 w-8 rounded-lg bg-white/10 hover:bg-white/20 flex items-center justify-center transition-colors"
75
+ >
76
+ <X className="h-4 w-4" />
77
+ </button>
78
+
79
+ <motion.div
80
+ initial={{ opacity: 0, y: 20 }}
81
+ animate={{ opacity: 1, y: 0 }}
82
+ className="text-center"
83
+ >
84
+ <div className="inline-flex items-center gap-2 px-4 py-1.5 rounded-full bg-white/10 backdrop-blur-sm mb-4">
85
+ <Sparkles className="h-4 w-4" />
86
+ <span className="text-sm font-medium">Trial Limit Reached</span>
87
+ </div>
88
+ <h2 className="text-3xl font-bold mb-2">You've processed 2 documents</h2>
89
+ <p className="text-white/80 text-lg">Continue with production-ready document intelligence</p>
90
+ </motion.div>
91
+ </div>
92
+
93
+ {/* Stats Bar */}
94
+ <div className="grid grid-cols-3 gap-6 px-8 py-6 bg-slate-50 border-b border-slate-200">
95
+ {[
96
+ { label: "Accuracy Rate", value: "99.8%", icon: CheckCircle2 },
97
+ { label: "Processing Speed", value: "< 10s", icon: Zap },
98
+ { label: "Operational Users", value: "10,000+", icon: Users }
99
+ ].map((stat, i) => (
100
+ <motion.div
101
+ key={stat.label}
102
+ initial={{ opacity: 0, y: 20 }}
103
+ animate={{ opacity: 1, y: 0 }}
104
+ transition={{ delay: i * 0.1 }}
105
+ className="text-center"
106
+ >
107
+ <div className="flex items-center justify-center gap-2 mb-1">
108
+ <stat.icon className="h-4 w-4 text-indigo-600" />
109
+ <span className="text-2xl font-bold text-slate-900">{stat.value}</span>
110
+ </div>
111
+ <p className="text-sm text-slate-500">{stat.label}</p>
112
+ </motion.div>
113
+ ))}
114
+ </div>
115
+
116
+ {/* Features Grid - Scrollable */}
117
+ <div className="flex-1 overflow-auto px-8 py-8">
118
+ <div className="text-center mb-8">
119
+ <h3 className="text-2xl font-bold text-slate-900 mb-2">
120
+ Continue to Production Use
121
+ </h3>
122
+
123
+ </div>
124
+
125
+ <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
126
+ {features.map((feature, index) => (
127
+ <motion.div
128
+ key={feature.title}
129
+ initial={{ opacity: 0, y: 20 }}
130
+ animate={{ opacity: 1, y: 0 }}
131
+ transition={{ delay: 0.2 + index * 0.1 }}
132
+ className="group relative bg-white rounded-2xl border border-slate-200 p-6 hover:shadow-xl hover:shadow-slate-200/50 transition-all duration-300 hover:-translate-y-1 overflow-hidden"
133
+ >
134
+ {/* Gradient Background on Hover */}
135
+ <div className={`absolute inset-0 bg-gradient-to-br ${feature.gradient} opacity-0 group-hover:opacity-5 transition-opacity duration-300`} />
136
+
137
+ <div className="relative">
138
+ <div className={cn(
139
+ "h-12 w-12 rounded-xl flex items-center justify-center mb-4 group-hover:scale-110 transition-transform duration-300",
140
+ feature.color === "amber" && "bg-amber-50",
141
+ feature.color === "indigo" && "bg-indigo-50",
142
+ feature.color === "blue" && "bg-blue-50",
143
+ feature.color === "emerald" && "bg-emerald-50",
144
+ feature.color === "slate" && "bg-slate-50",
145
+ feature.color === "purple" && "bg-purple-50"
146
+ )}>
147
+ <feature.icon className={cn(
148
+ "h-6 w-6",
149
+ feature.color === "amber" && "text-amber-600",
150
+ feature.color === "indigo" && "text-indigo-600",
151
+ feature.color === "blue" && "text-blue-600",
152
+ feature.color === "emerald" && "text-emerald-600",
153
+ feature.color === "slate" && "text-slate-600",
154
+ feature.color === "purple" && "text-purple-600"
155
+ )} />
156
+ </div>
157
+ <h4 className="font-semibold text-slate-900 mb-2">{feature.title}</h4>
158
+ <p className="text-sm text-slate-600 mb-4 leading-relaxed">{feature.description}</p>
159
+
160
+ <Button
161
+ variant="ghost"
162
+ size="sm"
163
+ className={cn(
164
+ "w-full h-9 border transition-all group-hover:shadow-md",
165
+ feature.color === "amber" && "text-amber-600 hover:bg-amber-50 border-amber-200 hover:border-amber-300",
166
+ feature.color === "indigo" && "text-indigo-600 hover:bg-indigo-50 border-indigo-200 hover:border-indigo-300",
167
+ feature.color === "blue" && "text-blue-600 hover:bg-blue-50 border-blue-200 hover:border-blue-300",
168
+ feature.color === "emerald" && "text-emerald-600 hover:bg-emerald-50 border-emerald-200 hover:border-emerald-300",
169
+ feature.color === "slate" && "text-slate-600 hover:bg-slate-50 border-slate-200 hover:border-slate-300",
170
+ feature.color === "purple" && "text-purple-600 hover:bg-purple-50 border-purple-200 hover:border-purple-300"
171
+ )}
172
+ >
173
+ {feature.cta}
174
+ <ArrowRight className="h-3.5 w-3.5 ml-2 group-hover:translate-x-1 transition-transform" />
175
+ </Button>
176
+ </div>
177
+ </motion.div>
178
+ ))}
179
+ </div>
180
+ </div>
181
+
182
+ {/* CTA Footer */}
183
+ <div className="sticky bottom-0 bg-white border-t border-slate-200 px-8 py-6">
184
+ <div className="flex items-center justify-between gap-6">
185
+ <div className="flex-1">
186
+ <h4 className="font-semibold text-slate-900 mb-1">Ready to scale?</h4>
187
+ <p className="text-sm text-slate-600">No commitment. We’ll tailor the demo to your documents and workflows.</p>
188
+ </div>
189
+ <div className="flex items-center gap-3">
190
+ <Button
191
+ variant="outline"
192
+ size="lg"
193
+ className="h-11 border-slate-300"
194
+ >
195
+ <Users className="h-4 w-4 mr-2" />
196
+ Talk to Sales
197
+ </Button>
198
+ <Button
199
+ size="lg"
200
+ className="h-11 bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700 shadow-lg shadow-indigo-500/25 hover:shadow-xl hover:shadow-indigo-500/30"
201
+ >
202
+ <Rocket className="h-4 w-4 mr-2" />
203
+ Start a production evaluation
204
+ <Sparkles className="h-4 w-4 ml-2" />
205
+ </Button>
206
+ </div>
207
+ </div>
208
+ </div>
209
+ </motion.div>
210
+ </div>
211
+ );
212
+ }
213
+
frontend/src/components/ocr/UploadZone.jsx CHANGED
@@ -1,147 +1,251 @@
1
- import React, { useState } from "react";
2
- import { motion, AnimatePresence } from "framer-motion";
3
- import { Upload, FileText, Image, FileSpreadsheet, X, Sparkles } from "lucide-react";
4
- import { cn } from "@/lib/utils";
5
-
6
- export default function UploadZone({ onFileSelect, selectedFile, onClear }) {
7
- const [isDragging, setIsDragging] = useState(false);
8
-
9
- const handleDragOver = (e) => {
10
- e.preventDefault();
11
- setIsDragging(true);
12
- };
13
-
14
- const handleDragLeave = () => {
15
- setIsDragging(false);
16
- };
17
-
18
- const handleDrop = (e) => {
19
- e.preventDefault();
20
- setIsDragging(false);
21
- const file = e.dataTransfer.files[0];
22
- if (file) onFileSelect(file);
23
- };
24
-
25
- const getFileIcon = (type) => {
26
- if (type?.includes("image")) return Image;
27
- if (type?.includes("spreadsheet") || type?.includes("excel")) return FileSpreadsheet;
28
- return FileText;
29
- };
30
-
31
- const FileIcon = selectedFile ? getFileIcon(selectedFile.type) : FileText;
32
-
33
- return (
34
- <div className="w-full">
35
- <AnimatePresence mode="wait">
36
- {!selectedFile ? (
37
- <motion.div
38
- key="upload"
39
- initial={{ opacity: 0, y: 10 }}
40
- animate={{ opacity: 1, y: 0 }}
41
- exit={{ opacity: 0, y: -10 }}
42
- transition={{ duration: 0.2 }}
43
- onDragOver={handleDragOver}
44
- onDragLeave={handleDragLeave}
45
- onDrop={handleDrop}
46
- className={cn(
47
- "relative group cursor-pointer",
48
- "border-2 border-dashed rounded-2xl",
49
- "transition-all duration-300 ease-out",
50
- isDragging
51
- ? "border-indigo-400 bg-indigo-50/50"
52
- : "border-slate-200 hover:border-indigo-300 hover:bg-slate-50/50"
53
- )}
54
- >
55
- <label className="flex flex-col items-center justify-center py-16 px-8 cursor-pointer">
56
- <motion.div
57
- animate={isDragging ? { scale: 1.1, y: -5 } : { scale: 1, y: 0 }}
58
- className={cn(
59
- "h-16 w-16 rounded-2xl flex items-center justify-center mb-6 transition-colors duration-300",
60
- isDragging
61
- ? "bg-indigo-100"
62
- : "bg-gradient-to-br from-slate-100 to-slate-50 group-hover:from-indigo-100 group-hover:to-violet-50"
63
- )}
64
- >
65
- <Upload
66
- className={cn(
67
- "h-7 w-7 transition-colors duration-300",
68
- isDragging ? "text-indigo-600" : "text-slate-400 group-hover:text-indigo-500"
69
- )}
70
- />
71
- </motion.div>
72
-
73
- <div className="text-center">
74
- <p className="text-lg font-semibold text-slate-700 mb-1">
75
- {isDragging ? "Drop your file here" : "Drop your file here, or browse"}
76
- </p>
77
- <p className="text-sm text-slate-400">
78
- Supports PDF, PNG, JPG, TIFF, DOCX up to 50MB
79
- </p>
80
- </div>
81
-
82
- <div className="flex items-center gap-2 mt-6">
83
- <div className="flex -space-x-1">
84
- {[
85
- "bg-red-100 text-red-600",
86
- "bg-blue-100 text-blue-600",
87
- "bg-green-100 text-green-600",
88
- "bg-amber-100 text-amber-600",
89
- ].map((color, i) => (
90
- <div
91
- key={i}
92
- className={`h-8 w-8 rounded-lg ${color.split(" ")[0]} flex items-center justify-center border-2 border-white`}
93
- >
94
- <FileText className={`h-4 w-4 ${color.split(" ")[1]}`} />
95
- </div>
96
- ))}
97
- </div>
98
- <span className="text-xs text-slate-400 ml-2">Multiple formats supported</span>
99
- </div>
100
-
101
- <input
102
- type="file"
103
- className="hidden"
104
- accept=".pdf,.png,.jpg,.jpeg,.tiff,.docx,.xlsx"
105
- onChange={(e) => e.target.files[0] && onFileSelect(e.target.files[0])}
106
- />
107
- </label>
108
-
109
- {/* Decorative gradient border on hover */}
110
- <div className="absolute inset-0 -z-10 rounded-2xl bg-gradient-to-r from-indigo-500 via-violet-500 to-purple-500 opacity-0 group-hover:opacity-10 blur-xl transition-opacity duration-500" />
111
- </motion.div>
112
- ) : (
113
- <motion.div
114
- key="selected"
115
- initial={{ opacity: 0, scale: 0.95 }}
116
- animate={{ opacity: 1, scale: 1 }}
117
- exit={{ opacity: 0, scale: 0.95 }}
118
- className="relative bg-gradient-to-br from-indigo-50 to-violet-50 rounded-xl p-3 border border-indigo-100"
119
- >
120
- <div className="flex items-center gap-3">
121
- <div className="h-10 w-10 rounded-lg bg-white shadow-sm flex items-center justify-center flex-shrink-0">
122
- <FileIcon className="h-5 w-5 text-indigo-600" />
123
- </div>
124
- <div className="flex-1 min-w-0">
125
- <p className="font-medium text-slate-800 truncate text-sm">{selectedFile.name}</p>
126
- <div className="flex items-center gap-2 text-xs text-slate-500">
127
- <span>{(selectedFile.size / 1024 / 1024).toFixed(2)} MB</span>
128
- <span className="text-indigo-500">•</span>
129
- <span className="text-indigo-600 flex items-center gap-1">
130
- <Sparkles className="h-3 w-3" />
131
- Ready for extraction
132
- </span>
133
- </div>
134
- </div>
135
- <button
136
- onClick={onClear}
137
- className="h-8 w-8 rounded-lg bg-white hover:bg-red-50 border border-slate-200 hover:border-red-200 flex items-center justify-center text-slate-400 hover:text-red-500 transition-colors"
138
- >
139
- <X className="h-4 w-4" />
140
- </button>
141
- </div>
142
- </motion.div>
143
- )}
144
- </AnimatePresence>
145
- </div>
146
- );
147
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState, useEffect } from "react";
2
+ import { motion, AnimatePresence } from "framer-motion";
3
+ import { Upload, FileText, Image, FileSpreadsheet, X, Sparkles, AlertCircle } from "lucide-react";
4
+ import { cn } from "@/lib/utils";
5
+ import { Input } from "@/components/ui/input";
6
+
7
// MIME types accepted by the upload zone.
const ALLOWED_TYPES = [
  "application/pdf",
  "image/png",
  "image/jpeg",
  "image/jpg",
  "image/tiff",
  "image/tif",
];

// File-name extensions accepted as a fallback when the MIME type does not match.
const ALLOWED_EXTENSIONS = [
  ".pdf",
  ".png",
  ".jpg",
  ".jpeg",
  ".tiff",
  ".tif",
];

// Upload size cap in bytes (4 MB).
const MAX_FILE_SIZE = 4 * 1024 ** 2;
22
+
23
+ export default function UploadZone({ onFileSelect, selectedFile, onClear, keyFields = "", onKeyFieldsChange = () => {} }) {
24
+ const [isDragging, setIsDragging] = useState(false);
25
+ const [error, setError] = useState(null);
26
+
27
+ const validateFile = (file) => {
28
+ // Reset error
29
+ setError(null);
30
+
31
+ // Check file type
32
+ const fileExtension = "." + file.name.split(".").pop().toLowerCase();
33
+ const isValidType = ALLOWED_TYPES.includes(file.type) || ALLOWED_EXTENSIONS.includes(fileExtension);
34
+
35
+ if (!isValidType) {
36
+ setError("Only PDF, PNG, JPG, and TIFF files are allowed.");
37
+ return false;
38
+ }
39
+
40
+ // Check file size
41
+ if (file.size > MAX_FILE_SIZE) {
42
+ const fileSizeMB = (file.size / 1024 / 1024).toFixed(2);
43
+ setError(`File size exceeds 4 MB limit. Your file is ${fileSizeMB} MB.`);
44
+ return false;
45
+ }
46
+
47
+ return true;
48
+ };
49
+
50
+ const handleFileSelect = (file) => {
51
+ if (validateFile(file)) {
52
+ setError(null);
53
+ onFileSelect(file);
54
+ }
55
+ };
56
+
57
+ const handleDragOver = (e) => {
58
+ e.preventDefault();
59
+ setIsDragging(true);
60
+ };
61
+
62
+ const handleDragLeave = () => {
63
+ setIsDragging(false);
64
+ };
65
+
66
+ const handleDrop = (e) => {
67
+ e.preventDefault();
68
+ setIsDragging(false);
69
+ const file = e.dataTransfer.files[0];
70
+ if (file) {
71
+ handleFileSelect(file);
72
+ }
73
+ };
74
+
75
+ const getFileIcon = (type) => {
76
+ if (type?.includes("image")) return Image;
77
+ if (type?.includes("spreadsheet") || type?.includes("excel")) return FileSpreadsheet;
78
+ return FileText;
79
+ };
80
+
81
+ const FileIcon = selectedFile ? getFileIcon(selectedFile.type) : FileText;
82
+
83
+ // Clear error when file is cleared
84
+ useEffect(() => {
85
+ if (!selectedFile) {
86
+ setError(null);
87
+ }
88
+ }, [selectedFile]);
89
+
90
+ return (
91
+ <div className="w-full">
92
+ <AnimatePresence mode="wait">
93
+ {!selectedFile ? (
94
+ <motion.div
95
+ key="upload"
96
+ initial={{ opacity: 0, y: 10 }}
97
+ animate={{ opacity: 1, y: 0 }}
98
+ exit={{ opacity: 0, y: -10 }}
99
+ transition={{ duration: 0.2 }}
100
+ onDragOver={handleDragOver}
101
+ onDragLeave={handleDragLeave}
102
+ onDrop={handleDrop}
103
+ className={cn(
104
+ "relative group cursor-pointer",
105
+ "border-2 border-dashed rounded-2xl",
106
+ "transition-all duration-300 ease-out",
107
+ isDragging
108
+ ? "border-indigo-400 bg-indigo-50/50"
109
+ : "border-slate-200 hover:border-indigo-300 hover:bg-slate-50/50"
110
+ )}
111
+ >
112
+ <label className="flex flex-col items-center justify-center py-16 px-8 cursor-pointer">
113
+ <motion.div
114
+ animate={isDragging ? { scale: 1.1, y: -5 } : { scale: 1, y: 0 }}
115
+ className={cn(
116
+ "h-16 w-16 rounded-2xl flex items-center justify-center mb-6 transition-colors duration-300",
117
+ isDragging
118
+ ? "bg-indigo-100"
119
+ : "bg-gradient-to-br from-slate-100 to-slate-50 group-hover:from-indigo-100 group-hover:to-violet-50"
120
+ )}
121
+ >
122
+ <Upload
123
+ className={cn(
124
+ "h-7 w-7 transition-colors duration-300",
125
+ isDragging ? "text-indigo-600" : "text-slate-400 group-hover:text-indigo-500"
126
+ )}
127
+ />
128
+ </motion.div>
129
+
130
+ <div className="text-center">
131
+ <p className="text-lg font-semibold text-slate-700 mb-1">
132
+ {isDragging ? "Drop your file here" : "Drop your file here, or browse"}
133
+ </p>
134
+ <p className="text-sm text-slate-400">
135
+ Supports PDF, PNG, JPG, TIFF up to 4MB
136
+ </p>
137
+ </div>
138
+
139
+ <div className="flex items-center gap-2 mt-6">
140
+ <div className="flex -space-x-1">
141
+ {[
142
+ "bg-red-100 text-red-600",
143
+ "bg-blue-100 text-blue-600",
144
+ "bg-green-100 text-green-600",
145
+ "bg-amber-100 text-amber-600",
146
+ ].map((color, i) => (
147
+ <div
148
+ key={i}
149
+ className={`h-8 w-8 rounded-lg ${color.split(" ")[0]} flex items-center justify-center border-2 border-white`}
150
+ >
151
+ <FileText className={`h-4 w-4 ${color.split(" ")[1]}`} />
152
+ </div>
153
+ ))}
154
+ </div>
155
+ <span className="text-xs text-slate-400 ml-2">Multiple formats supported</span>
156
+ </div>
157
+
158
+ <input
159
+ type="file"
160
+ className="hidden"
161
+ accept=".pdf,.png,.jpg,.jpeg,.tiff,.tif"
162
+ onChange={(e) => {
163
+ const file = e.target.files[0];
164
+ if (file) {
165
+ handleFileSelect(file);
166
+ }
167
+ // Reset input so same file can be selected again after error
168
+ e.target.value = "";
169
+ }}
170
+ />
171
+ </label>
172
+
173
+ {/* Decorative gradient border on hover */}
174
+ <div className="absolute inset-0 -z-10 rounded-2xl bg-gradient-to-r from-indigo-500 via-violet-500 to-purple-500 opacity-0 group-hover:opacity-10 blur-xl transition-opacity duration-500" />
175
+ </motion.div>
176
+ ) : (
177
+ <motion.div
178
+ key="selected"
179
+ initial={{ opacity: 0, scale: 0.95 }}
180
+ animate={{ opacity: 1, scale: 1 }}
181
+ exit={{ opacity: 0, scale: 0.95 }}
182
+ className="grid grid-cols-1 lg:grid-cols-2 gap-3"
183
+ >
184
+ {/* File Info Box */}
185
+ <div className="relative bg-gradient-to-br from-indigo-50 to-violet-50 rounded-xl p-3 border border-indigo-100">
186
+ <div className="flex items-center gap-3">
187
+ <div className="h-10 w-10 rounded-lg bg-white shadow-sm flex items-center justify-center flex-shrink-0">
188
+ <FileIcon className="h-5 w-5 text-indigo-600" />
189
+ </div>
190
+ <div className="flex-1 min-w-0">
191
+ <p className="font-medium text-slate-800 truncate text-sm">{selectedFile.name}</p>
192
+ <div className="flex items-center gap-2 text-xs text-slate-500">
193
+ <span>{(selectedFile.size / 1024 / 1024).toFixed(2)} MB</span>
194
+ <span className="text-indigo-500">•</span>
195
+ <span className="text-indigo-600 flex items-center gap-1">
196
+ <Sparkles className="h-3 w-3" />
197
+ Ready for extraction
198
+ </span>
199
+ </div>
200
+ </div>
201
+ <button
202
+ onClick={onClear}
203
+ className="h-8 w-8 rounded-lg bg-white hover:bg-red-50 border border-slate-200 hover:border-red-200 flex items-center justify-center text-slate-400 hover:text-red-500 transition-colors"
204
+ >
205
+ <X className="h-4 w-4" />
206
+ </button>
207
+ </div>
208
+ </div>
209
+
210
+ {/* Key Fields Box */}
211
+ <div className="relative bg-white rounded-xl p-3 border border-slate-200">
212
+ <label className="block text-xs font-medium text-slate-600 mb-1.5">
213
+ <span className="font-bold">Key Fields</span> <span className="font-normal">(if required)</span>
214
+ </label>
215
+ <Input
216
+ type="text"
217
+ value={keyFields || ""}
218
+ onChange={(e) => {
219
+ if (onKeyFieldsChange) {
220
+ onKeyFieldsChange(e.target.value);
221
+ }
222
+ }}
223
+ placeholder="Invoice Number, Invoice Date, PO Number, Supplier Name, Total Amount, Payment terms, Additional Notes"
224
+ className="h-8 text-xs border-slate-200 focus:border-indigo-300 focus:ring-indigo-200"
225
+ />
226
+ </div>
227
+ </motion.div>
228
+ )}
229
+ </AnimatePresence>
230
+
231
+ {/* Error Message */}
232
+ {error && (
233
+ <motion.div
234
+ initial={{ opacity: 0, y: -10 }}
235
+ animate={{ opacity: 1, y: 0 }}
236
+ exit={{ opacity: 0, y: -10 }}
237
+ className="mt-3 p-3 bg-red-50 border border-red-200 rounded-xl flex items-start gap-2"
238
+ >
239
+ <AlertCircle className="h-4 w-4 text-red-600 flex-shrink-0 mt-0.5" />
240
+ <p className="text-sm text-red-700 flex-1">{error}</p>
241
+ <button
242
+ onClick={() => setError(null)}
243
+ className="text-red-600 hover:text-red-800 transition-colors"
244
+ >
245
+ <X className="h-4 w-4" />
246
+ </button>
247
+ </motion.div>
248
+ )}
249
+ </div>
250
+ );
251
+ }
frontend/src/components/ui/badge.jsx CHANGED
@@ -1,24 +1,24 @@
1
- import React from "react";
2
- import { cn } from "@/lib/utils";
3
-
4
- const variants = {
5
- default:
6
- "bg-slate-900 text-white hover:bg-slate-900/90",
7
- secondary:
8
- "bg-slate-100 text-slate-800 border border-slate-200",
9
- outline:
10
- "border border-slate-200 text-slate-700",
11
- };
12
-
13
- export function Badge({ className, variant = "default", ...props }) {
14
- return (
15
- <span
16
- className={cn(
17
- "inline-flex items-center rounded-full px-2.5 py-0.5 text-xs font-medium",
18
- variants[variant] || variants.default,
19
- className
20
- )}
21
- {...props}
22
- />
23
- );
24
- }
 
1
+ import React from "react";
2
+ import { cn } from "@/lib/utils";
3
+
4
// Class string for each supported badge variant.
const variants = {
  default: "bg-slate-900 text-white hover:bg-slate-900/90",
  secondary: "bg-slate-100 text-slate-800 border border-slate-200",
  outline: "border border-slate-200 text-slate-700",
};
12
+
13
+ export function Badge({ className, variant = "default", ...props }) {
14
+ return (
15
+ <span
16
+ className={cn(
17
+ "inline-flex items-center rounded-full px-2.5 py-0.5 text-xs font-medium",
18
+ variants[variant] || variants.default,
19
+ className
20
+ )}
21
+ {...props}
22
+ />
23
+ );
24
+ }
frontend/src/components/ui/button.jsx CHANGED
@@ -1,38 +1,38 @@
1
- import React from "react";
2
- import { cn } from "@/lib/utils";
3
-
4
- const base =
5
- "inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500 focus-visible:ring-offset-2 disabled:opacity-50 disabled:pointer-events-none";
6
-
7
- const variants = {
8
- default: "bg-indigo-600 text-white hover:bg-indigo-700 shadow-sm",
9
- outline:
10
- "border border-slate-200 bg-white text-slate-900 hover:bg-slate-50",
11
- ghost: "bg-transparent text-slate-700 hover:bg-slate-100",
12
- };
13
-
14
- const sizes = {
15
- default: "h-10 px-4 py-2",
16
- sm: "h-8 px-3 text-xs",
17
- lg: "h-11 px-6 text-sm",
18
- icon: "h-9 w-9",
19
- };
20
-
21
- export function Button({
22
- className,
23
- variant = "default",
24
- size = "default",
25
- ...props
26
- }) {
27
- return (
28
- <button
29
- className={cn(
30
- base,
31
- variants[variant] || variants.default,
32
- sizes[size] || sizes.default,
33
- className
34
- )}
35
- {...props}
36
- />
37
- );
38
- }
 
1
+ import React from "react";
2
+ import { cn } from "@/lib/utils";
3
+
4
// Classes shared by every button regardless of variant or size.
const base = "inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500 focus-visible:ring-offset-2 disabled:opacity-50 disabled:pointer-events-none";

// Colour treatment per variant.
const variants = {
  default: "bg-indigo-600 text-white hover:bg-indigo-700 shadow-sm",
  outline: "border border-slate-200 bg-white text-slate-900 hover:bg-slate-50",
  ghost: "bg-transparent text-slate-700 hover:bg-slate-100",
};

// Height / padding per size.
const sizes = {
  default: "h-10 px-4 py-2",
  sm: "h-8 px-3 text-xs",
  lg: "h-11 px-6 text-sm",
  icon: "h-9 w-9",
};
20
+
21
+ export function Button({
22
+ className,
23
+ variant = "default",
24
+ size = "default",
25
+ ...props
26
+ }) {
27
+ return (
28
+ <button
29
+ className={cn(
30
+ base,
31
+ variants[variant] || variants.default,
32
+ sizes[size] || sizes.default,
33
+ className
34
+ )}
35
+ {...props}
36
+ />
37
+ );
38
+ }
frontend/src/components/ui/dropdown-menu.jsx CHANGED
@@ -1,113 +1,113 @@
1
- import React, {
2
- createContext,
3
- useContext,
4
- useState,
5
- useRef,
6
- useEffect,
7
- } from "react";
8
- import { cn } from "@/lib/utils";
9
-
10
- const DropdownContext = createContext(null);
11
-
12
- export function DropdownMenu({ children }) {
13
- const [open, setOpen] = useState(false);
14
- const triggerRef = useRef(null);
15
-
16
- // Close on outside click
17
- useEffect(() => {
18
- if (!open) return;
19
- function handleClick(e) {
20
- if (!triggerRef.current) return;
21
- if (!triggerRef.current.parentElement.contains(e.target)) {
22
- setOpen(false);
23
- }
24
- }
25
- document.addEventListener("mousedown", handleClick);
26
- return () => document.removeEventListener("mousedown", handleClick);
27
- }, [open]);
28
-
29
- return (
30
- <DropdownContext.Provider value={{ open, setOpen, triggerRef }}>
31
- <div className="relative inline-block">{children}</div>
32
- </DropdownContext.Provider>
33
- );
34
- }
35
-
36
- export function DropdownMenuTrigger({ asChild, children }) {
37
- const { setOpen, triggerRef } = useContext(DropdownContext);
38
-
39
- const handleClick = (e) => {
40
- e.stopPropagation();
41
- setOpen((o) => !o);
42
- };
43
-
44
- if (asChild && React.isValidElement(children)) {
45
- return React.cloneElement(children, {
46
- ref: triggerRef,
47
- onClick: (e) => {
48
- children.props.onClick?.(e);
49
- handleClick(e);
50
- },
51
- });
52
- }
53
-
54
- return (
55
- <button
56
- ref={triggerRef}
57
- type="button"
58
- onClick={handleClick}
59
- className="inline-flex"
60
- >
61
- {children}
62
- </button>
63
- );
64
- }
65
-
66
- export function DropdownMenuContent({ className, align = "end", ...props }) {
67
- const { open } = useContext(DropdownContext);
68
- if (!open) return null;
69
-
70
- const alignment =
71
- align === "end"
72
- ? "right-0 origin-top-right"
73
- : align === "start"
74
- ? "left-0 origin-top-left"
75
- : "left-1/2 -translate-x-1/2 origin-top";
76
-
77
- return (
78
- <div
79
- className={cn(
80
- "absolute z-50 mt-2 min-w-[8rem] rounded-md border border-slate-200 bg-white shadow-lg focus:outline-none",
81
- alignment,
82
- className
83
- )}
84
- {...props}
85
- />
86
- );
87
- }
88
-
89
- export function DropdownMenuItem({ className, onClick, ...props }) {
90
- const { setOpen } = useContext(DropdownContext);
91
- const handleClick = (e) => {
92
- onClick?.(e);
93
- setOpen(false);
94
- };
95
- return (
96
- <div
97
- className={cn(
98
- "flex cursor-pointer select-none items-center px-2 py-1.5 text-sm text-slate-700 hover:bg-slate-100 rounded-md",
99
- className
100
- )}
101
- onClick={handleClick}
102
- {...props}
103
- />
104
- );
105
- }
106
-
107
- export function DropdownMenuSeparator({ className }) {
108
- return (
109
- <div
110
- className={cn("my-1 h-px bg-slate-200 w-full", className)}
111
- />
112
- );
113
- }
 
1
+ import React, {
2
+ createContext,
3
+ useContext,
4
+ useState,
5
+ useRef,
6
+ useEffect,
7
+ } from "react";
8
+ import { cn } from "@/lib/utils";
9
+
10
+ const DropdownContext = createContext(null);
11
+
12
+ export function DropdownMenu({ children }) {
13
+ const [open, setOpen] = useState(false);
14
+ const triggerRef = useRef(null);
15
+
16
+ // Close on outside click
17
+ useEffect(() => {
18
+ if (!open) return;
19
+ function handleClick(e) {
20
+ if (!triggerRef.current) return;
21
+ if (!triggerRef.current.parentElement.contains(e.target)) {
22
+ setOpen(false);
23
+ }
24
+ }
25
+ document.addEventListener("mousedown", handleClick);
26
+ return () => document.removeEventListener("mousedown", handleClick);
27
+ }, [open]);
28
+
29
+ return (
30
+ <DropdownContext.Provider value={{ open, setOpen, triggerRef }}>
31
+ <div className="relative inline-block">{children}</div>
32
+ </DropdownContext.Provider>
33
+ );
34
+ }
35
+
36
+ export function DropdownMenuTrigger({ asChild, children }) {
37
+ const { setOpen, triggerRef } = useContext(DropdownContext);
38
+
39
+ const handleClick = (e) => {
40
+ e.stopPropagation();
41
+ setOpen((o) => !o);
42
+ };
43
+
44
+ if (asChild && React.isValidElement(children)) {
45
+ return React.cloneElement(children, {
46
+ ref: triggerRef,
47
+ onClick: (e) => {
48
+ children.props.onClick?.(e);
49
+ handleClick(e);
50
+ },
51
+ });
52
+ }
53
+
54
+ return (
55
+ <button
56
+ ref={triggerRef}
57
+ type="button"
58
+ onClick={handleClick}
59
+ className="inline-flex"
60
+ >
61
+ {children}
62
+ </button>
63
+ );
64
+ }
65
+
66
+ export function DropdownMenuContent({ className, align = "end", ...props }) {
67
+ const { open } = useContext(DropdownContext);
68
+ if (!open) return null;
69
+
70
+ const alignment =
71
+ align === "end"
72
+ ? "right-0 origin-top-right"
73
+ : align === "start"
74
+ ? "left-0 origin-top-left"
75
+ : "left-1/2 -translate-x-1/2 origin-top";
76
+
77
+ return (
78
+ <div
79
+ className={cn(
80
+ "absolute z-50 mt-2 min-w-[8rem] rounded-md border border-slate-200 bg-white shadow-lg focus:outline-none",
81
+ alignment,
82
+ className
83
+ )}
84
+ {...props}
85
+ />
86
+ );
87
+ }
88
+
89
+ export function DropdownMenuItem({ className, onClick, ...props }) {
90
+ const { setOpen } = useContext(DropdownContext);
91
+ const handleClick = (e) => {
92
+ onClick?.(e);
93
+ setOpen(false);
94
+ };
95
+ return (
96
+ <div
97
+ className={cn(
98
+ "flex cursor-pointer select-none items-center px-2 py-1.5 text-sm text-slate-700 hover:bg-slate-100 rounded-md",
99
+ className
100
+ )}
101
+ onClick={handleClick}
102
+ {...props}
103
+ />
104
+ );
105
+ }
106
+
107
+ export function DropdownMenuSeparator({ className }) {
108
+ return (
109
+ <div
110
+ className={cn("my-1 h-px bg-slate-200 w-full", className)}
111
+ />
112
+ );
113
+ }
frontend/src/components/ui/input.jsx CHANGED
@@ -1,14 +1,14 @@
1
- import React from "react";
2
- import { cn } from "@/lib/utils";
3
-
4
- export function Input({ className, ...props }) {
5
- return (
6
- <input
7
- className={cn(
8
- "flex h-10 w-full rounded-md border border-slate-200 bg-white px-3 py-2 text-sm text-slate-900 shadow-sm placeholder:text-slate-400 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500 focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50",
9
- className
10
- )}
11
- {...props}
12
- />
13
- );
14
- }
 
1
+ import React from "react";
2
+ import { cn } from "@/lib/utils";
3
+
4
+ export function Input({ className, ...props }) {
5
+ return (
6
+ <input
7
+ className={cn(
8
+ "flex h-10 w-full rounded-md border border-slate-200 bg-white px-3 py-2 text-sm text-slate-900 shadow-sm placeholder:text-slate-400 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500 focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50",
9
+ className
10
+ )}
11
+ {...props}
12
+ />
13
+ );
14
+ }
frontend/src/components/ui/select.jsx CHANGED
@@ -1,116 +1,116 @@
1
- import React, {
2
- createContext,
3
- useContext,
4
- useState,
5
- useRef,
6
- useEffect,
7
- } from "react";
8
- import { cn } from "@/lib/utils";
9
-
10
- const SelectContext = createContext(null);
11
-
12
- export function Select({ value, onValueChange, children }) {
13
- const [open, setOpen] = useState(false);
14
- const [items, setItems] = useState({});
15
- const triggerRef = useRef(null);
16
-
17
- // Close on outside click
18
- useEffect(() => {
19
- if (!open) return;
20
- function handleClick(e) {
21
- if (!triggerRef.current) return;
22
- if (!triggerRef.current.parentElement.contains(e.target)) {
23
- setOpen(false);
24
- }
25
- }
26
- document.addEventListener("mousedown", handleClick);
27
- return () => document.removeEventListener("mousedown", handleClick);
28
- }, [open]);
29
-
30
- const registerItem = (val, label) => {
31
- setItems((prev) => ({ ...prev, [val]: label }));
32
- };
33
-
34
- return (
35
- <SelectContext.Provider
36
- value={{
37
- value,
38
- onValueChange,
39
- open,
40
- setOpen,
41
- items,
42
- registerItem,
43
- triggerRef,
44
- }}
45
- >
46
- <div className="relative inline-block">{children}</div>
47
- </SelectContext.Provider>
48
- );
49
- }
50
-
51
- export function SelectTrigger({ className, children }) {
52
- const { setOpen, triggerRef } = useContext(SelectContext);
53
- return (
54
- <button
55
- type="button"
56
- ref={triggerRef}
57
- onClick={() => setOpen((o) => !o)}
58
- className={cn(
59
- "flex items-center justify-between rounded-md border bg-white px-3 py-2 text-sm text-slate-700 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500",
60
- className
61
- )}
62
- >
63
- {children}
64
- </button>
65
- );
66
- }
67
-
68
- export function SelectValue({ placeholder }) {
69
- const { value, items } = useContext(SelectContext);
70
- const label = value ? items[value] : null;
71
- return (
72
- <span className={cn("truncate text-sm", !label && "text-slate-400")}>
73
- {label || placeholder}
74
- </span>
75
- );
76
- }
77
-
78
- export function SelectContent({ className, children }) {
79
- const { open } = useContext(SelectContext);
80
- if (!open) return null;
81
- return (
82
- <div
83
- className={cn(
84
- "absolute z-50 mt-2 min-w-[8rem] rounded-md border border-slate-200 bg-white shadow-lg",
85
- className
86
- )}
87
- >
88
- {children}
89
- </div>
90
- );
91
- }
92
-
93
- export function SelectItem({ value, children, className }) {
94
- const { onValueChange, setOpen, registerItem } = useContext(SelectContext);
95
-
96
- useEffect(() => {
97
- registerItem(value, typeof children === "string" ? children : String(children));
98
- }, [value, children, registerItem]);
99
-
100
- const handleClick = () => {
101
- onValueChange?.(value);
102
- setOpen(false);
103
- };
104
-
105
- return (
106
- <div
107
- onClick={handleClick}
108
- className={cn(
109
- "cursor-pointer select-none px-3 py-1.5 text-sm text-slate-700 hover:bg-slate-100",
110
- className
111
- )}
112
- >
113
- {children}
114
- </div>
115
- );
116
- }
 
1
+ import React, {
2
+ createContext,
3
+ useContext,
4
+ useState,
5
+ useRef,
6
+ useEffect,
7
+ } from "react";
8
+ import { cn } from "@/lib/utils";
9
+
10
+ const SelectContext = createContext(null);
11
+
12
+ export function Select({ value, onValueChange, children }) {
13
+ const [open, setOpen] = useState(false);
14
+ const [items, setItems] = useState({});
15
+ const triggerRef = useRef(null);
16
+
17
+ // Close on outside click
18
+ useEffect(() => {
19
+ if (!open) return;
20
+ function handleClick(e) {
21
+ if (!triggerRef.current) return;
22
+ if (!triggerRef.current.parentElement.contains(e.target)) {
23
+ setOpen(false);
24
+ }
25
+ }
26
+ document.addEventListener("mousedown", handleClick);
27
+ return () => document.removeEventListener("mousedown", handleClick);
28
+ }, [open]);
29
+
30
+ const registerItem = (val, label) => {
31
+ setItems((prev) => ({ ...prev, [val]: label }));
32
+ };
33
+
34
+ return (
35
+ <SelectContext.Provider
36
+ value={{
37
+ value,
38
+ onValueChange,
39
+ open,
40
+ setOpen,
41
+ items,
42
+ registerItem,
43
+ triggerRef,
44
+ }}
45
+ >
46
+ <div className="relative inline-block">{children}</div>
47
+ </SelectContext.Provider>
48
+ );
49
+ }
50
+
51
+ export function SelectTrigger({ className, children }) {
52
+ const { setOpen, triggerRef } = useContext(SelectContext);
53
+ return (
54
+ <button
55
+ type="button"
56
+ ref={triggerRef}
57
+ onClick={() => setOpen((o) => !o)}
58
+ className={cn(
59
+ "flex items-center justify-between rounded-md border bg-white px-3 py-2 text-sm text-slate-700 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500",
60
+ className
61
+ )}
62
+ >
63
+ {children}
64
+ </button>
65
+ );
66
+ }
67
+
68
+ export function SelectValue({ placeholder }) {
69
+ const { value, items } = useContext(SelectContext);
70
+ const label = value ? items[value] : null;
71
+ return (
72
+ <span className={cn("truncate text-sm", !label && "text-slate-400")}>
73
+ {label || placeholder}
74
+ </span>
75
+ );
76
+ }
77
+
78
+ export function SelectContent({ className, children }) {
79
+ const { open } = useContext(SelectContext);
80
+ if (!open) return null;
81
+ return (
82
+ <div
83
+ className={cn(
84
+ "absolute z-50 mt-2 min-w-[8rem] rounded-md border border-slate-200 bg-white shadow-lg",
85
+ className
86
+ )}
87
+ >
88
+ {children}
89
+ </div>
90
+ );
91
+ }
92
+
93
+ export function SelectItem({ value, children, className }) {
94
+ const { onValueChange, setOpen, registerItem } = useContext(SelectContext);
95
+
96
+ useEffect(() => {
97
+ registerItem(value, typeof children === "string" ? children : String(children));
98
+ }, [value, children, registerItem]);
99
+
100
+ const handleClick = () => {
101
+ onValueChange?.(value);
102
+ setOpen(false);
103
+ };
104
+
105
+ return (
106
+ <div
107
+ onClick={handleClick}
108
+ className={cn(
109
+ "cursor-pointer select-none px-3 py-1.5 text-sm text-slate-700 hover:bg-slate-100",
110
+ className
111
+ )}
112
+ >
113
+ {children}
114
+ </div>
115
+ );
116
+ }
frontend/src/components/ui/separator.jsx ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from "react";
2
+ import { cn } from "@/lib/utils";
3
+
4
+ export function Separator({ className, orientation = "horizontal", ...props }) {
5
+ return (
6
+ <div
7
+ className={cn(
8
+ "shrink-0 bg-slate-200",
9
+ orientation === "horizontal" ? "h-px w-full" : "h-full w-px",
10
+ className
11
+ )}
12
+ {...props}
13
+ />
14
+ );
15
+ }
16
+
frontend/src/components/ui/tabs.jsx CHANGED
@@ -1,45 +1,45 @@
1
- import React, { createContext, useContext } from "react";
2
- import { cn } from "@/lib/utils";
3
-
4
- const TabsContext = createContext(null);
5
-
6
- export function Tabs({ value, onValueChange, children, className }) {
7
- return (
8
- <TabsContext.Provider value={{ value, onValueChange }}>
9
- <div className={className}>{children}</div>
10
- </TabsContext.Provider>
11
- );
12
- }
13
-
14
- export function TabsList({ className, ...props }) {
15
- return (
16
- <div
17
- className={cn(
18
- "inline-flex items-center justify-center rounded-lg bg-slate-100 p-0.5",
19
- className
20
- )}
21
- {...props}
22
- />
23
- );
24
- }
25
-
26
- export function TabsTrigger({ value, className, children, ...props }) {
27
- const ctx = useContext(TabsContext);
28
- const selected = ctx?.value === value;
29
-
30
- return (
31
- <button
32
- type="button"
33
- onClick={() => ctx?.onValueChange && ctx.onValueChange(value)}
34
- data-state={selected ? "active" : "inactive"}
35
- className={cn(
36
- "inline-flex items-center justify-center rounded-md px-3 py-1.5 text-xs font-medium text-slate-600 transition-colors",
37
- "data-[state=active]:bg-white data-[state=active]:text-slate-900",
38
- className
39
- )}
40
- {...props}
41
- >
42
- {children}
43
- </button>
44
- );
45
- }
 
1
+ import React, { createContext, useContext } from "react";
2
+ import { cn } from "@/lib/utils";
3
+
4
+ const TabsContext = createContext(null);
5
+
6
+ export function Tabs({ value, onValueChange, children, className }) {
7
+ return (
8
+ <TabsContext.Provider value={{ value, onValueChange }}>
9
+ <div className={className}>{children}</div>
10
+ </TabsContext.Provider>
11
+ );
12
+ }
13
+
14
+ export function TabsList({ className, ...props }) {
15
+ return (
16
+ <div
17
+ className={cn(
18
+ "inline-flex items-center justify-center rounded-lg bg-slate-100 p-0.5",
19
+ className
20
+ )}
21
+ {...props}
22
+ />
23
+ );
24
+ }
25
+
26
+ export function TabsTrigger({ value, className, children, ...props }) {
27
+ const ctx = useContext(TabsContext);
28
+ const selected = ctx?.value === value;
29
+
30
+ return (
31
+ <button
32
+ type="button"
33
+ onClick={() => ctx?.onValueChange && ctx.onValueChange(value)}
34
+ data-state={selected ? "active" : "inactive"}
35
+ className={cn(
36
+ "inline-flex items-center justify-center rounded-md px-3 py-1.5 text-xs font-medium text-slate-600 transition-colors",
37
+ "data-[state=active]:bg-white data-[state=active]:text-slate-900",
38
+ className
39
+ )}
40
+ {...props}
41
+ >
42
+ {children}
43
+ </button>
44
+ );
45
+ }
frontend/src/config/firebase.js ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Firebase configuration and initialization
3
+ */
4
+ import { initializeApp } from 'firebase/app';
5
+ import { getAuth, GoogleAuthProvider } from 'firebase/auth';
6
+
7
// Firebase configuration from environment variables.
// Every field is read from `import.meta.env`; a variable that is not defined
// at build time leaves the corresponding field `undefined`.
// NOTE(review): the VITE_ prefix suggests these are injected by Vite — confirm
// against the build setup.
const firebaseConfig = {
  apiKey: import.meta.env.VITE_FIREBASE_API_KEY,
  authDomain: import.meta.env.VITE_FIREBASE_AUTH_DOMAIN,
  projectId: import.meta.env.VITE_FIREBASE_PROJECT_ID,
  storageBucket: import.meta.env.VITE_FIREBASE_STORAGE_BUCKET,
  messagingSenderId: import.meta.env.VITE_FIREBASE_MESSAGING_SENDER_ID,
  appId: import.meta.env.VITE_FIREBASE_APP_ID,
};

// Initialize Firebase. This runs once, as a side effect of importing this
// module.
const app = initializeApp(firebaseConfig);

// Initialize Firebase Authentication and get a reference to the service
export const auth = getAuth(app);

// Configure Google Auth Provider.
// `prompt: 'select_account'` is forwarded as a custom OAuth parameter; it asks
// Google to show the account chooser instead of silently reusing a session.
export const googleProvider = new GoogleAuthProvider();
googleProvider.setCustomParameters({
  prompt: 'select_account'
});

export default app;
30
+
frontend/src/contexts/AuthContext.jsx ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { createContext, useContext, useState, useEffect } from "react";
2
+ import { signInWithPopup, signOut as firebaseSignOut } from "firebase/auth";
3
+ import { auth, googleProvider } from "@/config/firebase";
4
+ import { getCurrentUser, firebaseLogin, requestOTP, verifyOTP, logout as apiLogout } from "@/services/auth";
5
+
6
+ const AuthContext = createContext(null);
7
+
8
+ export function AuthProvider({ children }) {
9
+ const [user, setUser] = useState(null);
10
+ const [loading, setLoading] = useState(true);
11
+ const [token, setToken] = useState(localStorage.getItem("auth_token"));
12
+
13
+ useEffect(() => {
14
+ // Check if user is already authenticated
15
+ if (token) {
16
+ checkAuth();
17
+ } else {
18
+ setLoading(false);
19
+ }
20
+ }, [token]);
21
+
22
+ const checkAuth = async () => {
23
+ try {
24
+ const userData = await getCurrentUser();
25
+ setUser(userData);
26
+ } catch (error) {
27
+ // Token is invalid, clear it
28
+ localStorage.removeItem("auth_token");
29
+ setToken(null);
30
+ setUser(null);
31
+ } finally {
32
+ setLoading(false);
33
+ }
34
+ };
35
+
36
+ const handleFirebaseLogin = async () => {
37
+ try {
38
+ const result = await signInWithPopup(auth, googleProvider);
39
+ const idToken = await result.user.getIdToken();
40
+ const response = await firebaseLogin(idToken);
41
+ handleAuthCallback(response.token);
42
+ } catch (error) {
43
+ if (error.code === 'auth/popup-closed' || error.code === 'auth/cancelled-popup-request') {
44
+ // User closed popup or cancelled - don't show error
45
+ return;
46
+ }
47
+ console.error("Firebase login error:", error);
48
+ throw new Error(error.message || "Firebase authentication failed");
49
+ }
50
+ };
51
+
52
+ const handleOTPRequest = async (email) => {
53
+ try {
54
+ await requestOTP(email);
55
+ } catch (error) {
56
+ console.error("OTP request error:", error);
57
+ throw error;
58
+ }
59
+ };
60
+
61
+ const handleOTPVerify = async (email, otp) => {
62
+ try {
63
+ const response = await verifyOTP(email, otp);
64
+ handleAuthCallback(response.token);
65
+ } catch (error) {
66
+ console.error("OTP verify error:", error);
67
+ throw error;
68
+ }
69
+ };
70
+
71
+ const handleLogout = async () => {
72
+ try {
73
+ // Sign out from Firebase if user was using Firebase auth
74
+ if (auth.currentUser) {
75
+ await firebaseSignOut(auth);
76
+ }
77
+ await apiLogout();
78
+ } catch (error) {
79
+ console.error("Logout error:", error);
80
+ } finally {
81
+ localStorage.removeItem("auth_token");
82
+ setToken(null);
83
+ setUser(null);
84
+ }
85
+ };
86
+
87
+ const handleAuthCallback = (newToken) => {
88
+ localStorage.setItem("auth_token", newToken);
89
+ setToken(newToken);
90
+ checkAuth();
91
+ };
92
+
93
+ const value = {
94
+ user,
95
+ token,
96
+ loading,
97
+ firebaseLogin: handleFirebaseLogin,
98
+ requestOTP: handleOTPRequest,
99
+ verifyOTP: handleOTPVerify,
100
+ logout: handleLogout,
101
+ handleAuthCallback,
102
+ isAuthenticated: !!user,
103
+ };
104
+
105
+ return <AuthContext.Provider value={value}>{children}</AuthContext.Provider>;
106
+ }
107
+
108
/**
 * Returns the value provided by the nearest AuthProvider.
 *
 * @throws {Error} When called outside an AuthProvider (the context value is
 *   falsy).
 */
export function useAuth() {
  const authContext = useContext(AuthContext);
  if (authContext) {
    return authContext;
  }
  throw new Error("useAuth must be used within an AuthProvider");
}
115
+