Seth committed on
Commit
272f36f
·
2 Parent(s): 8e8c6a4 daae7a9

Merge remote changes, keeping API key implementation

Browse files
Dockerfile CHANGED
@@ -1,83 +1,83 @@
1
- # ---------- 1) Build frontend (React + Vite) ----------
2
- FROM node:20-alpine AS frontend-build
3
- WORKDIR /frontend
4
-
5
- # Accept build arguments for Vite environment variables
6
- ARG VITE_FIREBASE_API_KEY
7
- ARG VITE_FIREBASE_AUTH_DOMAIN
8
- ARG VITE_FIREBASE_PROJECT_ID
9
- ARG VITE_FIREBASE_STORAGE_BUCKET
10
- ARG VITE_FIREBASE_MESSAGING_SENDER_ID
11
- ARG VITE_FIREBASE_APP_ID
12
- ARG VITE_API_BASE_URL
13
-
14
- # Set as environment variables so they're available to the build script
15
- ENV VITE_FIREBASE_API_KEY=$VITE_FIREBASE_API_KEY
16
- ENV VITE_FIREBASE_AUTH_DOMAIN=$VITE_FIREBASE_AUTH_DOMAIN
17
- ENV VITE_FIREBASE_PROJECT_ID=$VITE_FIREBASE_PROJECT_ID
18
- ENV VITE_FIREBASE_STORAGE_BUCKET=$VITE_FIREBASE_STORAGE_BUCKET
19
- ENV VITE_FIREBASE_MESSAGING_SENDER_ID=$VITE_FIREBASE_MESSAGING_SENDER_ID
20
- ENV VITE_FIREBASE_APP_ID=$VITE_FIREBASE_APP_ID
21
- ENV VITE_API_BASE_URL=$VITE_API_BASE_URL
22
-
23
- # Install frontend dependencies
24
- COPY frontend/package*.json ./
25
- RUN npm install
26
-
27
- # Copy rest of frontend
28
- COPY frontend/ .
29
-
30
- # Create .env file from environment variables and build
31
- # Inline the script to avoid permission issues
32
- RUN echo "Checking environment variables..." && \
33
- [ -z "$VITE_FIREBASE_API_KEY" ] && echo "WARNING: VITE_FIREBASE_API_KEY is not set" || echo "✓ VITE_FIREBASE_API_KEY is set" && \
34
- [ -z "$VITE_FIREBASE_AUTH_DOMAIN" ] && echo "WARNING: VITE_FIREBASE_AUTH_DOMAIN is not set" || echo "✓ VITE_FIREBASE_AUTH_DOMAIN is set" && \
35
- [ -z "$VITE_FIREBASE_PROJECT_ID" ] && echo "WARNING: VITE_FIREBASE_PROJECT_ID is not set" || echo "✓ VITE_FIREBASE_PROJECT_ID is set" && \
36
- echo "VITE_FIREBASE_API_KEY=${VITE_FIREBASE_API_KEY:-}" > .env && \
37
- echo "VITE_FIREBASE_AUTH_DOMAIN=${VITE_FIREBASE_AUTH_DOMAIN:-}" >> .env && \
38
- echo "VITE_FIREBASE_PROJECT_ID=${VITE_FIREBASE_PROJECT_ID:-}" >> .env && \
39
- echo "VITE_FIREBASE_STORAGE_BUCKET=${VITE_FIREBASE_STORAGE_BUCKET:-}" >> .env && \
40
- echo "VITE_FIREBASE_MESSAGING_SENDER_ID=${VITE_FIREBASE_MESSAGING_SENDER_ID:-}" >> .env && \
41
- echo "VITE_FIREBASE_APP_ID=${VITE_FIREBASE_APP_ID:-}" >> .env && \
42
- echo "VITE_API_BASE_URL=${VITE_API_BASE_URL:-}" >> .env && \
43
- echo "Created .env file with environment variables" && \
44
- npm run build
45
- # Vite will output to /frontend/dist by default
46
-
47
- # ---------- 2) Backend (FastAPI + Python) ----------
48
- FROM python:3.11-slim
49
-
50
- ENV PYTHONDONTWRITEBYTECODE=1
51
- ENV PYTHONUNBUFFERED=1
52
-
53
- WORKDIR /app
54
-
55
- # System deps (optional but useful for some libs)
56
- RUN apt-get update && apt-get install -y --no-install-recommends \
57
- build-essential \
58
- && rm -rf /var/lib/apt/lists/*
59
-
60
- # Install backend dependencies
61
- COPY backend/requirements.txt ./backend/requirements.txt
62
- RUN pip install --no-cache-dir -r backend/requirements.txt
63
-
64
- # Copy backend code
65
- COPY backend ./backend
66
-
67
- # Copy built frontend into backend/frontend_dist
68
- # FastAPI will serve from this folder later
69
- RUN mkdir -p backend/frontend_dist
70
- COPY --from=frontend-build /frontend/dist ./backend/frontend_dist
71
-
72
- # Create data directory for SQLite
73
- RUN mkdir -p data
74
-
75
- # Env vars used in backend/db.py etc.
76
- ENV DB_PATH=/app/data/app.db
77
- ENV PORT=7860
78
- ENV PYTHONPATH=/app
79
-
80
- EXPOSE 7860
81
-
82
- # Launch FastAPI app (we'll use backend.app.main:app)
83
- CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ # ---------- 1) Build frontend (React + Vite) ----------
2
+ FROM node:20-alpine AS frontend-build
3
+ WORKDIR /frontend
4
+
5
+ # Accept build arguments for Vite environment variables
6
+ ARG VITE_FIREBASE_API_KEY
7
+ ARG VITE_FIREBASE_AUTH_DOMAIN
8
+ ARG VITE_FIREBASE_PROJECT_ID
9
+ ARG VITE_FIREBASE_STORAGE_BUCKET
10
+ ARG VITE_FIREBASE_MESSAGING_SENDER_ID
11
+ ARG VITE_FIREBASE_APP_ID
12
+ ARG VITE_API_BASE_URL
13
+
14
+ # Set as environment variables so they're available to the build script
15
+ ENV VITE_FIREBASE_API_KEY=$VITE_FIREBASE_API_KEY
16
+ ENV VITE_FIREBASE_AUTH_DOMAIN=$VITE_FIREBASE_AUTH_DOMAIN
17
+ ENV VITE_FIREBASE_PROJECT_ID=$VITE_FIREBASE_PROJECT_ID
18
+ ENV VITE_FIREBASE_STORAGE_BUCKET=$VITE_FIREBASE_STORAGE_BUCKET
19
+ ENV VITE_FIREBASE_MESSAGING_SENDER_ID=$VITE_FIREBASE_MESSAGING_SENDER_ID
20
+ ENV VITE_FIREBASE_APP_ID=$VITE_FIREBASE_APP_ID
21
+ ENV VITE_API_BASE_URL=$VITE_API_BASE_URL
22
+
23
+ # Install frontend dependencies
24
+ COPY frontend/package*.json ./
25
+ RUN npm install
26
+
27
+ # Copy rest of frontend
28
+ COPY frontend/ .
29
+
30
+ # Create .env file from environment variables and build
31
+ # Inline the script to avoid permission issues
32
+ RUN echo "Checking environment variables..." && \
33
+ [ -z "$VITE_FIREBASE_API_KEY" ] && echo "WARNING: VITE_FIREBASE_API_KEY is not set" || echo "✓ VITE_FIREBASE_API_KEY is set" && \
34
+ [ -z "$VITE_FIREBASE_AUTH_DOMAIN" ] && echo "WARNING: VITE_FIREBASE_AUTH_DOMAIN is not set" || echo "✓ VITE_FIREBASE_AUTH_DOMAIN is set" && \
35
+ [ -z "$VITE_FIREBASE_PROJECT_ID" ] && echo "WARNING: VITE_FIREBASE_PROJECT_ID is not set" || echo "✓ VITE_FIREBASE_PROJECT_ID is set" && \
36
+ echo "VITE_FIREBASE_API_KEY=${VITE_FIREBASE_API_KEY:-}" > .env && \
37
+ echo "VITE_FIREBASE_AUTH_DOMAIN=${VITE_FIREBASE_AUTH_DOMAIN:-}" >> .env && \
38
+ echo "VITE_FIREBASE_PROJECT_ID=${VITE_FIREBASE_PROJECT_ID:-}" >> .env && \
39
+ echo "VITE_FIREBASE_STORAGE_BUCKET=${VITE_FIREBASE_STORAGE_BUCKET:-}" >> .env && \
40
+ echo "VITE_FIREBASE_MESSAGING_SENDER_ID=${VITE_FIREBASE_MESSAGING_SENDER_ID:-}" >> .env && \
41
+ echo "VITE_FIREBASE_APP_ID=${VITE_FIREBASE_APP_ID:-}" >> .env && \
42
+ echo "VITE_API_BASE_URL=${VITE_API_BASE_URL:-}" >> .env && \
43
+ echo "Created .env file with environment variables" && \
44
+ npm run build
45
+ # Vite will output to /frontend/dist by default
46
+
47
+ # ---------- 2) Backend (FastAPI + Python) ----------
48
+ FROM python:3.11-slim
49
+
50
+ ENV PYTHONDONTWRITEBYTECODE=1
51
+ ENV PYTHONUNBUFFERED=1
52
+
53
+ WORKDIR /app
54
+
55
+ # System deps (optional but useful for some libs)
56
+ RUN apt-get update && apt-get install -y --no-install-recommends \
57
+ build-essential \
58
+ && rm -rf /var/lib/apt/lists/*
59
+
60
+ # Install backend dependencies
61
+ COPY backend/requirements.txt ./backend/requirements.txt
62
+ RUN pip install --no-cache-dir -r backend/requirements.txt
63
+
64
+ # Copy backend code
65
+ COPY backend ./backend
66
+
67
+ # Copy built frontend into backend/frontend_dist
68
+ # FastAPI will serve from this folder later
69
+ RUN mkdir -p backend/frontend_dist
70
+ COPY --from=frontend-build /frontend/dist ./backend/frontend_dist
71
+
72
+ # Create data directory for SQLite
73
+ RUN mkdir -p data
74
+
75
+ # Env vars used in backend/db.py etc.
76
+ ENV DB_PATH=/app/data/app.db
77
+ ENV PORT=7860
78
+ ENV PYTHONPATH=/app
79
+
80
+ EXPOSE 7860
81
+
82
+ # Launch FastAPI app (we'll use backend.app.main:app)
83
+ CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "7860"]
FIREBASE_OTP_SETUP.md CHANGED
@@ -1,3 +1,4 @@
 
1
  # Firebase Authentication + OTP Setup Guide
2
 
3
  This application uses Firebase Authentication for Google sign-in and Brevo for OTP email delivery. Only business email addresses are allowed.
@@ -294,3 +295,289 @@ For issues:
294
  - Firebase: [Firebase Documentation](https://firebase.google.com/docs)
295
  - Brevo: [Brevo API Documentation](https://developers.brevo.com/)
296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  # Firebase Authentication + OTP Setup Guide
3
 
4
  This application uses Firebase Authentication for Google sign-in and Brevo for OTP email delivery. Only business email addresses are allowed.
 
295
  - Firebase: [Firebase Documentation](https://firebase.google.com/docs)
296
  - Brevo: [Brevo API Documentation](https://developers.brevo.com/)
297
 
298
+ =======
299
+ # Firebase Authentication + OTP Setup Guide
300
+
301
+ This application uses Firebase Authentication for Google sign-in and Brevo for OTP email delivery. Only business email addresses are allowed.
302
+
303
+ ## Prerequisites
304
+
305
+ 1. Firebase project
306
+ 2. Brevo account (for sending OTP emails)
307
+ 3. Business email domain verification
308
+
309
+ ---
310
+
311
+ ## Step 1: Firebase Setup
312
+
313
+ ### 1.1 Create Firebase Project
314
+
315
+ 1. Go to [Firebase Console](https://console.firebase.google.com/)
316
+ 2. Click "Add project" or select an existing project
317
+ 3. Follow the setup wizard
318
+
319
+ ### 1.2 Enable Google Authentication
320
+
321
+ 1. In Firebase Console, go to **Authentication** → **Sign-in method**
322
+ 2. Click on **Google** provider
323
+ 3. Enable it and set your project support email
324
+ 4. Save the changes
325
+
326
+ ### 1.3 Get Firebase Web App Configuration
327
+
328
+ 1. In Firebase Console, go to **Project Settings** (gear icon)
329
+ 2. Scroll down to "Your apps" section
330
+ 3. Click the **Web** icon (`</>`) to add a web app
331
+ 4. Register your app (you can skip Firebase Hosting for now)
332
+ 5. Copy the Firebase configuration object
333
+
334
+ ### 1.4 Get Firebase Service Account Key
335
+
336
+ 1. In Firebase Console, go to **Project Settings** → **Service accounts**
337
+ 2. Click **Generate new private key**
338
+ 3. Download the JSON file (keep it secure!)
339
+
340
+ ### 1.5 Set Frontend Environment Variables
341
+
342
+ Create or update `frontend/.env`:
343
+
344
+ ```bash
345
+ VITE_FIREBASE_API_KEY=your-api-key
346
+ VITE_FIREBASE_AUTH_DOMAIN=your-project.firebaseapp.com
347
+ VITE_FIREBASE_PROJECT_ID=your-project-id
348
+ VITE_FIREBASE_STORAGE_BUCKET=your-project.appspot.com
349
+ VITE_FIREBASE_MESSAGING_SENDER_ID=your-sender-id
350
+ VITE_FIREBASE_APP_ID=your-app-id
351
+ ```
352
+
353
+ ### 1.6 Set Backend Environment Variables
354
+
355
+ You have two options for Firebase Admin SDK:
356
+
357
+ **Option A: Service Account JSON File**
358
+ ```bash
359
+ FIREBASE_SERVICE_ACCOUNT_KEY=/path/to/service-account-key.json
360
+ ```
361
+
362
+ **Option B: Service Account JSON String (Recommended for Docker/Cloud)**
363
+ ```bash
364
+ FIREBASE_SERVICE_ACCOUNT_JSON='{"type":"service_account","project_id":"...","private_key_id":"...","private_key":"...","client_email":"...","client_id":"...","auth_uri":"...","token_uri":"...","auth_provider_x509_cert_url":"...","client_x509_cert_url":"..."}'
365
+ ```
366
+
367
+ ---
368
+
369
+ ## Step 2: Brevo Setup
370
+
371
+ ### 2.1 Create Brevo Account
372
+
373
+ 1. Go to [Brevo](https://www.brevo.com/) (formerly Sendinblue)
374
+ 2. Sign up for a free account (300 emails/day free tier)
375
+ 3. Verify your email address
376
+
377
+ ### 2.2 Get API Key
378
+
379
+ 1. Log in to Brevo
380
+ 2. Go to **Settings** → **API Keys**
381
+ 3. Click **Generate a new API key**
382
+ 4. Copy the API key (starts with `xkeysib-...`)
383
+
384
+ ### 2.3 Verify Sender Email
385
+
386
+ 1. Go to **Senders & IP** → **Senders**
387
+ 2. Click **Add a sender**
388
+ 3. Enter your sender email (e.g., `noreply@yourdomain.com`)
389
+ 4. Verify the email address (check your inbox for verification email)
390
+ 5. Once verified, you can use it to send emails
391
+
392
+ ### 2.4 Set Backend Environment Variables
393
+
394
+ ```bash
395
+ BREVO_API_KEY=xkeysib-your-api-key-here
396
+ BREVO_SENDER_EMAIL=noreply@yourdomain.com
397
+ BREVO_SENDER_NAME=EZOFIS AI
398
+ ```
399
+
400
+ ---
401
+
402
+ ## Step 3: JWT Secret Key
403
+
404
+ Generate a strong random string for JWT token signing:
405
+
406
+ ```bash
407
+ # Generate a random secret (Linux/Mac)
408
+ openssl rand -hex 32
409
+
410
+ # Or use Python
411
+ python -c "import secrets; print(secrets.token_hex(32))"
412
+ ```
413
+
414
+ Set the environment variable:
415
+
416
+ ```bash
417
+ JWT_SECRET_KEY=your-generated-secret-key-here
418
+ ```
419
+
420
+ ---
421
+
422
+ ## Step 4: Frontend URL
423
+
424
+ Set the frontend URL for OAuth redirects:
425
+
426
+ ```bash
427
+ FRONTEND_URL=http://localhost:5173 # Development
428
+ # OR
429
+ FRONTEND_URL=https://your-domain.com # Production
430
+ ```
431
+
432
+ ---
433
+
434
+ ## Step 5: Install Dependencies
435
+
436
+ ### Backend
437
+
438
+ ```bash
439
+ cd backend
440
+ pip install -r requirements.txt
441
+ ```
442
+
443
+ ### Frontend
444
+
445
+ ```bash
446
+ cd frontend
447
+ npm install
448
+ ```
449
+
450
+ ---
451
+
452
+ ## Step 6: Database Migration
453
+
454
+ The database will automatically create the new schema when you start the application. However, if you have existing data:
455
+
456
+ **Option 1: Fresh Start (Recommended for Development)**
457
+ - Delete the existing database file: `data/app.db`
458
+ - Restart the application (tables will be recreated)
459
+
460
+ **Option 2: Manual Migration (For Production)**
461
+ - The new `users` table will be created automatically
462
+ - Existing `extractions` table needs `user_id` column added
463
+ - You'll need to assign existing records to a default user or migrate them
464
+
465
+ ---
466
+
467
+ ## Step 7: Test the Setup
468
+
469
+ ### 7.1 Test Firebase Authentication
470
+
471
+ 1. Start the backend server
472
+ 2. Start the frontend development server
473
+ 3. Navigate to the application
474
+ 4. Click "Google Sign In"
475
+ 5. Sign in with a business Google account
476
+ 6. Verify you're redirected to the dashboard
477
+
478
+ ### 7.2 Test OTP Authentication
479
+
480
+ 1. Click on "Email / OTP" tab
481
+ 2. Enter a business email address
482
+ 3. Click "Send OTP"
483
+ 4. Check your email for the OTP code
484
+ 5. Enter the OTP and verify
485
+ 6. Verify you're redirected to the dashboard
486
+
487
+ ### 7.3 Test Business Email Validation
488
+
489
+ 1. Try to sign in with a personal Gmail account
490
+ 2. Verify you get an error message
491
+ 3. Try OTP with a personal email
492
+ 4. Verify it's blocked
493
+
494
+ ---
495
+
496
+ ## Environment Variables Summary
497
+
498
+ ### Backend (.env or environment)
499
+
500
+ ```bash
501
+ # Firebase
502
+ FIREBASE_SERVICE_ACCOUNT_JSON='{...}' # OR
503
+ FIREBASE_SERVICE_ACCOUNT_KEY=/path/to/key.json
504
+
505
+ # Brevo
506
+ BREVO_API_KEY=xkeysib-...
507
+ BREVO_SENDER_EMAIL=noreply@yourdomain.com
508
+ BREVO_SENDER_NAME=EZOFIS AI
509
+
510
+ # JWT
511
+ JWT_SECRET_KEY=your-secret-key
512
+
513
+ # Frontend URL
514
+ FRONTEND_URL=http://localhost:5173
515
+ ```
516
+
517
+ ### Frontend (.env)
518
+
519
+ ```bash
520
+ VITE_FIREBASE_API_KEY=...
521
+ VITE_FIREBASE_AUTH_DOMAIN=...
522
+ VITE_FIREBASE_PROJECT_ID=...
523
+ VITE_FIREBASE_STORAGE_BUCKET=...
524
+ VITE_FIREBASE_MESSAGING_SENDER_ID=...
525
+ VITE_FIREBASE_APP_ID=...
526
+ VITE_API_BASE_URL=http://localhost:7860
527
+ ```
528
+
529
+ ---
530
+
531
+ ## Troubleshooting
532
+
533
+ ### Firebase Issues
534
+
535
+ - **"Firebase not configured"**: Check that `FIREBASE_SERVICE_ACCOUNT_JSON` or `FIREBASE_SERVICE_ACCOUNT_KEY` is set correctly
536
+ - **"Invalid Firebase token"**: Ensure Firebase Web SDK is properly configured in frontend
537
+ - **"Email not found"**: Make sure Google sign-in is enabled in Firebase Console
538
+
539
+ ### Brevo Issues
540
+
541
+ - **"Failed to send email"**:
542
+ - Verify your API key is correct
543
+ - Check that sender email is verified in Brevo
544
+ - Ensure you haven't exceeded the free tier limit (300 emails/day)
545
+ - **"API key not set"**: Check that `BREVO_API_KEY` environment variable is set
546
+
547
+ ### Business Email Validation
548
+
549
+ - Personal emails (Gmail, Yahoo, etc.) are automatically blocked
550
+ - Only business/corporate email domains are allowed
551
+ - The validation happens on both frontend and backend
552
+
553
+ ---
554
+
555
+ ## Security Notes
556
+
557
+ 1. **Never commit** Firebase service account keys or API keys to version control
558
+ 2. Use environment variables or secure secret management
559
+ 3. JWT tokens expire after 7 days
560
+ 4. OTP codes expire after 10 minutes
561
+ 5. Maximum 5 OTP verification attempts per email
562
+ 6. All extraction records are filtered by user_id for data isolation
563
+
564
+ ---
565
+
566
+ ## Production Deployment
567
+
568
+ 1. Set all environment variables in your hosting platform
569
+ 2. Use HTTPS for both frontend and backend
570
+ 3. Update `FRONTEND_URL` to your production domain
571
+ 4. Verify sender email in Brevo with your production domain
572
+ 5. Consider using Redis for OTP storage instead of in-memory (for scalability)
573
+ 6. Set up proper error monitoring and logging
574
+
575
+ ---
576
+
577
+ ## Support
578
+
579
+ For issues:
580
+ - Firebase: [Firebase Documentation](https://firebase.google.com/docs)
581
+ - Brevo: [Brevo API Documentation](https://developers.brevo.com/)
582
+
583
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
GOOGLE_OAUTH_SETUP.md CHANGED
@@ -1,3 +1,4 @@
 
1
  # Google OAuth Setup Guide
2
 
3
  This application uses Google OAuth for user authentication. Follow these steps to set it up:
@@ -77,3 +78,84 @@ New dependencies added:
77
  - All extraction records are filtered by user_id
78
  - Users can only see their own data and history
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  # Google OAuth Setup Guide
3
 
4
  This application uses Google OAuth for user authentication. Follow these steps to set it up:
 
78
  - All extraction records are filtered by user_id
79
  - Users can only see their own data and history
80
 
81
+ =======
82
+ # Google OAuth Setup Guide
83
+
84
+ This application uses Google OAuth for user authentication. Follow these steps to set it up:
85
+
86
+ ## 1. Create Google OAuth Credentials
87
+
88
+ 1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
89
+ 2. Create a new project or select an existing one
90
+ 3. Enable the Google+ API
91
+ 4. Go to "Credentials" → "Create Credentials" → "OAuth client ID"
92
+ 5. Choose "Web application"
93
+ 6. Add authorized redirect URIs:
94
+ - For development: `http://localhost:7860/api/auth/callback`
95
+ - For production: `https://your-domain.com/api/auth/callback`
96
+ 7. Copy the Client ID and Client Secret
97
+
98
+ ## 2. Set Environment Variables
99
+
100
+ Set the following environment variables:
101
+
102
+ ```bash
103
+ # Google OAuth
104
+ GOOGLE_CLIENT_ID=your-client-id-here
105
+ GOOGLE_CLIENT_SECRET=your-client-secret-here
106
+
107
+ # JWT Secret (use a strong random string)
108
+ JWT_SECRET_KEY=your-secret-key-here
109
+
110
+ # Frontend URL (for OAuth redirect)
111
+ FRONTEND_URL=http://localhost:5173 # or your production URL
112
+ ```
113
+
114
+ ## 3. Database Migration
115
+
116
+ The database will automatically create the new `users` table and add `user_id` to the `extractions` table when you start the application.
117
+
118
+ **Note:** If you have an existing database with extraction records, you'll need to:
119
+ 1. Back up your data
120
+ 2. Delete the old database file
121
+ 3. Restart the application to recreate tables with the new schema
122
+
123
+ Or manually migrate:
124
+ - Add `user_id` column to `extractions` table (you may need to set a default user_id for existing records)
125
+
126
+ ## 4. Install Dependencies
127
+
128
+ Make sure to install the new Python dependencies:
129
+
130
+ ```bash
131
+ pip install -r backend/requirements.txt
132
+ ```
133
+
134
+ New dependencies added:
135
+ - `authlib` - OAuth library
136
+ - `pyjwt` - JWT token handling
137
+ - `python-jose[cryptography]` - JWT verification
138
+
139
+ ## 5. Start the Application
140
+
141
+ 1. Start the backend server
142
+ 2. Start the frontend development server
143
+ 3. Users will be prompted to sign in with Google when they try to access the application
144
+
145
+ ## How It Works
146
+
147
+ 1. User clicks "Sign in with Google" → redirected to Google login
148
+ 2. After authentication, Google redirects to `/api/auth/callback`
149
+ 3. Backend creates/updates user in database and generates JWT token
150
+ 4. Frontend receives token and stores it in localStorage
151
+ 5. All API requests include the JWT token in the Authorization header
152
+ 6. Backend verifies token and filters data by user_id
153
+
154
+ ## Security Notes
155
+
156
+ - JWT tokens expire after 7 days
157
+ - Tokens are stored in localStorage (consider httpOnly cookies for production)
158
+ - All extraction records are filtered by user_id
159
+ - Users can only see their own data and history
160
+
161
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
IMPLEMENTATION_COMPLETE.md CHANGED
@@ -1,3 +1,4 @@
 
1
  # ✅ Firebase + OTP Authentication Implementation Complete
2
 
3
  All code changes have been applied successfully! Here are the next steps you need to follow:
@@ -255,3 +256,262 @@ Once you complete the setup steps above, your application will have:
255
 
256
  Good luck! 🚀
257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  # ✅ Firebase + OTP Authentication Implementation Complete
3
 
4
  All code changes have been applied successfully! Here are the next steps you need to follow:
 
256
 
257
  Good luck! 🚀
258
 
259
+ =======
260
+ # ✅ Firebase + OTP Authentication Implementation Complete
261
+
262
+ All code changes have been applied successfully! Here are the next steps you need to follow:
263
+
264
+ ## 📋 Implementation Summary
265
+
266
+ ### ✅ Backend Changes (Completed)
267
+ - ✅ Updated `requirements.txt` with Firebase Admin SDK
268
+ - ✅ Updated `models.py` - User model now supports Firebase and OTP auth methods
269
+ - ✅ Created `email_validator.py` - Business email validation
270
+ - ✅ Created `firebase_auth.py` - Firebase token verification
271
+ - ✅ Created `brevo_service.py` - Brevo email service for OTP
272
+ - ✅ Created `otp_service.py` - OTP generation and verification
273
+ - ✅ Updated `auth_routes.py` - New endpoints for Firebase and OTP login
274
+
275
+ ### ✅ Frontend Changes (Completed)
276
+ - ✅ Updated `package.json` with Firebase SDK
277
+ - ✅ Created `config/firebase.js` - Firebase configuration
278
+ - ✅ Updated `services/auth.js` - Firebase and OTP auth functions
279
+ - ✅ Updated `contexts/AuthContext.jsx` - Firebase and OTP support
280
+ - ✅ Created `components/auth/LoginForm.jsx` - Login UI with both options
281
+ - ✅ Updated `App.jsx` - Integrated LoginForm component
282
+
283
+ ---
284
+
285
+ ## 🚀 Next Steps (YOU NEED TO DO THESE)
286
+
287
+ ### Step 1: Install Dependencies
288
+
289
+ **Backend:**
290
+ ```bash
291
+ cd backend
292
+ pip install -r requirements.txt
293
+ ```
294
+
295
+ **Frontend:**
296
+ ```bash
297
+ cd frontend
298
+ npm install
299
+ ```
300
+
301
+ ---
302
+
303
+ ### Step 2: Set Up Firebase
304
+
305
+ 1. **Create Firebase Project:**
306
+ - Go to https://console.firebase.google.com/
307
+ - Create a new project or use existing one
308
+
309
+ 2. **Enable Google Authentication:**
310
+ - In Firebase Console → Authentication → Sign-in method
311
+ - Enable "Google" provider
312
+ - Set project support email
313
+
314
+ 3. **Get Web App Config:**
315
+ - Project Settings → Your apps → Add Web app
316
+ - Copy the config values
317
+
318
+ 4. **Get Service Account Key:**
319
+ - Project Settings → Service accounts
320
+ - Click "Generate new private key"
321
+ - Download the JSON file
322
+
323
+ 5. **Set Frontend Environment Variables:**
324
+ Create `frontend/.env`:
325
+ ```bash
326
+ VITE_FIREBASE_API_KEY=your-api-key-here
327
+ VITE_FIREBASE_AUTH_DOMAIN=your-project.firebaseapp.com
328
+ VITE_FIREBASE_PROJECT_ID=your-project-id
329
+ VITE_FIREBASE_STORAGE_BUCKET=your-project.appspot.com
330
+ VITE_FIREBASE_MESSAGING_SENDER_ID=your-sender-id
331
+ VITE_FIREBASE_APP_ID=your-app-id
332
+ VITE_API_BASE_URL=http://localhost:7860
333
+ ```
334
+
335
+ 6. **Set Backend Environment Variables:**
336
+ Option A (JSON file path):
337
+ ```bash
338
+ FIREBASE_SERVICE_ACCOUNT_KEY=/path/to/service-account-key.json
339
+ ```
340
+
341
+ Option B (JSON string - recommended for Docker):
342
+ ```bash
343
+ FIREBASE_SERVICE_ACCOUNT_JSON='{"type":"service_account","project_id":"...","private_key":"...","client_email":"..."}'
344
+ ```
345
+ (Copy the entire JSON content from the downloaded file)
346
+
347
+ ---
348
+
349
+ ### Step 3: Set Up Brevo
350
+
351
+ 1. **Create Brevo Account:**
352
+ - Go to https://www.brevo.com/
353
+ - Sign up (free tier: 300 emails/day)
354
+
355
+ 2. **Get API Key:**
356
+ - Settings → API Keys
357
+ - Generate new API key
358
+ - Copy the key (starts with `xkeysib-`)
359
+
360
+ 3. **Verify Sender Email:**
361
+ - Senders & IP → Senders
362
+ - Add sender email (e.g., `noreply@yourdomain.com`)
363
+ - Verify via email
364
+
365
+ 4. **Set Backend Environment Variables:**
366
+ ```bash
367
+ BREVO_API_KEY=xkeysib-your-api-key-here
368
+ BREVO_SENDER_EMAIL=noreply@yourdomain.com
369
+ BREVO_SENDER_NAME=EZOFIS AI
370
+ ```
371
+
372
+ ---
373
+
374
+ ### Step 4: Set JWT Secret
375
+
376
+ Generate a secure random key:
377
+ ```bash
378
+ # Linux/Mac
379
+ openssl rand -hex 32
380
+
381
+ # Or Python
382
+ python -c "import secrets; print(secrets.token_hex(32))"
383
+ ```
384
+
385
+ Set environment variable:
386
+ ```bash
387
+ JWT_SECRET_KEY=your-generated-secret-key-here
388
+ ```
389
+
390
+ ---
391
+
392
+ ### Step 5: Set Frontend URL
393
+
394
+ ```bash
395
+ FRONTEND_URL=http://localhost:5173 # Development
396
+ # OR
397
+ FRONTEND_URL=https://your-domain.com # Production
398
+ ```
399
+
400
+ ---
401
+
402
+ ### Step 6: Database Migration
403
+
404
+ **If you have existing data:**
405
+ - The new schema will be created automatically
406
+ - Existing `extractions` table needs `user_id` column
407
+ - You may need to assign existing records to a default user
408
+
409
+ **For fresh start (recommended for development):**
410
+ - Delete `data/app.db` (if exists)
411
+ - Restart application - tables will be recreated
412
+
413
+ ---
414
+
415
+ ### Step 7: Test the Implementation
416
+
417
+ 1. **Start Backend:**
418
+ ```bash
419
+ cd backend
420
+ uvicorn app.main:app --reload --port 7860
421
+ ```
422
+
423
+ 2. **Start Frontend:**
424
+ ```bash
425
+ cd frontend
426
+ npm run dev
427
+ ```
428
+
429
+ 3. **Test Firebase Login:**
430
+ - Navigate to http://localhost:5173
431
+ - Click "Google Sign In" tab
432
+ - Sign in with business Google account
433
+ - Should redirect to dashboard
434
+
435
+ 4. **Test OTP Login:**
436
+ - Click "Email / OTP" tab
437
+ - Enter business email
438
+ - Click "Send OTP"
439
+ - Check email for OTP code
440
+ - Enter OTP and verify
441
+ - Should redirect to dashboard
442
+
443
+ 5. **Test Business Email Validation:**
444
+ - Try personal Gmail account → Should be blocked
445
+ - Try OTP with personal email → Should be blocked
446
+
447
+ ---
448
+
449
+ ## 📝 Environment Variables Checklist
450
+
451
+ ### Backend (.env or system environment)
452
+ - [ ] `FIREBASE_SERVICE_ACCOUNT_JSON` or `FIREBASE_SERVICE_ACCOUNT_KEY`
453
+ - [ ] `BREVO_API_KEY`
454
+ - [ ] `BREVO_SENDER_EMAIL`
455
+ - [ ] `BREVO_SENDER_NAME`
456
+ - [ ] `JWT_SECRET_KEY`
457
+ - [ ] `FRONTEND_URL`
458
+
459
+ ### Frontend (.env)
460
+ - [ ] `VITE_FIREBASE_API_KEY`
461
+ - [ ] `VITE_FIREBASE_AUTH_DOMAIN`
462
+ - [ ] `VITE_FIREBASE_PROJECT_ID`
463
+ - [ ] `VITE_FIREBASE_STORAGE_BUCKET`
464
+ - [ ] `VITE_FIREBASE_MESSAGING_SENDER_ID`
465
+ - [ ] `VITE_FIREBASE_APP_ID`
466
+ - [ ] `VITE_API_BASE_URL`
467
+
468
+ ---
469
+
470
+ ## 🔒 Security Reminders
471
+
472
+ 1. ✅ Never commit API keys or secrets to git
473
+ 2. ✅ Use `.env` files (add to `.gitignore`)
474
+ 3. ✅ Business email validation is enforced on both frontend and backend
475
+ 4. ✅ JWT tokens expire after 7 days
476
+ 5. ✅ OTP codes expire after 10 minutes
477
+ 6. ✅ Maximum 5 OTP verification attempts
478
+
479
+ ---
480
+
481
+ ## 📚 Documentation
482
+
483
+ - **Firebase Setup:** See `FIREBASE_OTP_SETUP.md` for detailed instructions
484
+ - **Brevo API:** https://developers.brevo.com/reference/sendtransacemail
485
+
486
+ ---
487
+
488
+ ## ⚠️ Important Notes
489
+
490
+ 1. **Database Schema Change:**
491
+ - User model changed from `google_id` (required) to `firebase_uid` (optional)
492
+ - If you have existing users, you'll need to migrate the data
493
+ - For development, deleting `data/app.db` is the easiest option
494
+
495
+ 2. **Business Email Validation:**
496
+ - Personal email domains are blocked (Gmail, Yahoo, Outlook, etc.)
497
+ - Validation happens on both frontend and backend
498
+ - Users must use their work/corporate email addresses
499
+
500
+ 3. **OTP Storage:**
501
+ - Currently stored in memory (works for single server)
502
+ - For production with multiple servers, consider using Redis
503
+
504
+ ---
505
+
506
+ ## 🎉 You're All Set!
507
+
508
+ Once you complete the setup steps above, your application will have:
509
+ - ✅ Firebase Google Sign-in (no OAuth credentials needed!)
510
+ - ✅ Email/OTP authentication via Brevo
511
+ - ✅ Business email validation
512
+ - ✅ User-specific data isolation
513
+ - ✅ Secure JWT token authentication
514
+
515
+ Good luck! 🚀
516
+
517
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
README.md CHANGED
@@ -1,10 +1,10 @@
1
- ---
2
- title: EZOFISAIOCR
3
- emoji: 🌍
4
- colorFrom: blue
5
- colorTo: purple
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: EZOFISOCR
3
+ emoji: 🌍
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
backend/app/apollo_service.py CHANGED
@@ -1,3 +1,4 @@
 
1
  """
2
  Apollo.io API service for creating contacts, enriching contact data, and adding them to sequences.
3
  Reference:
@@ -442,3 +443,449 @@ async def enrich_contact_by_email(email: str) -> Optional[Dict[str, Any]]:
442
  print(f"[ERROR] Failed to enrich contact from Apollo.io: {str(e)}")
443
  return None
444
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  """
3
  Apollo.io API service for creating contacts, enriching contact data, and adding them to sequences.
4
  Reference:
 
443
  print(f"[ERROR] Failed to enrich contact from Apollo.io: {str(e)}")
444
  return None
445
 
446
+ =======
447
+ """
448
+ Apollo.io API service for creating contacts, enriching contact data, and adding them to sequences.
449
+ Reference:
450
+ - Create contact: https://docs.apollo.io/reference/create-a-contact
451
+ - Add to sequence: https://docs.apollo.io/reference/add-contacts-to-sequence
452
+ - Enrich person: https://docs.apollo.io/reference/enrich-people-data
453
+ """
454
+ import os
455
+ import httpx
456
+ from typing import Optional, Dict, Any
457
+
458
+ APOLLO_API_KEY = os.environ.get("APOLLO_API_KEY", "")
459
+ APOLLO_API_URL = "https://api.apollo.io/api/v1"
460
+ APOLLO_TRIAL_LIST_NAME = "VPR TRIAL LEADS"
461
+ # Allow list ID to be set directly via environment variable (more reliable than lookup)
462
+ APOLLO_TRIAL_LIST_ID = os.environ.get("APOLLO_TRIAL_LIST_ID", None)
463
+ # Sequence ID for adding contacts to email sequences (preferred over lists)
464
+ APOLLO_TRIAL_SEQUENCE_ID = os.environ.get("APOLLO_TRIAL_SEQUENCE_ID", None)
465
+
466
+
467
+ async def get_list_id(list_name: Optional[str] = None) -> Optional[str]:
468
+ """
469
+ Get Apollo list ID. First tries environment variable, then attempts API lookup.
470
+
471
+ Args:
472
+ list_name: Name of the list (for lookup if env var not set)
473
+
474
+ Returns:
475
+ List ID as string if found, None otherwise
476
+ """
477
+ # First, try to use the list ID from environment variable (most reliable)
478
+ if APOLLO_TRIAL_LIST_ID:
479
+ # Apollo list IDs are typically hexadecimal strings (MongoDB ObjectIds)
480
+ # Accept them as strings, just strip whitespace
481
+ list_id = str(APOLLO_TRIAL_LIST_ID).strip()
482
+ if list_id:
483
+ print(f"[INFO] Using Apollo list ID from environment variable: {list_id}")
484
+ return list_id
485
+ else:
486
+ print(f"[WARNING] APOLLO_TRIAL_LIST_ID is empty")
487
+
488
+ # If no env var, try to look up by name (this may not work if API endpoint is different)
489
+ if not list_name or not APOLLO_API_KEY:
490
+ return None
491
+
492
+ # Note: The /lists endpoint may not be available in all Apollo API versions
493
+ # Try alternative: search for lists using a different endpoint
494
+ try:
495
+ async with httpx.AsyncClient() as client:
496
+ # Try the lists endpoint (may return 404 in some API versions)
497
+ response = await client.get(
498
+ f"{APOLLO_API_URL}/lists",
499
+ headers={
500
+ "Content-Type": "application/json",
501
+ "Cache-Control": "no-cache",
502
+ "X-Api-Key": APOLLO_API_KEY
503
+ },
504
+ timeout=10.0
505
+ )
506
+ if response.status_code == 200:
507
+ data = response.json()
508
+ lists = data.get("lists", [])
509
+ for list_item in lists:
510
+ if list_item.get("name") == list_name:
511
+ list_id = list_item.get("id")
512
+ print(f"[INFO] Found Apollo list '{list_name}' with ID: {list_id}")
513
+ # Return as string (Apollo IDs are typically hex strings)
514
+ return str(list_id) if list_id else None
515
+ print(f"[WARNING] Apollo list '{list_name}' not found in available lists")
516
+ else:
517
+ print(f"[WARNING] Apollo lists endpoint returned {response.status_code}, cannot lookup list by name")
518
+ except Exception as e:
519
+ print(f"[WARNING] Failed to fetch Apollo list ID: {str(e)}")
520
+
521
+ return None
522
+
523
+
524
+ async def add_contact_to_sequence(contact_id: str, sequence_id: str) -> bool:
525
+ """
526
+ Add a contact to an Apollo.io email sequence.
527
+
528
+ Args:
529
+ contact_id: The Apollo contact ID
530
+ sequence_id: The Apollo sequence ID
531
+
532
+ Returns:
533
+ True if contact was successfully added to sequence, False otherwise
534
+ """
535
+ if not APOLLO_API_KEY:
536
+ print("[WARNING] APOLLO_API_KEY not set, skipping sequence enrollment")
537
+ return False
538
+
539
+ try:
540
+ async with httpx.AsyncClient() as client:
541
+ response = await client.post(
542
+ f"{APOLLO_API_URL}/sequence_contacts",
543
+ headers={
544
+ "Content-Type": "application/json",
545
+ "Cache-Control": "no-cache",
546
+ "X-Api-Key": APOLLO_API_KEY
547
+ },
548
+ json={
549
+ "sequence_id": sequence_id,
550
+ "contact_id": contact_id
551
+ },
552
+ timeout=10.0
553
+ )
554
+
555
+ if response.status_code in [200, 201]:
556
+ print(f"[INFO] Successfully added contact {contact_id} to sequence {sequence_id}")
557
+ return True
558
+ else:
559
+ error_data = response.text
560
+ print(f"[ERROR] Failed to add contact to sequence: {response.status_code} - {error_data}")
561
+ return False
562
+ except httpx.HTTPStatusError as e:
563
+ print(f"[ERROR] Apollo API HTTP error adding to sequence: {e.response.status_code} - {e.response.text}")
564
+ return False
565
+ except Exception as e:
566
+ print(f"[ERROR] Failed to add contact to sequence: {str(e)}")
567
+ return False
568
+
569
+
570
+ async def create_apollo_contact(
571
+ email: str,
572
+ first_name: Optional[str] = None,
573
+ last_name: Optional[str] = None,
574
+ organization_name: Optional[str] = None,
575
+ title: Optional[str] = None,
576
+ list_name: Optional[str] = None,
577
+ sequence_id: Optional[str] = None
578
+ ) -> bool:
579
+ """
580
+ Create a contact in Apollo.io and optionally add to a sequence or list.
581
+
582
+ Args:
583
+ email: Contact email address (required)
584
+ first_name: Contact first name
585
+ last_name: Contact last name
586
+ organization_name: Organization name
587
+ title: Job title
588
+ list_name: Name of the list to add contact to (defaults to APOLLO_TRIAL_LIST_NAME)
589
+ sequence_id: ID of the sequence to add contact to (preferred over list)
590
+
591
+ Returns:
592
+ True if contact created successfully, False otherwise
593
+
594
+ Note:
595
+ Does not raise if APOLLO_API_KEY is not set; a warning is logged and False is returned
596
+ """
597
+ if not APOLLO_API_KEY:
598
+ print("[WARNING] APOLLO_API_KEY not set, skipping Apollo contact creation")
599
+ return False
600
+
601
+ # Use default list name if not provided
602
+ if list_name is None:
603
+ list_name = APOLLO_TRIAL_LIST_NAME
604
+
605
+ # Derive first/last name from the email prefix when neither was provided
606
+ if not first_name and not last_name:
607
+ # Try to extract from email or use email prefix
608
+ email_prefix = email.split('@')[0]
609
+ if '.' in email_prefix:
610
+ parts = email_prefix.split('.')
611
+ first_name = parts[0].capitalize() if parts else None
612
+ last_name = parts[1].capitalize() if len(parts) > 1 else None
613
+ else:
614
+ first_name = email_prefix.capitalize()
615
+
616
+ # Extract organization domain from email
617
+ organization_domain = None
618
+ if '@' in email:
619
+ organization_domain = email.split('@')[1]
620
+
621
+ # Prepare contact data
622
+ contact_data: Dict[str, Any] = {
623
+ "email": email.lower(),
624
+ "run_dedupe": True # Prevent duplicate contacts
625
+ }
626
+
627
+ if first_name:
628
+ contact_data["first_name"] = first_name
629
+ if last_name:
630
+ contact_data["last_name"] = last_name
631
+ if organization_name:
632
+ contact_data["organization_name"] = organization_name
633
+ if organization_domain:
634
+ contact_data["organization_domain"] = organization_domain
635
+ if title:
636
+ contact_data["title"] = title
637
+
638
+ try:
639
+ async with httpx.AsyncClient() as client:
640
+ # Get the list ID if list_name is provided
641
+ list_ids = []
642
+ target_list_id = None # Store for later use
643
+ if list_name:
644
+ list_id = await get_list_id(list_name)
645
+ if list_id:
646
+ target_list_id = list_id # Store for verification later
647
+ # Apollo API accepts list_ids as an array of strings (hex IDs)
648
+ list_ids = [str(list_id)]
649
+ contact_data["list_ids"] = list_ids
650
+ print(f"[INFO] Adding contact to list ID: {list_id}")
651
+ else:
652
+ print(f"[WARNING] Could not find list '{list_name}'. Set APOLLO_TRIAL_LIST_ID environment variable with the list ID, or create contact without list assignment")
653
+
654
+ # Log the payload being sent (for debugging)
655
+ print(f"[DEBUG] Creating Apollo contact with payload: {contact_data}")
656
+
657
+ # Create the contact
658
+ response = await client.post(
659
+ f"{APOLLO_API_URL}/contacts",
660
+ headers={
661
+ "Content-Type": "application/json",
662
+ "Cache-Control": "no-cache",
663
+ "X-Api-Key": APOLLO_API_KEY
664
+ },
665
+ json=contact_data,
666
+ timeout=10.0
667
+ )
668
+
669
+ # Log the full response for debugging
670
+ print(f"[DEBUG] Apollo API response status: {response.status_code}")
671
+ try:
672
+ response_json = response.json()
673
+ print(f"[DEBUG] Apollo API response (full): {response_json}")
674
+ except:
675
+ print(f"[DEBUG] Apollo API response body (text): {response.text[:1000]}") # First 1000 chars
676
+
677
+ if response.status_code == 200 or response.status_code == 201:
678
+ result = response.json()
679
+ contact = result.get("contact", {})
680
+ contact_id = contact.get("id")
681
+ print(f"[INFO] Successfully created Apollo contact: {email} (ID: {contact_id})")
682
+
683
+ # Priority: Add to sequence if sequence_id is provided (this is supported by API)
684
+ target_sequence_id = sequence_id or APOLLO_TRIAL_SEQUENCE_ID
685
+ if contact_id and target_sequence_id:
686
+ print(f"[INFO] Adding contact to sequence: {target_sequence_id}")
687
+ sequence_success = await add_contact_to_sequence(contact_id, target_sequence_id)
688
+ if sequence_success:
689
+ print(f"[INFO] ✓ Contact successfully enrolled in sequence")
690
+ else:
691
+ print(f"[WARNING] Failed to add contact to sequence, but contact was created")
692
+
693
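+ # Fallback (no sequence configured): list_ids were already sent in the create request; the API cannot confirm the assignment, so only log a manual-verification reminder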
694
+ if list_ids and contact_id and target_list_id and not target_sequence_id:
695
+ print(f"[INFO] Contact created with list_ids parameter: {list_ids}")
696
+ print(f"[INFO] ⚠️ Apollo.io API Limitation: The API does not return list_ids in responses,")
697
+ print(f"[INFO] so we cannot verify if the contact was added to the list via API.")
698
+ print(f"[INFO] Please verify manually in Apollo.io that contact '{email}' is in list '{list_name or target_list_id}'")
699
+ print(f"[INFO] Consider using sequences instead (APOLLO_TRIAL_SEQUENCE_ID) for better API support.")
700
+
701
+ return True
702
+ else:
703
+ error_data = response.text
704
+ print(f"[ERROR] Failed to create Apollo contact: {response.status_code} - {error_data}")
705
+ return False
706
+
707
+ except httpx.HTTPStatusError as e:
708
+ print(f"[ERROR] Apollo API HTTP error: {e.response.status_code} - {e.response.text}")
709
+ return False
710
+ except Exception as e:
711
+ print(f"[ERROR] Failed to create Apollo contact: {str(e)}")
712
+ return False
713
+
714
+
715
+ async def enrich_contact_by_email(email: str) -> Optional[Dict[str, Any]]:
716
+ """
717
+ Enrich contact data from Apollo.io using email address.
718
+
719
+ Args:
720
+ email: Contact email address
721
+
722
+ Returns:
723
+ Dictionary with enriched contact data, or None if not found
724
+ """
725
+ if not APOLLO_API_KEY:
726
+ print("[WARNING] APOLLO_API_KEY not set, skipping Apollo enrichment")
727
+ return None
728
+
729
+ try:
730
+ async with httpx.AsyncClient() as client:
731
+ # Try people/match endpoint first (for exact email match)
732
+ print(f"[DEBUG] Attempting Apollo.io enrichment for {email} via /people/match endpoint")
733
+ response = await client.post(
734
+ f"{APOLLO_API_URL}/people/match",
735
+ headers={
736
+ "Content-Type": "application/json",
737
+ "Cache-Control": "no-cache",
738
+ "X-Api-Key": APOLLO_API_KEY
739
+ },
740
+ json={
741
+ "email": email.lower()
742
+ # Note: reveal_phone_number requires webhook_url, so we skip it for now
743
+ },
744
+ timeout=10.0
745
+ )
746
+
747
+ print(f"[DEBUG] Apollo.io /people/match response status: {response.status_code}")
748
+
749
+ if response.status_code == 200:
750
+ data = response.json()
751
+ print(f"[DEBUG] Apollo.io /people/match response data keys: {list(data.keys())}")
752
+ person = data.get("person", {})
753
+ if person:
754
+ print(f"[DEBUG] Found person data in Apollo.io response")
755
+ # Extract enriched data
756
+ enriched_data = {
757
+ "first_name": person.get("first_name"),
758
+ "last_name": person.get("last_name"),
759
+ "title": person.get("title"),
760
+ "phone_number": person.get("phone_numbers", [{}])[0].get("raw_number") if person.get("phone_numbers") else None,
761
+ "linkedin_url": person.get("linkedin_url"),
762
+ "headline": person.get("headline"),
763
+ "organization_name": person.get("organization", {}).get("name") if person.get("organization") else None,
764
+ "organization_website": person.get("organization", {}).get("website_url") if person.get("organization") else None,
765
+ "organization_address": None, # May need to parse from organization data
766
+ }
767
+
768
+ # Try to get organization address
769
+ if person.get("organization"):
770
+ org = person.get("organization", {})
771
+ address_parts = []
772
+ if org.get("street_address"):
773
+ address_parts.append(org.get("street_address"))
774
+ if org.get("city"):
775
+ address_parts.append(org.get("city"))
776
+ if org.get("state"):
777
+ address_parts.append(org.get("state"))
778
+ if org.get("postal_code"):
779
+ address_parts.append(org.get("postal_code"))
780
+ if org.get("country"):
781
+ address_parts.append(org.get("country"))
782
+ if address_parts:
783
+ enriched_data["organization_address"] = ", ".join(address_parts)
784
+
785
+ print(f"[INFO] Successfully enriched contact data for {email} from Apollo.io")
786
+ return enriched_data
787
+ else:
788
+ print(f"[DEBUG] Apollo.io /people/match returned 200 but no person data found")
789
+ elif response.status_code == 404:
790
+ print(f"[DEBUG] Apollo.io /people/match returned 404 - contact not found in database")
791
+ elif response.status_code == 401:
792
+ print(f"[ERROR] Apollo.io API authentication failed - check your API key")
793
+ try:
794
+ error_data = response.json()
795
+ print(f"[ERROR] Apollo.io error details: {error_data}")
796
+ except:
797
+ print(f"[ERROR] Apollo.io error response: {response.text}")
798
+ else:
799
+ print(f"[DEBUG] Apollo.io /people/match returned status {response.status_code}")
800
+ try:
801
+ error_data = response.json()
802
+ print(f"[DEBUG] Apollo.io response: {error_data}")
803
+ except:
804
+ print(f"[DEBUG] Apollo.io response text: {response.text[:500]}")
805
+
806
+ # If match fails, try the new search endpoint (api_search)
807
+ print(f"[DEBUG] Attempting Apollo.io enrichment for {email} via /mixed_people/api_search endpoint")
808
+ search_response = await client.post(
809
+ f"{APOLLO_API_URL}/mixed_people/api_search",
810
+ headers={
811
+ "Content-Type": "application/json",
812
+ "Cache-Control": "no-cache",
813
+ "X-Api-Key": APOLLO_API_KEY
814
+ },
815
+ json={
816
+ "email": email.lower(),
817
+ "per_page": 1
818
+ },
819
+ timeout=10.0
820
+ )
821
+
822
+ print(f"[DEBUG] Apollo.io /mixed_people/api_search response status: {search_response.status_code}")
823
+
824
+ if search_response.status_code == 200:
825
+ search_data = search_response.json()
826
+ print(f"[DEBUG] Apollo.io /mixed_people/api_search response data keys: {list(search_data.keys())}")
827
+ people = search_data.get("people", [])
828
+ print(f"[DEBUG] Found {len(people)} people in search results")
829
+ if people:
830
+ person = people[0]
831
+ # Extract enriched data (same structure as above)
832
+ enriched_data = {
833
+ "first_name": person.get("first_name"),
834
+ "last_name": person.get("last_name"),
835
+ "title": person.get("title"),
836
+ "phone_number": person.get("phone_numbers", [{}])[0].get("raw_number") if person.get("phone_numbers") else None,
837
+ "linkedin_url": person.get("linkedin_url"),
838
+ "headline": person.get("headline"),
839
+ "organization_name": person.get("organization", {}).get("name") if person.get("organization") else None,
840
+ "organization_website": person.get("organization", {}).get("website_url") if person.get("organization") else None,
841
+ "organization_address": None,
842
+ }
843
+
844
+ if person.get("organization"):
845
+ org = person.get("organization", {})
846
+ address_parts = []
847
+ if org.get("street_address"):
848
+ address_parts.append(org.get("street_address"))
849
+ if org.get("city"):
850
+ address_parts.append(org.get("city"))
851
+ if org.get("state"):
852
+ address_parts.append(org.get("state"))
853
+ if org.get("postal_code"):
854
+ address_parts.append(org.get("postal_code"))
855
+ if org.get("country"):
856
+ address_parts.append(org.get("country"))
857
+ if address_parts:
858
+ enriched_data["organization_address"] = ", ".join(address_parts)
859
+
860
+ print(f"[INFO] Successfully enriched contact data for {email} from Apollo.io (via search)")
861
+ return enriched_data
862
+ else:
863
+ print(f"[DEBUG] Apollo.io /mixed_people/api_search returned 200 but no people in results")
864
+ elif search_response.status_code == 404:
865
+ print(f"[DEBUG] Apollo.io /mixed_people/api_search returned 404 - contact not found")
866
+ elif search_response.status_code == 401:
867
+ print(f"[ERROR] Apollo.io API authentication failed on search - check your API key")
868
+ try:
869
+ error_data = search_response.json()
870
+ print(f"[ERROR] Apollo.io search error details: {error_data}")
871
+ except:
872
+ print(f"[ERROR] Apollo.io search error response: {search_response.text}")
873
+ else:
874
+ print(f"[DEBUG] Apollo.io /mixed_people/api_search returned status {search_response.status_code}")
875
+ try:
876
+ error_data = search_response.json()
877
+ print(f"[DEBUG] Apollo.io search response: {error_data}")
878
+ except:
879
+ print(f"[DEBUG] Apollo.io search response text: {search_response.text[:500]}")
880
+
881
+ print(f"[INFO] No contact data found in Apollo.io for {email} - contact may not exist in Apollo's database")
882
+ return None
883
+
884
+ except httpx.HTTPStatusError as e:
885
+ print(f"[ERROR] Apollo API HTTP error during enrichment: {e.response.status_code} - {e.response.text}")
886
+ return None
887
+ except Exception as e:
888
+ print(f"[ERROR] Failed to enrich contact from Apollo.io: {str(e)}")
889
+ return None
890
+
891
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
backend/app/auth.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import jwt
3
  from datetime import datetime, timedelta
@@ -90,3 +91,97 @@ def get_current_user(
90
 
91
  return user
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import os
3
  import jwt
4
  from datetime import datetime, timedelta
 
91
 
92
  return user
93
 
94
+ =======
95
+ import os
96
+ import jwt
97
+ from datetime import datetime, timedelta
98
+ from typing import Optional
99
+ from fastapi import Depends, HTTPException, status
100
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
101
+ from sqlalchemy.orm import Session
102
+ from .db import SessionLocal
103
+ from .models import User
104
+
105
+ # JWT Configuration
106
+ SECRET_KEY = os.environ.get("JWT_SECRET_KEY", "your-secret-key-change-in-production")
107
+ ALGORITHM = "HS256"
108
+ ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7 # 7 days
109
+
110
+ security = HTTPBearer()
111
+
112
+
113
+ def get_db():
114
+ """Database dependency."""
115
+ db = SessionLocal()
116
+ try:
117
+ yield db
118
+ finally:
119
+ db.close()
120
+
121
+
122
+ def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
123
+ """Create a JWT access token."""
124
+ to_encode = data.copy()
125
+ # Ensure 'sub' (subject) is a string, not an integer
126
+ if "sub" in to_encode:
127
+ to_encode["sub"] = str(to_encode["sub"])
128
+ if expires_delta:
129
+ expire = datetime.utcnow() + expires_delta
130
+ else:
131
+ expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
132
+ to_encode.update({"exp": expire})
133
+ encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
134
+ return encoded_jwt
135
+
136
+
137
+ def verify_token(token: str) -> dict:
138
+ """Verify and decode a JWT token."""
139
+ try:
140
+ payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
141
+ return payload
142
+ except jwt.ExpiredSignatureError:
143
+ raise HTTPException(
144
+ status_code=status.HTTP_401_UNAUTHORIZED,
145
+ detail="Token has expired",
146
+ )
147
+ except jwt.InvalidTokenError:
148
+ raise HTTPException(
149
+ status_code=status.HTTP_401_UNAUTHORIZED,
150
+ detail="Could not validate credentials",
151
+ )
152
+
153
+
154
+ def get_current_user(
155
+ credentials: HTTPAuthorizationCredentials = Depends(security),
156
+ db: Session = Depends(get_db)
157
+ ) -> User:
158
+ """Get the current authenticated user from JWT token."""
159
+ token = credentials.credentials
160
+ payload = verify_token(token)
161
+ user_id: int = payload.get("sub")
162
+
163
+ if user_id is None:
164
+ raise HTTPException(
165
+ status_code=status.HTTP_401_UNAUTHORIZED,
166
+ detail="Could not validate credentials",
167
+ )
168
+
169
+ # Convert user_id back to integer for database query
170
+ try:
171
+ user_id_int = int(user_id)
172
+ except (ValueError, TypeError):
173
+ raise HTTPException(
174
+ status_code=status.HTTP_401_UNAUTHORIZED,
175
+ detail="Invalid user ID in token",
176
+ )
177
+
178
+ user = db.query(User).filter(User.id == user_id_int).first()
179
+ if user is None:
180
+ raise HTTPException(
181
+ status_code=status.HTTP_401_UNAUTHORIZED,
182
+ detail="User not found",
183
+ )
184
+
185
+ return user
186
+
187
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
backend/app/auth_routes.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  from fastapi import APIRouter, Depends, HTTPException, Body
3
  from pydantic import BaseModel, EmailStr
@@ -345,3 +346,241 @@ async def delete_api_key(
345
  "message": "API key deactivated successfully"
346
  }
347
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import os
3
  from fastapi import APIRouter, Depends, HTTPException, Body
4
  from pydantic import BaseModel, EmailStr
 
346
  "message": "API key deactivated successfully"
347
  }
348
 
349
+ =======
350
+ import os
351
+ from fastapi import APIRouter, Depends, HTTPException, Body
352
+ from pydantic import BaseModel, EmailStr
353
+ from sqlalchemy.orm import Session
354
+ from .models import User
355
+ from .auth import create_access_token, get_current_user
356
+ from .firebase_auth import verify_firebase_token
357
+ from .otp_service import request_otp, verify_otp
358
+ from .email_validator import validate_business_email, is_business_email
359
+ from .db import SessionLocal
360
+
361
+ def get_db():
362
+ """Database dependency."""
363
+ db = SessionLocal()
364
+ try:
365
+ yield db
366
+ finally:
367
+ db.close()
368
+
369
+ router = APIRouter()
370
+
371
+
372
+ class FirebaseLoginRequest(BaseModel):
373
+ id_token: str
374
+
375
+
376
+ class OTPRequestRequest(BaseModel):
377
+ email: EmailStr
378
+
379
+
380
+ class OTPVerifyRequest(BaseModel):
381
+ email: EmailStr
382
+ otp: str
383
+
384
+
385
+ @router.post("/api/auth/firebase/login")
386
+ async def firebase_login(
387
+ request: FirebaseLoginRequest,
388
+ db: Session = Depends(get_db)
389
+ ):
390
+ """
391
+ Login with Firebase ID token.
392
+ Validates business email and creates/updates user.
393
+ """
394
+ try:
395
+ # Verify Firebase token
396
+ user_info = await verify_firebase_token(request.id_token)
397
+ email = user_info.get('email')
398
+
399
+ if not email:
400
+ raise HTTPException(status_code=400, detail="Email not found in Firebase token")
401
+
402
+ # Validate business email
403
+ if not is_business_email(email):
404
+ raise HTTPException(
405
+ status_code=400,
406
+ detail="Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."
407
+ )
408
+
409
+ # Get or create user
410
+ user = db.query(User).filter(
411
+ (User.email == email.lower()) | (User.firebase_uid == user_info['uid'])
412
+ ).first()
413
+
414
+ if not user:
415
+ user = User(
416
+ email=email.lower(),
417
+ name=user_info.get('name'),
418
+ picture=user_info.get('picture'),
419
+ firebase_uid=user_info['uid'],
420
+ auth_method='firebase',
421
+ email_verified=True
422
+ )
423
+ db.add(user)
424
+ db.commit()
425
+ db.refresh(user)
426
+ print(f"[INFO] New user created via Firebase: {email}")
427
+
428
+ # Enrich contact data from Apollo.io and update Brevo + Monday.com
429
+ try:
430
+ from .apollo_service import enrich_contact_by_email
431
+ from .brevo_service import create_brevo_contact, BREVO_TRIAL_LIST_ID
432
+ from .monday_service import create_monday_lead
433
+
434
+ # Enrich contact data from Apollo.io
435
+ enriched_data = await enrich_contact_by_email(email)
436
+
437
+ # Use enriched data if available, otherwise use basic data
438
+ first_name = enriched_data.get("first_name") if enriched_data else None
439
+ last_name = enriched_data.get("last_name") if enriched_data else None
440
+ org_name = enriched_data.get("organization_name") if enriched_data else None
441
+
442
+ # Fallback to Firebase data if Apollo didn't provide it
443
+ if not first_name or not last_name:
444
+ full_name = user_info.get('name', '')
445
+ if full_name:
446
+ name_parts = full_name.strip().split(' ', 1)
447
+ first_name = first_name or (name_parts[0] if name_parts else None)
448
+ last_name = last_name or (name_parts[1] if len(name_parts) > 1 else None)
449
+
450
+ if not org_name:
451
+ org_domain = email.split('@')[1] if '@' in email else None
452
+ org_name = org_domain.split('.')[0].capitalize() if org_domain else None
453
+
454
+ # Update Brevo contact with enriched data
455
+ await create_brevo_contact(
456
+ email=email,
457
+ first_name=first_name,
458
+ last_name=last_name,
459
+ organization_name=org_name or (enriched_data.get("organization_name") if enriched_data else None),
460
+ phone_number=enriched_data.get("phone_number") if enriched_data else None,
461
+ linkedin_url=enriched_data.get("linkedin_url") if enriched_data else None,
462
+ title=enriched_data.get("title") if enriched_data else None,
463
+ headline=enriched_data.get("headline") if enriched_data else None,
464
+ organization_website=enriched_data.get("organization_website") if enriched_data else None,
465
+ organization_address=enriched_data.get("organization_address") if enriched_data else None,
466
+ list_id=BREVO_TRIAL_LIST_ID
467
+ )
468
+
469
+ # Create lead in Monday.com
470
+ await create_monday_lead(
471
+ email=email,
472
+ first_name=first_name,
473
+ last_name=last_name,
474
+ phone_number=enriched_data.get("phone_number") if enriched_data else None,
475
+ linkedin_url=enriched_data.get("linkedin_url") if enriched_data else None,
476
+ title=enriched_data.get("title") if enriched_data else None,
477
+ headline=enriched_data.get("headline") if enriched_data else None,
478
+ organization_name=org_name or (enriched_data.get("organization_name") if enriched_data else None),
479
+ organization_website=enriched_data.get("organization_website") if enriched_data else None,
480
+ organization_address=enriched_data.get("organization_address") if enriched_data else None,
481
+ )
482
+ except Exception as e:
483
+ # Don't fail user creation if integrations fail
484
+ print(f"[WARNING] Failed to enrich/update contact for {email}: {str(e)}")
485
+ else:
486
+ # Update user info
487
+ user.firebase_uid = user_info['uid']
488
+ user.email_verified = True
489
+ user.name = user_info.get('name', user.name)
490
+ user.picture = user_info.get('picture', user.picture)
491
+ if user.auth_method != 'firebase':
492
+ user.auth_method = 'firebase'
493
+ db.commit()
494
+ print(f"[INFO] User logged in via Firebase: {email}")
495
+
496
+ # Generate JWT token
497
+ token = create_access_token(data={"sub": user.id})
498
+
499
+ return {
500
+ "token": token,
501
+ "user": {
502
+ "id": user.id,
503
+ "email": user.email,
504
+ "name": user.name,
505
+ "picture": user.picture,
506
+ "auth_method": user.auth_method
507
+ }
508
+ }
509
+ except HTTPException:
510
+ raise
511
+ except Exception as e:
512
+ print(f"[ERROR] Firebase login failed: {str(e)}")
513
+ raise HTTPException(status_code=400, detail=f"Authentication failed: {str(e)}")
514
+
515
+
516
+ @router.post("/api/auth/otp/request")
517
+ async def request_otp_endpoint(
518
+ request: OTPRequestRequest,
519
+ db: Session = Depends(get_db)
520
+ ):
521
+ """
522
+ Request OTP for email login.
523
+ Validates business email before sending OTP.
524
+ """
525
+ try:
526
+ # Validate business email
527
+ validate_business_email(request.email)
528
+
529
+ # Request OTP
530
+ result = await request_otp(request.email, db)
531
+ return result
532
+ except HTTPException:
533
+ raise
534
+ except Exception as e:
535
+ print(f"[ERROR] OTP request failed: {str(e)}")
536
+ raise HTTPException(status_code=500, detail=f"Failed to send OTP: {str(e)}")
537
+
538
+
539
+ @router.post("/api/auth/otp/verify")
540
+ async def verify_otp_endpoint(
541
+ request: OTPVerifyRequest,
542
+ db: Session = Depends(get_db)
543
+ ):
544
+ """
545
+ Verify OTP and login.
546
+ Validates business email and OTP code.
547
+ """
548
+ try:
549
+ # Validate business email
550
+ validate_business_email(request.email)
551
+
552
+ # Verify OTP
553
+ user = await verify_otp(request.email, request.otp, db)
554
+
555
+ # Generate JWT token
556
+ token = create_access_token(data={"sub": user.id})
557
+
558
+ return {
559
+ "token": token,
560
+ "user": {
561
+ "id": user.id,
562
+ "email": user.email,
563
+ "name": user.name,
564
+ "picture": user.picture,
565
+ "auth_method": user.auth_method
566
+ }
567
+ }
568
+ except HTTPException:
569
+ raise
570
+ except Exception as e:
571
+ print(f"[ERROR] OTP verification failed: {str(e)}")
572
+ raise HTTPException(status_code=400, detail=f"OTP verification failed: {str(e)}")
573
+
574
+
575
+ @router.get("/api/auth/me")
576
+ async def get_current_user_info(current_user: User = Depends(get_current_user)):
577
+ """Get current user information."""
578
+ return {
579
+ "id": current_user.id,
580
+ "email": current_user.email,
581
+ "name": current_user.name,
582
+ "picture": current_user.picture,
583
+ "auth_method": current_user.auth_method,
584
+ }
585
+
586
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
backend/app/email_validator.py CHANGED
@@ -1,3 +1,4 @@
 
1
  """
2
  Email validation utilities to ensure only business emails are allowed.
3
  """
@@ -59,3 +60,66 @@ def validate_business_email(email: str) -> None:
59
  detail="Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."
60
  )
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  """
3
  Email validation utilities to ensure only business emails are allowed.
4
  """
 
60
  detail="Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."
61
  )
62
 
63
+ =======
64
+ """
65
+ Email validation utilities to ensure only business emails are allowed.
66
+ """
67
+ from fastapi import HTTPException
68
+
69
# Personal / free-mail domains that are blocked.
# is_business_email() lower-cases the address's domain and treats the address
# as "personal" when that domain is a member of this set, so every entry here
# must be written in lower case.
PERSONAL_EMAIL_DOMAINS = {
    'gmail.com', 'yahoo.com', 'hotmail.com', 'outlook.com',
    'aol.com', 'icloud.com', 'mail.com', 'protonmail.com',
    'yandex.com', 'zoho.com', 'gmx.com', 'live.com', 'msn.com',
    'me.com', 'mac.com', 'yahoo.co.uk', 'yahoo.co.jp', 'yahoo.fr',
    'yahoo.de', 'yahoo.it', 'yahoo.es', 'yahoo.in', 'yahoo.com.au',
    'gmail.co.uk', 'gmail.fr', 'gmail.de', 'gmail.it', 'gmail.es',
    'gmail.in', 'gmail.com.au', 'hotmail.co.uk', 'hotmail.fr',
    'hotmail.de', 'hotmail.it', 'hotmail.es', 'outlook.co.uk',
    'outlook.fr', 'outlook.de', 'outlook.it', 'outlook.es',
    'rediffmail.com', 'sina.com', 'qq.com', '163.com', '126.com',
    'mail.ru', 'inbox.com', 'fastmail.com', 'tutanota.com',
    'hey.com', 'pm.me'
}
84
+
85
+
86
def is_business_email(email: str) -> bool:
    """
    Check if email is a business email (not personal).

    The domain is taken from the text after the LAST '@', so an address with
    several '@' characters (e.g. a quoted local part) cannot hide a blocked
    domain behind an earlier segment. Surrounding whitespace and a trailing
    root dot on the domain are ignored, and the comparison is case-insensitive.

    Args:
        email: Email address to validate

    Returns:
        True if business email; False if the address is empty, has no '@',
        has an empty local part or domain, or uses a personal email domain
    """
    if not email or '@' not in email:
        return False

    local_part, _, domain = email.strip().rpartition('@')
    # "gmail.com." is the same host as "gmail.com"; normalise before lookup.
    domain = domain.strip().lower().rstrip('.')

    # "user@" and "@domain" are not usable addresses at all.
    if not local_part or not domain:
        return False

    return domain not in PERSONAL_EMAIL_DOMAINS
101
+
102
+
103
def validate_business_email(email: str) -> None:
    """
    Reject anything that is not a business email address.

    Args:
        email: Email address to validate

    Raises:
        HTTPException: 400 when the address is missing, or when
            is_business_email() reports it as not a business address
    """
    problem = None
    if not email:
        problem = "Email address is required"
    elif not is_business_email(email):
        problem = "Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."

    if problem is not None:
        raise HTTPException(
            status_code=400,
            detail=problem
        )
124
+
125
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
backend/app/firebase_auth.py CHANGED
@@ -1,3 +1,4 @@
 
1
  """
2
  Firebase Authentication utilities.
3
  """
@@ -90,3 +91,97 @@ async def verify_firebase_token(id_token: str) -> dict:
90
  detail=f"Firebase authentication failed: {str(e)}"
91
  )
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  """
3
  Firebase Authentication utilities.
4
  """
 
91
  detail=f"Firebase authentication failed: {str(e)}"
92
  )
93
 
94
+ =======
95
+ """
96
+ Firebase Authentication utilities.
97
+ """
98
+ import os
99
+ import json
100
+ import firebase_admin
101
+ from firebase_admin import auth, credentials
102
+ from fastapi import HTTPException
103
+
104
# Module-level flag: set to True once this module has successfully called
# firebase_admin.initialize_app(), so later calls can return immediately.
_firebase_initialized = False

def initialize_firebase() -> None:
    """
    Initialize the Firebase Admin SDK once per process.

    Credential sources are tried in this order:
      1. FIREBASE_SERVICE_ACCOUNT_JSON - service-account JSON passed as a string
      2. FIREBASE_SERVICE_ACCOUNT_KEY  - path to a service-account file
      3. firebase_admin.initialize_app() with no explicit credentials

    Raises:
        HTTPException: 500 when the credential-less initialization in step 3
            raises.
    """
    global _firebase_initialized

    # Fast path: this module already initialized the SDK.
    if _firebase_initialized:
        return

    # NOTE(review): _apps is a private attribute of firebase_admin; presumably
    # it is the registry of initialized apps - confirm against the SDK version
    # in use. When it is non-empty nothing below runs and the flag stays False.
    if not firebase_admin._apps:
        # Try to get service account from environment variable (JSON string)
        service_account_json = os.environ.get("FIREBASE_SERVICE_ACCOUNT_JSON")

        if service_account_json:
            try:
                service_account_info = json.loads(service_account_json)
                cred = credentials.Certificate(service_account_info)
                firebase_admin.initialize_app(cred)
                _firebase_initialized = True
                print("[INFO] Firebase Admin SDK initialized from environment variable")
                return
            except json.JSONDecodeError:
                # Only malformed JSON is handled here; execution falls through
                # to the file-path and default-credential attempts below.
                print("[WARNING] Failed to parse FIREBASE_SERVICE_ACCOUNT_JSON")

        # Try to get service account from file path
        service_account_path = os.environ.get("FIREBASE_SERVICE_ACCOUNT_KEY")
        if service_account_path and os.path.exists(service_account_path):
            cred = credentials.Certificate(service_account_path)
            firebase_admin.initialize_app(cred)
            _firebase_initialized = True
            print(f"[INFO] Firebase Admin SDK initialized from file: {service_account_path}")
            return

        # Try to use default credentials (for Google Cloud environments)
        try:
            firebase_admin.initialize_app()
            _firebase_initialized = True
            print("[INFO] Firebase Admin SDK initialized with default credentials")
            return
        except Exception as e:
            # Last resort failed: report a configuration error to the caller.
            print(f"[WARNING] Firebase initialization failed: {e}")
            raise HTTPException(
                status_code=500,
                detail="Firebase not configured. Please set FIREBASE_SERVICE_ACCOUNT_JSON or FIREBASE_SERVICE_ACCOUNT_KEY environment variable."
            )
150
+
151
+
152
async def verify_firebase_token(id_token: str) -> dict:
    """
    Validate a Firebase ID token and extract the caller's identity.

    Args:
        id_token: Firebase ID token from client

    Returns:
        Dictionary with keys 'uid', 'email', 'name' and 'picture'; the last
        three are None when the decoded token does not carry them

    Raises:
        HTTPException: 401 when the token cannot be verified
    """
    # Make sure the Admin SDK is ready; its own errors propagate unchanged.
    initialize_firebase()

    try:
        claims = auth.verify_id_token(id_token)

        identity = {'uid': claims['uid']}
        for optional_claim in ('email', 'name', 'picture'):
            identity[optional_claim] = claims.get(optional_claim)
        return identity
    except Exception as exc:
        # ValueError gets its own wording; everything else is a generic failure.
        if isinstance(exc, ValueError):
            reason = f"Invalid Firebase token: {str(exc)}"
        else:
            reason = f"Firebase authentication failed: {str(exc)}"
        raise HTTPException(
            status_code=401,
            detail=reason
        )
186
+
187
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
backend/app/main.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import time
3
  from typing import List, Dict, Optional
@@ -784,3 +785,747 @@ if os.path.isdir(frontend_dir):
784
  return FileResponse(index_path)
785
  from fastapi import HTTPException
786
  raise HTTPException(status_code=404)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import os
3
  import time
4
  from typing import List, Dict, Optional
 
785
  return FileResponse(index_path)
786
  from fastapi import HTTPException
787
  raise HTTPException(status_code=404)
788
+ =======
789
+ import os
790
+ import time
791
+ from typing import List, Dict, Optional
792
+
793
+ from fastapi import FastAPI, UploadFile, File, Depends, Form, HTTPException, Body
794
+ from fastapi.middleware.cors import CORSMiddleware
795
+ from fastapi.staticfiles import StaticFiles
796
+ from sqlalchemy.orm import Session
797
+ from pydantic import BaseModel
798
+
799
+ from .db import Base, engine, SessionLocal
800
+ from .models import ExtractionRecord, User, ShareToken
801
+ from .schemas import ExtractionRecordBase, ExtractionStage
802
+ from .openrouter_client import extract_fields_from_document
803
+ from .auth import get_current_user, get_db
804
+ from .auth_routes import router as auth_router
805
+
806
# Allowed file types
ALLOWED_CONTENT_TYPES = [
    "application/pdf",
    "image/png",
    "image/jpeg",
    "image/jpg",
    "image/tiff",
    "image/tif"
]

# Allowed file extensions (for fallback validation)
ALLOWED_EXTENSIONS = [".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"]

# Maximum file size: 4 MB
MAX_FILE_SIZE = 4 * 1024 * 1024  # 4 MB in bytes

# Ensure data dir exists for SQLite
os.makedirs("data", exist_ok=True)

# Create tables
Base.metadata.create_all(bind=engine)

app = FastAPI(title="Document Capture Demo – Backend")

# Include auth routes
app.include_router(auth_router)

# CORS. Allowing every origin together with credentials is the permissive
# development default, not a safe production setting. Set CORS_ALLOWED_ORIGINS
# to a comma-separated list of origins (e.g. "https://app.example.com") to
# restrict it; when the variable is unset or empty the previous behavior
# (all origins) is kept.
_cors_allowed_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ALLOWED_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
841
+
842
+
843
def get_db():
    """
    Yield a database session and close it once the consumer is done.

    NOTE(review): this module also imports a `get_db` from `.auth`; this
    definition replaces that name within this module - confirm that is intended.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        # Runs on normal completion and on errors raised in the consumer.
        session.close()
849
+
850
+
851
@app.get("/ping")
def ping():
    """Liveness probe: always answers with a fixed OK payload."""
    payload = dict(status="ok", message="backend alive")
    return payload
855
+
856
+
857
def make_stages(total_ms: int, status: str) -> Dict[str, ExtractionStage]:
    """
    Fabricate per-stage timings for the History UI.

    The total duration is divided between four fixed stages (15% / 55% /
    20% / 10%). A non-positive total is replaced by 1000 ms. When the overall
    status is not "completed", the AI stage is marked "failed" and the two
    stages after it "skipped"; uploading is always "completed".
    """
    duration = total_ms if total_ms > 0 else 1000
    succeeded = status == "completed"

    # (stage key, share of total time, stage status, variation)
    plan = [
        ("uploading", 0.15, "completed", "normal"),
        ("aiAnalysis", 0.55, "completed" if succeeded else "failed", "normal"),
        ("dataExtraction", 0.2, "completed" if succeeded else "skipped", "fast"),
        ("outputRendering", 0.1, "completed" if succeeded else "skipped", "normal"),
    ]

    return {
        key: ExtractionStage(
            time=int(duration * share),
            status=stage_status,
            variation=variation,
        )
        for key, share, stage_status, variation in plan
    }
887
+
888
+
889
@app.post("/api/extract")
async def extract_document(
    file: UploadFile = File(...),
    key_fields: Optional[str] = Form(None),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Main extraction endpoint used by the Dashboard.

    1) Read the uploaded file and validate its size and type
    2) Run extract_fields_from_document on it
    3) Store an ExtractionRecord (also when extraction failed)
    4) Return extraction result + metadata

    Raises:
        HTTPException: 400 when the file exceeds MAX_FILE_SIZE or is neither
            an allowed content type nor an allowed extension.

    An extraction failure is not raised: it is recorded and returned with
    status "failed" and the error text in "errorMessage".
    """
    import base64
    import json
    import traceback

    start = time.time()
    content = await file.read()
    content_type = file.content_type or "application/octet-stream"
    file_size = len(content)
    size_mb = file_size / 1024 / 1024
    size_str = f"{size_mb:.2f} MB"

    # Validate file size
    if file_size > MAX_FILE_SIZE:
        raise HTTPException(
            status_code=400,
            detail=f"File size exceeds 4 MB limit. Your file is {size_mb:.2f} MB."
        )

    # Validate file type. splitext() yields "" for a name without a dot, so a
    # file literally named "pdf" is no longer mistaken for a ".pdf" file.
    file_extension = ""
    if file.filename:
        file_extension = os.path.splitext(file.filename)[1].lower()

    is_valid_type = (
        content_type in ALLOWED_CONTENT_TYPES or
        file_extension in ALLOWED_EXTENSIONS
    )

    if not is_valid_type:
        raise HTTPException(
            status_code=400,
            detail="Only PDF, PNG, JPG, and TIFF files are allowed."
        )

    # Convert file content to base64 for storage. Done only after validation so
    # rejected uploads do not pay for an extra in-memory copy.
    file_base64 = base64.b64encode(content).decode("utf-8")

    try:
        print(f"[INFO] Starting extraction for file: {file.filename}, type: {content_type}, size: {size_str}")
        if key_fields:
            print(f"[INFO] Key fields requested: {key_fields}")
        extracted = await extract_fields_from_document(content, content_type, file.filename, key_fields)
        total_ms = int((time.time() - start) * 1000)

        print(f"[INFO] Extraction completed. Response keys: {list(extracted.keys())}")
        print(f"[INFO] Fields extracted: {extracted.get('fields', {})}")

        confidence = float(extracted.get("confidence", 90))
        # A missing or null "fields" value becomes an empty dict so the
        # item assignments below cannot fail on None.
        fields = extracted.get("fields") or {}

        # Get Fields from root level (if user provided key_fields)
        root_fields = extracted.get("Fields", {})

        # Get full_text for text output
        full_text = extracted.get("full_text", "")
        if full_text:
            full_text_words = len(str(full_text).split())
            print(f"[INFO] Full text extracted: {full_text_words} words")

        # Structured output is recognised by "page_X" keys. The keys added
        # below never start with "page_", so this is computed only once.
        has_page_keys = isinstance(fields, dict) and any(
            k.startswith("page_") for k in fields.keys()
        )

        # If fields is empty or simple, add full_text and pages for text display
        if not fields or (isinstance(fields, dict) and not has_page_keys):
            if full_text:
                fields["full_text"] = full_text

            # Also check for pages array
            pages_data = extracted.get("pages", [])
            if pages_data and isinstance(pages_data, list):
                print(f"[INFO] Extracted text from {len(pages_data)} page(s)")
                fields["pages"] = pages_data

        # Add Fields at root level if it exists
        if root_fields:
            fields["Fields"] = root_fields

        # Count fields - if structured data exists, count table rows + root Fields
        if isinstance(fields, dict):
            if has_page_keys:
                # Count table rows from all pages
                table_rows_count = 0
                for page_key, page_data in fields.items():
                    if page_key.startswith("page_") and isinstance(page_data, dict):
                        table_rows = page_data.get("table", [])
                        if isinstance(table_rows, list):
                            table_rows_count += len(table_rows)

                # Count Fields from root level
                fields_keys = 0
                if isinstance(root_fields, dict):
                    fields_keys = len(root_fields)

                fields_extracted = table_rows_count + fields_keys
                print(f"[INFO] Structured data: {table_rows_count} table rows, {fields_keys} extracted fields")
            else:
                # Regular fields count (excluding full_text, pages, and Fields)
                fields_extracted = len([k for k in fields.keys() if k not in ["full_text", "pages", "Fields"]])
                # Add Fields count if it exists
                if isinstance(root_fields, dict):
                    fields_extracted += len(root_fields)
        else:
            fields_extracted = 0

        print(f"[INFO] Final stats - confidence: {confidence}, fields_count: {fields_extracted}")

        status = "completed"
        error_message = None
    except Exception as e:
        # Best effort: a failed extraction is still recorded and reported.
        total_ms = int((time.time() - start) * 1000)
        confidence = 0.0
        fields = {}
        fields_extracted = 0
        status = "failed"
        error_message = str(e)
        print(f"[ERROR] Extraction failed: {error_message}")
        print(f"[ERROR] Traceback: {traceback.format_exc()}")

    # Save record to DB
    rec = ExtractionRecord(
        user_id=current_user.id,
        file_name=file.filename,
        file_type=content_type,
        file_size=size_str,
        status=status,
        confidence=confidence,
        fields_extracted=fields_extracted,
        total_time_ms=total_ms,
        raw_output=json.dumps(fields),  # Use JSON instead of str() to preserve structure
        file_base64=file_base64,  # Store base64 encoded file for preview
        error_message=error_message,
    )
    db.add(rec)
    db.commit()
    db.refresh(rec)

    stages = make_stages(total_ms, status)

    # Response shape that frontend will consume
    return {
        "id": rec.id,
        "fileName": rec.file_name,
        "fileType": rec.file_type,
        "fileSize": rec.file_size,
        "status": status,
        "confidence": confidence,
        "fieldsExtracted": fields_extracted,
        "totalTime": total_ms,
        "fields": fields,
        "stages": {k: v.dict() for k, v in stages.items()},
        "errorMessage": error_message,
    }
1055
+
1056
+
1057
@app.get("/api/history", response_model=List[ExtractionRecordBase])
def get_history(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Feed for the History page.

    Loads the current user's 100 newest extraction records, drops older
    copies that originate from the same shared extraction, and attaches
    synthetic stage data to each remaining record.
    """
    newest_first = (
        db.query(ExtractionRecord)
        .filter(ExtractionRecord.user_id == current_user.id)
        .order_by(ExtractionRecord.created_at.desc())
        .limit(100)
        .all()
    )

    # Records are ordered newest first, so the first copy seen for a given
    # shared_from_extraction_id is the most recent one; later ones are dropped.
    # Records that were not shared (falsy id) are always kept.
    seen_origins = set()
    unique_records = []
    for candidate in newest_first:
        if candidate.shared_from_extraction_id:
            if candidate.shared_from_extraction_id in seen_origins:
                continue
            seen_origins.add(candidate.shared_from_extraction_id)
        unique_records.append(candidate)

    return [
        ExtractionRecordBase(
            id=item.id,
            fileName=item.file_name,
            fileType=item.file_type or "",
            fileSize=item.file_size or "",
            extractedAt=item.created_at,
            status=item.status or "completed",
            confidence=item.confidence or 0.0,
            fieldsExtracted=item.fields_extracted or 0,
            totalTime=item.total_time_ms or 0,
            stages=make_stages(item.total_time_ms or 1000, item.status or "completed"),
            errorMessage=item.error_message,
        )
        for item in unique_records
    ]
1110
+
1111
+
1112
@app.get("/api/extraction/{extraction_id}")
def get_extraction(
    extraction_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Get a specific extraction by ID with full fields data.
    Used when viewing output from History page.

    Only records owned by the current user are returned.

    Raises:
        HTTPException: 404 when no matching record exists for this user.
    """
    import ast
    import json

    rec = (
        db.query(ExtractionRecord)
        .filter(
            ExtractionRecord.id == extraction_id,
            ExtractionRecord.user_id == current_user.id
        )
        .first()
    )

    if not rec:
        raise HTTPException(status_code=404, detail="Extraction not found")

    # Parse the raw_output JSON string back to dict
    fields = {}
    if rec.raw_output:
        try:
            # Try parsing as JSON first (new format)
            fields = json.loads(rec.raw_output)
        except (json.JSONDecodeError, TypeError):
            # Fall back to ast.literal_eval for the old str() format
            # (backward compatibility). literal_eval only evaluates literals.
            try:
                # Only use literal_eval if it looks like a Python dict string
                if rec.raw_output.strip().startswith('{'):
                    fields = ast.literal_eval(rec.raw_output)
                else:
                    fields = {}
            except Exception as exc:
                # Unreadable legacy payload: serve the record without fields,
                # but leave a trace instead of failing silently.
                print(f"[WARNING] Could not parse raw_output for extraction {rec.id}: {exc}")
                fields = {}

    stages = make_stages(rec.total_time_ms or 1000, rec.status or "completed")

    return {
        "id": rec.id,
        "fileName": rec.file_name,
        "fileType": rec.file_type or "",
        "fileSize": rec.file_size or "",
        "status": rec.status or "completed",
        "confidence": rec.confidence or 0.0,
        "fieldsExtracted": rec.fields_extracted or 0,
        "totalTime": rec.total_time_ms or 0,
        "fields": fields,
        "fileBase64": rec.file_base64,  # Include base64 encoded file for preview
        "stages": {k: v.dict() for k, v in stages.items()},
        "errorMessage": rec.error_message,
    }
1171
+
1172
+
1173
@app.post("/api/share")
async def share_extraction(
    extraction_id: int = Body(...),
    recipient_emails: List[str] = Body(...),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Share an extraction with one or more users via email.
    Creates share tokens and sends emails to recipients.

    Recipient addresses are trimmed, lower-cased and de-duplicated (first
    occurrence wins) before validation, so each recipient gets exactly one
    token and one email.

    Raises:
        HTTPException: 400 when no recipient is given or a recipient is not a
            business address; 404 when the extraction does not belong to the
            current user; 500 when the tokens cannot be stored or when every
            email fails to send.
    """
    import secrets
    from datetime import datetime, timedelta
    from .brevo_service import send_share_email
    from .email_validator import validate_business_email

    # Validate recipient emails list
    if not recipient_emails:
        raise HTTPException(status_code=400, detail="At least one recipient email is required")

    # Normalize, validate and de-duplicate recipients while keeping their order.
    unique_recipients = []
    seen_recipients = set()
    for raw_email in recipient_emails:
        normalized_email = raw_email.strip().lower()
        # Raises HTTPException(400) for missing or personal addresses.
        validate_business_email(normalized_email)
        if normalized_email not in seen_recipients:
            seen_recipients.add(normalized_email)
            unique_recipients.append(normalized_email)

    # Get the extraction record
    extraction = (
        db.query(ExtractionRecord)
        .filter(
            ExtractionRecord.id == extraction_id,
            ExtractionRecord.user_id == current_user.id
        )
        .first()
    )

    if not extraction:
        raise HTTPException(status_code=404, detail="Extraction not found")

    # Generate share link base URL
    base_url = os.environ.get("VITE_API_BASE_URL", "https://seth0330-ezofisocr.hf.space")

    # Process each recipient email
    successful_shares = []
    failed_shares = []
    share_records = []

    for recipient_email in unique_recipients:
        # Generate secure share token for this recipient
        share_token = secrets.token_urlsafe(32)

        # Create share token record (expires in 30 days)
        expires_at = datetime.utcnow() + timedelta(days=30)
        share_record = ShareToken(
            token=share_token,
            extraction_id=extraction_id,
            sender_user_id=current_user.id,
            recipient_email=recipient_email,
            expires_at=expires_at,
        )
        db.add(share_record)
        share_records.append((share_record, share_token, recipient_email))

    # Commit all share tokens
    try:
        db.commit()
        for share_record, share_token, recipient_email in share_records:
            db.refresh(share_record)
    except Exception as e:
        db.rollback()
        raise HTTPException(status_code=500, detail=f"Failed to create share tokens: {str(e)}")

    # Send emails to all recipients
    for share_record, share_token, recipient_email in share_records:
        share_link = f"{base_url}/share/{share_token}"
        try:
            # Get sender's name from current_user, fallback to None if not available
            sender_name = current_user.name if current_user.name else None
            await send_share_email(recipient_email, current_user.email, share_link, sender_name)
            successful_shares.append(recipient_email)
        except Exception as e:
            # Log error but continue with other emails
            print(f"[ERROR] Failed to send share email to {recipient_email}: {str(e)}")
            failed_shares.append(recipient_email)
            # NOTE(review): the stored share token is kept even though its
            # email failed; decide whether it should be deleted here.

    # Build response message
    if len(failed_shares) == 0:
        message = f"Extraction shared successfully with {len(successful_shares)} recipient(s)"
    elif len(successful_shares) == 0:
        raise HTTPException(status_code=500, detail="Failed to send share emails to all recipients")
    else:
        message = f"Extraction shared with {len(successful_shares)} recipient(s). Failed to send to: {', '.join(failed_shares)}"

    return {
        "success": True,
        "message": message,
        "successful_count": len(successful_shares),
        "failed_count": len(failed_shares),
        "successful_emails": successful_shares,
        "failed_emails": failed_shares if failed_shares else None
    }
1279
+
1280
+
1281
class ShareLinkRequest(BaseModel):
    """Request body for POST /api/share/link."""
    # ID of the extraction record to create a share link for.
    extraction_id: int
1283
+
1284
@app.post("/api/share/link")
async def create_share_link(
    request: ShareLinkRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Produce a copyable share link for one of the caller's extractions.

    No recipient is attached to the token; it is valid for 30 days.

    Raises:
        HTTPException: 404 when the extraction does not exist for this user.
    """
    import secrets
    from datetime import datetime, timedelta

    # The extraction must belong to the caller.
    ownership_filter = (
        ExtractionRecord.id == request.extraction_id,
        ExtractionRecord.user_id == current_user.id,
    )
    extraction = db.query(ExtractionRecord).filter(*ownership_filter).first()
    if not extraction:
        raise HTTPException(status_code=404, detail="Extraction not found")

    share_token = secrets.token_urlsafe(32)
    expires_at = datetime.utcnow() + timedelta(days=30)

    # recipient_email=None marks a public (copyable) link.
    token_row = ShareToken(
        token=share_token,
        extraction_id=request.extraction_id,
        sender_user_id=current_user.id,
        recipient_email=None,
        expires_at=expires_at,
    )
    db.add(token_row)
    db.commit()
    db.refresh(token_row)

    link_root = os.environ.get("VITE_API_BASE_URL", "https://seth0330-ezofisocr.hf.space")

    response = {"success": True}
    response["share_link"] = f"{link_root}/share/{share_token}"
    response["share_token"] = share_token
    response["expires_at"] = expires_at.isoformat() if expires_at else None
    return response
1336
+
1337
+
1338
@app.get("/api/share/{token}")
async def access_shared_extraction(
    token: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Access a shared extraction and copy it to the current user's account.
    This endpoint is called after the user logs in via the share link.

    If the current user already holds a copy of the shared extraction, that
    copy's ID is returned instead of creating another one.

    Raises:
        HTTPException: 404 when the token or the original extraction does not
            exist; 410 when the token has expired.
    """
    from datetime import datetime

    # Find the share token
    share = (
        db.query(ShareToken)
        .filter(ShareToken.token == token)
        .first()
    )

    if not share:
        raise HTTPException(status_code=404, detail="Share link not found or expired")

    # Check if token is expired
    if share.expires_at and share.expires_at < datetime.utcnow():
        raise HTTPException(status_code=410, detail="Share link has expired")

    # Get the original extraction
    original_extraction = (
        db.query(ExtractionRecord)
        .filter(ExtractionRecord.id == share.extraction_id)
        .first()
    )

    if not original_extraction:
        raise HTTPException(status_code=404, detail="Original extraction not found")

    # Look for a copy this user already owns (newest first), whichever share
    # token produced it.
    existing_copy = (
        db.query(ExtractionRecord)
        .filter(
            ExtractionRecord.user_id == current_user.id,
            ExtractionRecord.shared_from_extraction_id == original_extraction.id
        )
        .order_by(ExtractionRecord.created_at.desc())
        .first()
    )

    if existing_copy:
        already_recorded = share.accessed and share.accessed_by_user_id == current_user.id
        if not already_recorded:
            # First use of this token by this user: record it on the share.
            share.accessed = True
            share.accessed_at = datetime.utcnow()
            share.accessed_by_user_id = current_user.id
            db.commit()

        return {
            "success": True,
            "extraction_id": existing_copy.id,
            "message": "Extraction already shared with you"
        }

    # Create new extraction record for the recipient. raw_output is copied
    # verbatim as the stored string, so it does not need to be parsed here.
    new_extraction = ExtractionRecord(
        user_id=current_user.id,
        file_name=original_extraction.file_name,
        file_type=original_extraction.file_type,
        file_size=original_extraction.file_size,
        status=original_extraction.status or "completed",
        confidence=original_extraction.confidence or 0.0,
        fields_extracted=original_extraction.fields_extracted or 0,
        total_time_ms=original_extraction.total_time_ms or 0,
        raw_output=original_extraction.raw_output,  # Copy the JSON string
        file_base64=original_extraction.file_base64,  # Copy the base64 file
        shared_from_extraction_id=original_extraction.id,
        shared_by_user_id=share.sender_user_id,
    )
    db.add(new_extraction)

    # Mark share as accessed
    share.accessed = True
    share.accessed_at = datetime.utcnow()
    share.accessed_by_user_id = current_user.id

    db.commit()
    db.refresh(new_extraction)

    return {
        "success": True,
        "extraction_id": new_extraction.id,
        "message": "Extraction shared successfully"
    }
1465
+
1466
+
1467
# Static frontend mounting (used after the React app is built).
# The Dockerfile copies the Vite build into backend/frontend_dist.
# IMPORTANT: API routes must be registered BEFORE this section so they win.
frontend_dir = os.path.join(
    os.path.dirname(os.path.dirname(__file__)), "frontend_dist"
)

if os.path.isdir(frontend_dir):
    # Bundled JS / CSS / images live under assets/.
    assets_dir = os.path.join(frontend_dir, "assets")
    if os.path.isdir(assets_dir):
        app.mount("/assets", StaticFiles(directory=assets_dir), name="assets")

    # Root-level static files (copied from public/ by the Vite build).
    # These routes are registered before the catch-all below.
    @app.get("/logo.png")
    async def serve_logo():
        """Serve logo.png from the frontend_dist root, or 404 if absent."""
        from fastapi.responses import FileResponse
        logo_file = os.path.join(frontend_dir, "logo.png")
        if not os.path.exists(logo_file):
            from fastapi import HTTPException
            raise HTTPException(status_code=404)
        return FileResponse(logo_file, media_type="image/png")

    @app.get("/favicon.ico")
    async def serve_favicon():
        """Serve favicon.ico from the frontend_dist root, or 404 if absent."""
        from fastapi.responses import FileResponse
        icon_file = os.path.join(frontend_dir, "favicon.ico")
        if not os.path.exists(icon_file):
            from fastapi import HTTPException
            raise HTTPException(status_code=404)
        return FileResponse(icon_file, media_type="image/x-icon")

    # Catch-all: hand index.html to the browser so React Router can route.
    # Registered last so API routes and static files are matched first.
    @app.get("/{full_path:path}")
    async def serve_frontend(full_path: str):
        """
        Return index.html for every path that is not reserved.

        API, docs, OpenAPI and asset paths, as well as the known root-level
        static files, answer 404 here instead of falling back to the app.
        """
        reserved_prefixes = ("api/", "docs", "openapi.json", "assets/")
        is_reserved = (
            full_path.startswith(reserved_prefixes)
            or full_path in ["logo.png", "favicon.ico"]
        )
        if is_reserved:
            from fastapi import HTTPException
            raise HTTPException(status_code=404)

        from fastapi.responses import FileResponse
        index_file = os.path.join(frontend_dir, "index.html")
        if not os.path.exists(index_file):
            from fastapi import HTTPException
            raise HTTPException(status_code=404)
        return FileResponse(index_file)
1531
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
backend/app/models.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from sqlalchemy import Column, Integer, String, Float, DateTime, Text, ForeignKey, Boolean
2
  from sqlalchemy.orm import relationship
3
  from sqlalchemy.sql import func
@@ -134,3 +135,108 @@ class APIKey(Base):
134
  "User",
135
  back_populates="api_keys"
136
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  from sqlalchemy import Column, Integer, String, Float, DateTime, Text, ForeignKey, Boolean
3
  from sqlalchemy.orm import relationship
4
  from sqlalchemy.sql import func
 
135
  "User",
136
  back_populates="api_keys"
137
  )
138
+ =======
139
+ from sqlalchemy import Column, Integer, String, Float, DateTime, Text, ForeignKey, Boolean
140
+ from sqlalchemy.orm import relationship
141
+ from sqlalchemy.sql import func
142
+
143
+ from .db import Base
144
+
145
+
146
+ class User(Base):
147
+ """
148
+ Stores user information from Firebase or OTP authentication.
149
+ """
150
+ __tablename__ = "users"
151
+
152
+ id = Column(Integer, primary_key=True, index=True)
153
+ email = Column(String, unique=True, index=True, nullable=False)
154
+ name = Column(String, nullable=True)
155
+ picture = Column(String, nullable=True)
156
+
157
+ # Auth method: 'firebase' or 'otp'
158
+ auth_method = Column(String, default='firebase')
159
+
160
+ # Firebase-specific
161
+ firebase_uid = Column(String, unique=True, index=True, nullable=True)
162
+
163
+ # OTP-specific
164
+ email_verified = Column(Boolean, default=False)
165
+
166
+ created_at = Column(
167
+ DateTime(timezone=True),
168
+ server_default=func.now(),
169
+ )
170
+
171
+ # Relationship to extraction records (explicitly specify user_id as the foreign key)
172
+ # Note: primaryjoin must be specified because ExtractionRecord has multiple foreign keys to User
173
+ extractions = relationship(
174
+ "ExtractionRecord",
175
+ back_populates="user",
176
+ primaryjoin="User.id == ExtractionRecord.user_id"
177
+ )
178
+
179
+
180
+ class ExtractionRecord(Base):
181
+ """
182
+ Stores one extraction run so the History page can show past jobs.
183
+ We'll fill it from the /api/extract endpoint later.
184
+ """
185
+
186
+ __tablename__ = "extractions"
187
+
188
+ id = Column(Integer, primary_key=True, index=True)
189
+ user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
190
+
191
+ file_name = Column(String, index=True)
192
+ file_type = Column(String)
193
+ file_size = Column(String)
194
+
195
+ status = Column(String) # "completed" | "failed"
196
+ confidence = Column(Float) # overall confidence (0–100)
197
+ fields_extracted = Column(Integer) # number of fields extracted
198
+ total_time_ms = Column(Integer) # total processing time in ms
199
+
200
+ raw_output = Column(Text) # JSON string from the model
201
+ file_base64 = Column(Text, nullable=True) # Base64 encoded original file for preview
202
+ error_message = Column(Text, nullable=True)
203
+
204
+ created_at = Column(
205
+ DateTime(timezone=True),
206
+ server_default=func.now(),
207
+ )
208
+
209
+ # Relationship to user (explicitly specify user_id as the foreign key)
210
+ # Note: primaryjoin must be specified because ExtractionRecord has multiple foreign keys to User
211
+ user = relationship(
212
+ "User",
213
+ back_populates="extractions",
214
+ primaryjoin="ExtractionRecord.user_id == User.id"
215
+ )
216
+
217
+ # Track if this extraction was shared (original extraction ID)
218
+ shared_from_extraction_id = Column(Integer, ForeignKey("extractions.id"), nullable=True, index=True)
219
+ shared_by_user_id = Column(Integer, ForeignKey("users.id"), nullable=True, index=True)
220
+
221
+
222
+ class ShareToken(Base):
223
+ """
224
+ Stores share tokens for sharing extractions with other users.
225
+ """
226
+ __tablename__ = "share_tokens"
227
+
228
+ id = Column(Integer, primary_key=True, index=True)
229
+ token = Column(String, unique=True, index=True, nullable=False) # Unique share token
230
+ extraction_id = Column(Integer, ForeignKey("extractions.id"), nullable=False, index=True)
231
+ sender_user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
232
+ recipient_email = Column(String, nullable=True, index=True) # Nullable for public share links
233
+ expires_at = Column(DateTime(timezone=True), nullable=True) # Optional expiration
234
+ accessed = Column(Boolean, default=False) # Track if link was accessed
235
+ accessed_at = Column(DateTime(timezone=True), nullable=True)
236
+ accessed_by_user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
237
+
238
+ created_at = Column(
239
+ DateTime(timezone=True),
240
+ server_default=func.now(),
241
+ )
242
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
backend/app/monday_service.py CHANGED
@@ -1,3 +1,4 @@
 
1
  """
2
  Monday.com API service for creating leads with automatic field matching.
3
  Reference: https://developer.monday.com/api-reference/docs
@@ -389,3 +390,396 @@ async def create_monday_lead(
389
  print(f"[ERROR] Failed to create Monday.com lead: {str(e)}")
390
  return False
391
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  """
3
  Monday.com API service for creating leads with automatic field matching.
4
  Reference: https://developer.monday.com/api-reference/docs
 
390
  print(f"[ERROR] Failed to create Monday.com lead: {str(e)}")
391
  return False
392
 
393
+ =======
394
+ """
395
+ Monday.com API service for creating leads with automatic field matching.
396
+ Reference: https://developer.monday.com/api-reference/docs
397
+ """
398
+ import os
399
+ import httpx
400
+ import json
401
+ from typing import Optional, Dict, Any, List, Tuple
402
+ from difflib import SequenceMatcher
403
+
404
+ MONDAY_API_KEY = os.environ.get("MONDAY_API_KEY", "")
405
+ MONDAY_API_URL = "https://api.monday.com/v2"
406
+ MONDAY_BOARD_ID = os.environ.get("MONDAY_BOARD_ID", None) # Your "New Leads" board ID
407
+
408
+ # Cache for board columns to avoid repeated API calls
409
+ _board_columns_cache: Dict[str, List[Dict[str, Any]]] = {}
410
+
411
+
412
+ def _calculate_similarity(str1: str, str2: str) -> float:
413
+ """
414
+ Calculate similarity between two strings using SequenceMatcher.
415
+ Returns a value between 0.0 and 1.0.
416
+ """
417
+ return SequenceMatcher(None, str1.lower(), str2.lower()).ratio()
418
+
419
+
420
def _find_best_column_match(
    field_name: str,
    available_columns: List[Dict[str, Any]],
    min_similarity: float = 0.3
) -> Optional[Tuple[str, str, float]]:
    """
    Pick the board column whose title is most similar to a field name.

    The field name is expanded into several spellings (separators replaced by
    spaces, lower-cased, underscores removed) plus a fixed synonym list for
    known fields. Every spelling is scored against every column title and the
    single highest score wins; on a tie the earliest column/spelling is kept.

    Args:
        field_name: The field name to match (e.g., "first_name", "email")
        available_columns: List of column dicts with 'id' and 'title' keys
        min_similarity: Minimum similarity threshold (0.0 to 1.0)

    Returns:
        Tuple of (column_id, column_title, similarity_score), or None when no
        column reaches the threshold.
    """
    # Known alternative labels for the supported fields.
    synonyms = {
        "first_name": ["first name", "firstname", "fname", "given name"],
        "last_name": ["last name", "lastname", "lname", "surname", "family name"],
        "email": ["email address", "email", "e-mail", "mail"],
        "phone_number": ["phone", "phone number", "telephone", "mobile", "cell"],
        "linkedin_url": ["linkedin", "linkedin profile", "linkedin url", "linkedin link"],
        "title": ["job title", "position", "role", "job"],
        "headline": ["headline", "tagline", "bio"],
        "organization_name": ["company", "organization", "org", "company name", "employer"],
        "organization_website": ["website", "company website", "url", "web"],
        "organization_address": ["address", "company address", "location"],
    }

    # Spellings of the field name to try, followed by its synonyms (if any).
    candidates = [
        field_name.lower().replace("_", " ").replace("-", " "),
        field_name.lower(),
        field_name.replace("_", ""),
    ]
    candidates += synonyms.get(field_name, [])

    winner = None
    top_score = 0.0
    for col in available_columns:
        col_id = col.get("id", "")
        lowered_title = col.get("title", "").lower()
        # Columns without a title or an id cannot be targeted.
        if not (lowered_title and col_id):
            continue

        for candidate in candidates:
            ratio = _calculate_similarity(candidate, lowered_title)
            if ratio > top_score:
                top_score = ratio
                winner = (col_id, col.get("title", ""), ratio)

    if winner is None or top_score < min_similarity:
        return None
    return winner
483
+
484
+
485
async def _get_board_columns(board_id: str) -> List[Dict[str, Any]]:
    """
    Return the column definitions of a Monday.com board.

    A successful, non-empty result is stored in the module-level
    ``_board_columns_cache`` and served from there on later calls.

    Args:
        board_id: Monday.com board ID

    Returns:
        List of column dictionaries with 'id', 'title', and 'type' keys.
        An empty list is returned when the API key is missing, the request
        fails, the API reports errors, or the board has no columns.
    """
    if board_id in _board_columns_cache:
        return _board_columns_cache[board_id]

    if not MONDAY_API_KEY:
        print("[WARNING] MONDAY_API_KEY not set, cannot fetch board columns")
        return []

    columns_query = """
    query ($boardId: ID!) {
        boards(ids: [$boardId]) {
            columns {
                id
                title
                type
            }
        }
    }
    """
    request_body = {"query": columns_query, "variables": {"boardId": board_id}}
    request_headers = {
        "Authorization": MONDAY_API_KEY,
        "Content-Type": "application/json"
    }

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(MONDAY_API_URL, json=request_body, headers=request_headers)

            if response.status_code != 200:
                print(f"[ERROR] Failed to fetch board columns: {response.status_code} - {response.text}")
            else:
                body = response.json()
                payload = body.get("data")
                if payload and payload.get("boards"):
                    found = payload["boards"][0].get("columns")
                    if found:
                        # Only non-empty column lists are cached.
                        _board_columns_cache[board_id] = found
                        print(f"[INFO] Fetched {len(found)} columns from Monday.com board {board_id}")
                        return found
                elif body.get("errors"):
                    print(f"[ERROR] Failed to fetch board columns: {body['errors']}")
    except Exception as e:
        print(f"[ERROR] Exception while fetching board columns: {str(e)}")

    return []
549
+
550
+
551
+ def _format_column_value(value: Any, column_type: str, column_id: Optional[str] = None) -> Any:
552
+ """
553
+ Format a value according to Monday.com column type.
554
+
555
+ Args:
556
+ value: The value to format
557
+ column_type: Monday.com column type (email, phone, link, text, etc.)
558
+ column_id: Column ID (for special handling)
559
+
560
+ Returns:
561
+ For email/phone/link: Python dict object
562
+ For text/other types: Plain string
563
+ """
564
+ if value is None:
565
+ return ""
566
+
567
+ value_str = str(value)
568
+
569
+ if column_type == "email":
570
+ # Monday.com email format requires dict object (will be JSON encoded later)
571
+ return {"email": value_str, "text": value_str}
572
+ elif column_type == "phone":
573
+ return {"phone": value_str, "countryShortName": "US"}
574
+ elif column_type == "link":
575
+ # If it's already a URL, use it; otherwise create a link
576
+ if value_str.startswith("http://") or value_str.startswith("https://"):
577
+ return {"url": value_str, "text": value_str}
578
+ else:
579
+ return {"url": f"https://{value_str}", "text": value_str}
580
+ else:
581
+ # Text, status, and other types - just return the string
582
+ return value_str
583
+
584
+
585
async def create_monday_lead(
    email: str,
    first_name: Optional[str] = None,
    last_name: Optional[str] = None,
    phone_number: Optional[str] = None,
    linkedin_url: Optional[str] = None,
    title: Optional[str] = None,
    headline: Optional[str] = None,
    organization_name: Optional[str] = None,
    organization_website: Optional[str] = None,
    organization_address: Optional[str] = None,
    board_id: Optional[str] = None
) -> bool:
    """
    Create a new lead item in a Monday.com board.

    The board's columns are fetched, each non-empty field is matched to the
    most similar column title, the values are formatted per column type, and
    a single ``create_item`` GraphQL mutation is sent.

    Args:
        email: Contact email address (required)
        first_name: Contact first name
        last_name: Contact last name
        phone_number: Phone number
        linkedin_url: LinkedIn profile URL
        title: Job title
        headline: Professional headline
        organization_name: Company name
        organization_website: Company website
        organization_address: Company address
        board_id: Monday.com board ID as string (defaults to MONDAY_BOARD_ID env var)

    Returns:
        True if the lead was created, False otherwise (missing configuration,
        no columns, no field matched, API error, or request failure). Failures
        are logged, not raised.
    """
    if not MONDAY_API_KEY:
        print("[WARNING] MONDAY_API_KEY not set, skipping Monday.com lead creation")
        return False

    target_board_id = board_id or MONDAY_BOARD_ID
    if not target_board_id:
        print("[WARNING] MONDAY_BOARD_ID not set, skipping Monday.com lead creation")
        return False

    # Item name: full name when both parts exist, else whichever part exists, else the email.
    if first_name and last_name:
        item_name = f"{first_name} {last_name}"
    else:
        item_name = first_name or last_name or email

    # Fetch board columns to automatically match fields
    print("[INFO] Fetching Monday.com board columns for automatic field matching...")
    board_columns = await _get_board_columns(str(target_board_id))

    if not board_columns:
        print("[WARNING] Could not fetch board columns, skipping Monday.com lead creation")
        return False

    # Column ID -> column type, used to pick the value format.
    column_types = {col["id"]: col.get("type", "text") for col in board_columns}

    data_fields = {
        "email": email,
        "first_name": first_name,
        "last_name": last_name,
        "phone_number": phone_number,
        "linkedin_url": linkedin_url,
        "title": title,
        "headline": headline,
        "organization_name": organization_name,
        "organization_website": organization_website,
        "organization_address": organization_address,
    }
    name_fields = ("first_name", "last_name")

    column_values = {}
    matched_fields = []
    # column_id -> (field_name, raw value) of the first field that claimed the column.
    column_matches = {}

    for field_name, field_value in data_fields.items():
        if not field_value:
            continue

        match = _find_best_column_match(field_name, board_columns)
        if not match:
            print(f"[DEBUG] No suitable column match found for '{field_name}' (skipping)")
            continue

        column_id, column_title, similarity = match
        column_type = column_types.get(column_id, "text")

        if column_id in column_matches:
            existing_field, existing_value = column_matches[column_id]
            # first_name and last_name landing on the same column (e.g. "Name")
            # are merged into "first last".
            if (field_name in name_fields and
                    existing_field in name_fields and
                    field_name != existing_field):
                if field_name == "first_name":
                    combined_value = f"{field_value} {existing_value}"
                else:
                    combined_value = f"{existing_value} {field_value}"
                column_values[column_id] = _format_column_value(combined_value, column_type, column_id)
                matched_fields.append(f"{existing_field}+{field_name} -> {column_title} (combined)")
                print(f"[INFO] Combined '{existing_field}' and '{field_name}' to column '{column_title}' (ID: {column_id})")
            else:
                # Any other collision: the field that claimed the column first keeps it.
                print(f"[DEBUG] Column '{column_title}' already matched to '{existing_field}', skipping '{field_name}'")
            continue

        formatted_value = _format_column_value(field_value, column_type, column_id)
        column_values[column_id] = formatted_value
        column_matches[column_id] = (field_name, field_value)
        matched_fields.append(f"{field_name} -> {column_title} (similarity: {similarity:.2f})")

        # Truncate the *string form* for logging. Email/phone/link values are
        # dicts, and slicing a dict raises, which previously aborted lead
        # creation for long values such as LinkedIn URLs.
        preview = str(formatted_value)
        if len(preview) > 100:
            preview = preview[:100]
        print(f"[INFO] Matched '{field_name}' to column '{column_title}' (ID: {column_id}, type: {column_type}, value: {preview})")

    if not column_values:
        print("[WARNING] No fields could be matched to board columns")
        return False

    print(f"[INFO] Successfully matched {len(matched_fields)} fields to Monday.com columns")

    # The mutation takes column values as one JSON string:
    # - Text columns: plain string values
    # - Email/Phone/Link columns: dict objects (JSON encoded here)
    column_values_json = json.dumps(column_values)
    print(f"[DEBUG] Monday.com column_values JSON: {column_values_json[:500]}")

    # Note: Monday.com uses ID! (string) type for board_id, not Int!
    mutation = """
    mutation ($boardId: ID!, $itemName: String!, $columnValues: JSON!) {
        create_item (board_id: $boardId, item_name: $itemName, column_values: $columnValues) {
            id
        }
    }
    """

    variables = {
        "boardId": str(target_board_id),
        "itemName": item_name,
        "columnValues": column_values_json
    }

    headers = {
        "Authorization": MONDAY_API_KEY,
        "Content-Type": "application/json"
    }

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                MONDAY_API_URL,
                json={
                    "query": mutation,
                    "variables": variables
                },
                headers=headers
            )

            if response.status_code != 200:
                error_data = response.text
                print(f"[ERROR] Failed to create Monday.com lead: {response.status_code} - {error_data}")
                return False

            result = response.json()
            if result.get("data") and result["data"].get("create_item"):
                item_id = result["data"]["create_item"].get("id")
                print(f"[INFO] Successfully created Monday.com lead: {item_name} (ID: {item_id})")
                return True

            if result.get("errors"):
                errors = result.get("errors", [])
                for error in errors:
                    error_msg = error.get("message", "Unknown error")
                    error_path = error.get("path", [])
                    print(f"[ERROR] Monday.com API error: {error_msg}")
                    if error_path:
                        print(f"[ERROR] Error path: {error_path}")
                # Log full error for debugging
                print(f"[DEBUG] Full Monday.com error response: {json.dumps(errors, indent=2)}")
                return False

            print(f"[ERROR] Unexpected Monday.com API response: {result}")
            return False

    except httpx.HTTPStatusError as e:
        print(f"[ERROR] Monday.com API HTTP error: {e.response.status_code} - {e.response.text}")
        return False
    except Exception as e:
        print(f"[ERROR] Failed to create Monday.com lead: {str(e)}")
        return False
784
+
785
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
backend/app/openrouter_client.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import base64
3
  import json
@@ -860,3 +861,867 @@ async def extract_fields_from_document(
860
  return_obj["Fields"] = extracted_fields
861
 
862
  return return_obj
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import os
3
  import base64
4
  import json
 
861
  return_obj["Fields"] = extracted_fields
862
 
863
  return return_obj
864
+ =======
865
+ import os
866
+ import base64
867
+ import json
868
+ import re
869
+ import time
870
+ import asyncio
871
+ from io import BytesIO
872
+ from typing import Any, Dict, List, Optional, Tuple
873
+ import httpx
874
+
875
+ try:
876
+ import fitz # PyMuPDF
877
+ from PIL import Image
878
+ PDF_SUPPORT = True
879
+ except ImportError as e:
880
+ PDF_SUPPORT = False
881
+ print(f"[WARNING] PDF support libraries not available: {e}. PDF conversion will not work.")
882
+
883
+
884
# RunPod Serverless OCR Configuration
# The API key is read from the environment only. A real key must never be
# committed as a fallback default; an unset variable yields an empty string.
# NOTE(review): the key that was previously hard-coded here should be rotated.
RUNPOD_ENDPOINT = os.environ.get("RUNPOD_ENDPOINT", "https://api.runpod.ai/v2/j2jvf8t6n0rk5c/run")
RUNPOD_API_KEY = os.environ.get("RUNPOD_API_KEY", "")

# Extract endpoint ID from endpoint URL for status polling
# URL format: https://api.runpod.ai/v2/{endpoint_id}/run
_endpoint_id = RUNPOD_ENDPOINT.split("/v2/")[1].split("/")[0] if "/v2/" in RUNPOD_ENDPOINT else None
RUNPOD_STATUS_ENDPOINT = f"https://api.runpod.ai/v2/{_endpoint_id}/status" if _endpoint_id else None
892
+
893
+
894
def _pdf_to_images(pdf_bytes: bytes) -> List[bytes]:
    """
    Render every page of a PDF to a JPEG image.

    Each page is rendered at 2x zoom, downscaled so its longest side is at
    most 1920 px, and encoded as JPEG (quality 95).

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        A list of JPEG-encoded image bytes, one entry per page, in page order.

    Raises:
        RuntimeError: If the PDF support libraries could not be imported.
    """
    if not PDF_SUPPORT:
        raise RuntimeError("PyMuPDF not installed. Cannot convert PDF to images.")

    # Cap on the longest side, to avoid GPU memory issues downstream.
    max_size = 1920
    # 2x zoom for better quality; the same matrix is reused for every page.
    zoom = fitz.Matrix(2.0, 2.0)

    images = []
    pdf_doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        print(f"[INFO] PDF has {len(pdf_doc)} page(s)")

        for page_num in range(len(pdf_doc)):
            pix = pdf_doc[page_num].get_pixmap(matrix=zoom)

            # Convert to PIL Image
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

            w, h = img.size
            if w > max_size or h > max_size:
                # Scale the longest side to max_size, keeping the aspect ratio.
                # The short side is clamped to 1 px so extreme aspect ratios
                # cannot produce an invalid zero-sized image.
                if w > h:
                    new_w = max_size
                    new_h = max(1, int(h * (max_size / w)))
                else:
                    new_h = max_size
                    new_w = max(1, int(w * (max_size / h)))
                img = img.resize((new_w, new_h), Image.LANCZOS)
                print(f"[INFO] Resized page {page_num + 1} from {w}x{h} to {new_w}x{new_h}")
            else:
                print(f"[INFO] Converted page {page_num + 1} to image ({w}x{h})")

            # Convert to JPEG bytes (better compression)
            img_bytes = BytesIO()
            img.save(img_bytes, format="JPEG", quality=95)
            images.append(img_bytes.getvalue())
    finally:
        # Release the document even when rendering or encoding a page fails.
        pdf_doc.close()

    return images
938
+
939
+
940
+ def _image_bytes_to_base64(image_bytes: bytes) -> str:
941
+ """Convert image bytes to base64 data URL (JPEG format)."""
942
+ b64 = base64.b64encode(image_bytes).decode("utf-8")
943
+ data_url = f"data:image/jpeg;base64,{b64}"
944
+ print(f"[DEBUG] Base64 encoded image: {len(image_bytes)} bytes -> {len(data_url)} chars")
945
+ return data_url
946
+
947
+
948
+ def _parse_markdown_table(text: str) -> Optional[Tuple[List[str], List[List[str]]]]:
949
+ """
950
+ Parse a markdown table from text.
951
+ Returns (headers, rows) if table found, None otherwise.
952
+ Handles various table formats including malformed ones.
953
+ """
954
+ lines = [line.strip() for line in text.split('\n')]
955
+
956
+ # Find potential table start (line with multiple | and actual text content)
957
+ table_start = None
958
+ for i, line in enumerate(lines):
959
+ if '|' in line and line.count('|') >= 2:
960
+ # Skip separator lines (only |, -, :, spaces)
961
+ if re.match(r'^[\s\|\-:]+$', line):
962
+ continue
963
+ # Check if line has meaningful text (not just | characters)
964
+ cells = [cell.strip() for cell in line.split('|')]
965
+ if cells and not cells[0]:
966
+ cells = cells[1:]
967
+ if cells and not cells[-1]:
968
+ cells = cells[:-1]
969
+ # Must have at least 2 columns with some text
970
+ meaningful_cells = [c for c in cells if len(c) > 0]
971
+ if len(meaningful_cells) >= 2:
972
+ table_start = i
973
+ break
974
+
975
+ if table_start is None:
976
+ return None
977
+
978
+ # Find table end (first non-empty line without | after table start)
979
+ table_end = None
980
+ for i in range(table_start + 1, len(lines)):
981
+ line = lines[i]
982
+ if not line: # Empty line, continue
983
+ continue
984
+ if '|' not in line:
985
+ # Non-empty line without | means table ended
986
+ table_end = i
987
+ break
988
+
989
+ if table_end is None:
990
+ table_end = len(lines)
991
+
992
+ table_lines = lines[table_start:table_end]
993
+
994
+ # Find the actual header row (should have meaningful text, not just | or separators)
995
+ headers = None
996
+ header_idx = None
997
+
998
+ for i, line in enumerate(table_lines):
999
+ if not line or '|' not in line:
1000
+ continue
1001
+
1002
+ # Skip separator lines (lines with only |, -, :, spaces)
1003
+ if re.match(r'^[\s\|\-:]+$', line):
1004
+ continue
1005
+
1006
+ # Check if this line has meaningful content (not just | characters)
1007
+ cells = [cell.strip() for cell in line.split('|')]
1008
+ # Remove empty cells at start/end
1009
+ if cells and not cells[0]:
1010
+ cells = cells[1:]
1011
+ if cells and not cells[-1]:
1012
+ cells = cells[:-1]
1013
+
1014
+ # Header should have at least 3 columns and meaningful text
1015
+ if len(cells) >= 3:
1016
+ # Check if cells have actual text (not just empty or single char)
1017
+ meaningful_cells = [c for c in cells if len(c) > 1]
1018
+ if len(meaningful_cells) >= 3:
1019
+ headers = cells
1020
+ header_idx = i
1021
+ break
1022
+
1023
+ if not headers or header_idx is None:
1024
+ return None
1025
+
1026
+ # Parse data rows (skip separator line after header if present)
1027
+ rows = []
1028
+ num_columns = len(headers)
1029
+
1030
+ for i in range(header_idx + 1, len(table_lines)):
1031
+ line = table_lines[i]
1032
+
1033
+ if not line:
1034
+ continue
1035
+
1036
+ # Skip separator lines
1037
+ if re.match(r'^[\s\|\-:]+$', line):
1038
+ continue
1039
+
1040
+ if '|' not in line:
1041
+ # No more table rows
1042
+ break
1043
+
1044
+ cells = [cell.strip() for cell in line.split('|')]
1045
+ # Remove empty cells at start/end
1046
+ if cells and not cells[0]:
1047
+ cells = cells[1:]
1048
+ if cells and not cells[-1]:
1049
+ cells = cells[:-1]
1050
+
1051
+ # Only add rows that match header column count (allow some flexibility)
1052
+ if len(cells) == num_columns or (len(cells) >= num_columns - 1 and len(cells) <= num_columns + 1):
1053
+ # Pad or trim to match header count
1054
+ if len(cells) < num_columns:
1055
+ cells.extend([''] * (num_columns - len(cells)))
1056
+ elif len(cells) > num_columns:
1057
+ cells = cells[:num_columns]
1058
+
1059
+ # Only add if row has at least one non-empty cell
1060
+ if any(cell for cell in cells):
1061
+ rows.append(cells)
1062
+
1063
+ if not rows:
1064
+ return None
1065
+
1066
+ return (headers, rows)
1067
+
1068
+
1069
+ def _extract_metadata(text: str) -> Dict[str, str]:
1070
+ """
1071
+ Extract metadata from document header text.
1072
+ Looks for title, office, notice number, and description.
1073
+ """
1074
+ metadata = {
1075
+ "title": "",
1076
+ "office": "",
1077
+ "notice_no": "",
1078
+ "description": ""
1079
+ }
1080
+
1081
+ lines = [line.strip() for line in text.split('\n') if line.strip()]
1082
+
1083
+ # Extract office (usually first non-empty line)
1084
+ if lines:
1085
+ metadata["office"] = lines[0]
1086
+
1087
+ # Look for notice number pattern (like "पत्रक सं- 1239" or "सं- 1239")
1088
+ notice_pattern = r'(?:पत्रक\s+)?सं[-\s:]*(\d+)'
1089
+ for line in lines[:10]: # Check first 10 lines
1090
+ match = re.search(notice_pattern, line)
1091
+ if match:
1092
+ metadata["notice_no"] = match.group(1)
1093
+ break
1094
+
1095
+ # Look for title - usually in quotes or contains specific keywords
1096
+ # Check for quoted text first
1097
+ quoted_title = re.search(r'["""]([^"""]+)["""]', text[:1000])
1098
+ if quoted_title:
1099
+ metadata["title"] = quoted_title.group(1).strip()
1100
+ else:
1101
+ # Look for title patterns
1102
+ title_keywords = ['सम्पत्ति', 'सूचना', 'विज्ञप्ति', 'नाम परिवर्तन']
1103
+ for line in lines[:5]:
1104
+ if any(keyword in line for keyword in title_keywords):
1105
+ # Extract the title phrase
1106
+ title_match = re.search(r'(सम्पत्ति[^।]*|सूचना[^।]*|विज्ञप्ति[^।]*)', line)
1107
+ if title_match:
1108
+ metadata["title"] = title_match.group(1).strip()
1109
+ break
1110
+
1111
+ # Extract description (text before table, usually contains key phrases)
1112
+ description_keywords = ['नाम परिवर्तन', 'अधिनियम', 'धारा', 'प्रकाशन', 'आवेदन']
1113
+ description_parts = []
1114
+ for i, line in enumerate(lines[:15]): # Check first 15 lines
1115
+ if any(keyword in line for keyword in description_keywords):
1116
+ description_parts.append(line)
1117
+ # Get a few surrounding lines for context
1118
+ if i > 0:
1119
+ description_parts.insert(0, lines[i-1])
1120
+ if i < len(lines) - 1:
1121
+ description_parts.append(lines[i+1])
1122
+ break
1123
+
1124
+ if description_parts:
1125
+ description = ' '.join(description_parts).strip()
1126
+ if len(description) > 30: # Only if substantial
1127
+ # Clean up and limit length
1128
+ description = re.sub(r'\s+', ' ', description)
1129
+ metadata["description"] = description[:300] # Limit length
1130
+
1131
+ return metadata
1132
+
1133
+
1134
+ def _parse_model_response(response_text: str) -> Tuple[str, Dict[str, Any]]:
1135
+ """
1136
+ Parse model response to extract text and metadata.
1137
+ The model may return text and metadata in various formats.
1138
+ Returns: (extracted_text, metadata_dict)
1139
+ """
1140
+ metadata = {}
1141
+ text = response_text
1142
+
1143
+ # Try to find JSON metadata section
1144
+ # Look for METADATA: or metadata: section
1145
+ metadata_patterns = [
1146
+ r'METADATA:\s*\n?\s*({.*?})(?:\n\n|\nTEXT|$)',
1147
+ r'metadata:\s*\n?\s*({.*?})(?:\n\n|\nTEXT|$)',
1148
+ r'METADATA:\s*\n?\s*```json\s*({.*?})\s*```',
1149
+ r'METADATA:\s*\n?\s*```\s*({.*?})\s*```',
1150
+ ]
1151
+
1152
+ for pattern in metadata_patterns:
1153
+ match = re.search(pattern, response_text, re.DOTALL | re.IGNORECASE)
1154
+ if match:
1155
+ try:
1156
+ metadata_json = match.group(1).strip()
1157
+ metadata = json.loads(metadata_json)
1158
+ # Remove metadata section from text
1159
+ text = response_text[:match.start()] + response_text[match.end():]
1160
+ break
1161
+ except (json.JSONDecodeError, IndexError):
1162
+ continue
1163
+
1164
+ # If no JSON found, try to extract metadata from structured text format
1165
+ if not metadata:
1166
+ # Look for key-value pairs in METADATA section
1167
+ metadata_section = re.search(r'METADATA:\s*\n(.*?)(?:\n\n|\nTEXT|$)', response_text, re.DOTALL | re.IGNORECASE)
1168
+ if metadata_section:
1169
+ metadata_text = metadata_section.group(1)
1170
+ # Parse key-value pairs
1171
+ for line in metadata_text.split('\n'):
1172
+ if ':' in line:
1173
+ parts = line.split(':', 1)
1174
+ if len(parts) == 2:
1175
+ key = parts[0].strip().lower().replace(' ', '_')
1176
+ value = parts[1].strip()
1177
+ if value:
1178
+ metadata[key] = value
1179
+
1180
+ # Extract TEXT section if present
1181
+ text_match = re.search(r'TEXT:\s*\n(.*?)(?:\n\nMETADATA|$)', response_text, re.DOTALL | re.IGNORECASE)
1182
+ if text_match:
1183
+ text = text_match.group(1).strip()
1184
+ else:
1185
+ # If no TEXT section, remove METADATA section if found
1186
+ text = re.sub(r'METADATA:.*', '', response_text, flags=re.DOTALL | re.IGNORECASE).strip()
1187
+
1188
+ # Clean up text
1189
+ text = text.strip()
1190
+
1191
+ # Clean up metadata - remove empty values
1192
+ metadata = {k: v for k, v in metadata.items() if v and str(v).strip()}
1193
+
1194
+ return text, metadata
1195
+
1196
+
1197
+ def _extract_footer_notes(text: str) -> List[str]:
1198
+ """
1199
+ Extract footer notes from document.
1200
+ Usually appears after the table.
1201
+ """
1202
+ notes = []
1203
+
1204
+ # Find table end
1205
+ lines = text.split('\n')
1206
+ table_end_idx = len(lines)
1207
+
1208
+ for i, line in enumerate(lines):
1209
+ if '|' in line:
1210
+ # Find last table line
1211
+ j = i + 1
1212
+ while j < len(lines) and ('|' in lines[j] or re.match(r'^[\s\|\-:]+$', lines[j])):
1213
+ j += 1
1214
+ table_end_idx = j
1215
+ break
1216
+
1217
+ # Extract footer text (after table)
1218
+ footer_lines = lines[table_end_idx:]
1219
+ footer_text = '\n'.join(footer_lines).strip()
1220
+
1221
+ # Split into sentences/notes
1222
+ # Look for sentences ending with period, exclamation, or specific keywords
1223
+ sentences = re.split(r'[।\.!]\s+', footer_text)
1224
+
1225
+ for sentence in sentences:
1226
+ sentence = sentence.strip()
1227
+ if len(sentence) > 20: # Only substantial notes
1228
+ # Clean up
1229
+ sentence = re.sub(r'\s+', ' ', sentence)
1230
+ if sentence:
1231
+ notes.append(sentence)
1232
+
1233
+ # Limit to most relevant notes (usually 2-4)
1234
+ return notes[:5]
1235
+
1236
+
1237
def _parse_text_with_tables(text: str, page_metadata: Dict[str, Any] = None) -> Dict[str, Any]:
    """
    Parse page text into a structured dict with metadata, table and footer notes.

    Args:
        text: Page text, possibly containing a markdown table.
        page_metadata: Metadata already extracted by the model. When it is
            missing or empty, ``_extract_metadata(text)`` is used instead,
            both for pages with a table and for pages without one.

    Returns:
        Dict with keys:
          * ``text``: the original text, unchanged.
          * ``metadata``: supplied or fallback metadata.
          * ``table``: ``{"row_1": {...}, "row_2": {...}}`` keyed by sanitised
            header text when a table was found, otherwise an empty list.
          * ``footer_notes``: result of ``_extract_footer_notes(text)``.
    """
    result = {
        "text": text,  # Keep original text
        "metadata": page_metadata if page_metadata else {},
        "table": [],
        "footer_notes": []
    }

    table_data = _parse_markdown_table(text)

    # Caller-supplied metadata always wins; the heuristic extraction is only a
    # fallback, regardless of whether the page contains a table.
    if not result["metadata"]:
        result["metadata"] = _extract_metadata(text)

    if table_data:
        headers, rows = table_data
        print(f"[INFO] Found table with {len(headers)} columns and {len(rows)} rows")

        # Build one unique key per column. Sanitise first and de-duplicate
        # afterwards, so headers that only differ in stripped punctuation
        # (or are blank) can never collapse onto the same key.
        header_keys = []
        used_keys = set()
        for col, header in enumerate(headers):
            key = re.sub(r'[^\w\s\u0900-\u097F]', '', header.strip())  # Keep Unicode Hindi chars
            key = re.sub(r'\s+', '_', key)  # Replace spaces with underscores
            if not key:
                key = f"column_{col + 1}"

            base_key, occurrence = key, 1
            while key in used_keys:
                occurrence += 1
                key = f"{base_key}_{occurrence}"
            used_keys.add(key)
            header_keys.append(key)

        # Each row becomes its own section: row_1, row_2, ...
        table_rows = {}
        for idx, row in enumerate(rows, start=1):
            row_dict = {
                key: row[col].strip()
                for col, key in enumerate(header_keys)
                if col < len(row)
            }
            if row_dict:
                table_rows[f"row_{idx}"] = row_dict

        result["table"] = table_rows

    result["footer_notes"] = _extract_footer_notes(text)

    return result
1318
+
1319
+
1320
async def _poll_runpod_job(job_id: str, client: httpx.AsyncClient, max_wait_time: int = 300) -> Dict[str, Any]:
    """
    Poll the RunPod status endpoint until the job reaches a terminal state.

    Args:
        job_id: Identifier returned when the job was submitted.
        client: Open HTTP client used for the status requests.
        max_wait_time: Maximum number of seconds to keep polling.

    Returns:
        The status payload of the completed job (includes its output).

    Raises:
        RuntimeError: If the job fails, is cancelled, times out on the
            RunPod side, or does not finish within ``max_wait_time``.
        httpx.HTTPStatusError: If a status request returns an error status.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {RUNPOD_API_KEY}"
    }

    # States after which the job will never produce output; polling further
    # would only burn the whole max_wait_time budget.
    dead_states = ("CANCELLED", "TIMED_OUT")

    status_url = f"{RUNPOD_STATUS_ENDPOINT}/{job_id}"
    poll_interval = 2  # Poll every 2 seconds
    # Monotonic clock: the deadline is unaffected by system clock changes.
    started = time.monotonic()

    while True:
        if time.monotonic() - started > max_wait_time:
            raise RuntimeError(f"Job {job_id} timed out after {max_wait_time} seconds")

        response = await client.get(status_url, headers=headers)
        response.raise_for_status()
        status_result = response.json()

        # "status" may be absent or null; treat both as an unknown state.
        status = str(status_result.get("status") or "").upper()

        if status == "COMPLETED":
            print(f"[INFO] Job {job_id} completed successfully")
            return status_result

        if status == "FAILED":
            error_msg = status_result.get("error", "Unknown error")
            raise RuntimeError(f"Job {job_id} failed: {error_msg}")

        if status in dead_states:
            error_msg = status_result.get("error", "Unknown error")
            raise RuntimeError(f"Job {job_id} ended with status {status}: {error_msg}")

        # IN_QUEUE, IN_PROGRESS or an unrecognised state: wait and retry.
        print(f"[INFO] Job {job_id} status: {status}, waiting...")
        await asyncio.sleep(poll_interval)
1360
+
1361
+
1362
async def _extract_text_with_ocr(image_bytes: bytes, page_num: int, total_pages: int, custom_prompt: str = None) -> Dict[str, Any]:
    """
    Run one page/image through the RunPod serverless OCR endpoint.

    Args:
        image_bytes: Encoded image bytes to send (base64-encoded for transport).
        page_num: 1-based page number, used for logging and error messages.
        total_pages: Total number of pages, used for logging.
        custom_prompt: Optional prompt; defaults to a plain text-extraction prompt.

    Returns:
        Dict with ``doc_type``, ``confidence``, ``full_text`` and ``fields``
        (metadata parsed out of the model response, possibly empty).

    Raises:
        RuntimeError: On any HTTP or processing failure; the original
            exception is attached as ``__cause__``.
    """
    image_base64 = base64.b64encode(image_bytes).decode("utf-8")

    print(f"[INFO] OCR: Processing page {page_num}/{total_pages} with RunPod endpoint")

    try:
        # Use custom prompt if provided, otherwise the default extraction prompt
        metadata_prompt = custom_prompt if custom_prompt else """Extract all text from this image."""

        # RunPod serverless endpoints expect image_base64, image_url, or image_path
        payload = {
            "input": {
                "prompt": metadata_prompt,
                "image_base64": image_base64  # Base64 encoded image
            }
        }

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {RUNPOD_API_KEY}"
        }

        async with httpx.AsyncClient(timeout=300.0) as client:
            # Submit job
            response = await client.post(
                RUNPOD_ENDPOINT,
                headers=headers,
                json=payload
            )
            response.raise_for_status()
            result = response.json()

            job_id = result.get("id")
            status = str(result.get("status") or "").upper()

            if job_id and status in ["IN_QUEUE", "IN_PROGRESS"]:
                # Async job: poll until it reaches a terminal state
                print(f"[INFO] Job submitted with ID: {job_id}, status: {status}")
                if not RUNPOD_STATUS_ENDPOINT:
                    raise RuntimeError("RunPod status endpoint not configured. Cannot poll async job.")

                result = await _poll_runpod_job(job_id, client)
            elif status in ("FAILED", "CANCELLED", "TIMED_OUT"):
                # Without this the error payload would fall through to the
                # str(result) fallback below and be reported as OCR text.
                raise RuntimeError(
                    f"Job {job_id} ended with status {status}: {result.get('error', 'Unknown error')}"
                )

        # RunPod serverless typically returns: {"id": "...", "status": "...", "output": "..."}
        # The output might be a string, a dict or a list depending on the model
        extracted_text = ""

        if "output" in result:
            output = result["output"]
            if isinstance(output, str):
                extracted_text = output
            elif isinstance(output, dict):
                # First non-empty common field wins, so an empty "text" does
                # not hide a populated "result"/"content".
                for field in ("text", "result", "content"):
                    value = output.get(field)
                    if value:
                        extracted_text = value if isinstance(value, str) else str(value)
                        break
            elif isinstance(output, list) and len(output) > 0:
                # If output is a list, take the first element
                extracted_text = str(output[0])
        elif "result" in result:
            extracted_text = str(result["result"])
        elif "text" in result:
            extracted_text = str(result["text"])
        else:
            # Fallback: convert entire response to string
            extracted_text = str(result)

        if not extracted_text:
            extracted_text = ""

        print(f"[INFO] OCR: Extracted {len(extracted_text)} characters from page {page_num}")

        # Parse model response to extract text and metadata
        parsed_text, parsed_metadata = _parse_model_response(extracted_text)

        # Mock response object kept for compatibility with the signature of
        # the confidence calculation.
        mock_response = type('obj', (object,), {
            'choices': [type('obj', (object,), {'finish_reason': 'stop'})()],
            'usage': type('obj', (object,), {'completion_tokens': len(parsed_text.split())})()
        })()
        confidence = _calculate_ocr_confidence(mock_response, parsed_text)

        # Determine document type from metadata if available
        doc_type = parsed_metadata.get("document_type", "other")
        if doc_type == "other" and parsed_metadata.get("title"):
            # Try to infer from title
            title_lower = str(parsed_metadata.get("title", "")).lower()
            if any(kw in title_lower for kw in ["tender", "bid", "quotation"]):
                doc_type = "tender"
            elif any(kw in title_lower for kw in ["recruitment", "appointment", "vacancy"]):
                doc_type = "recruitment"
            elif any(kw in title_lower for kw in ["notice", "notification", "circular"]):
                doc_type = "notice"

        return {
            "doc_type": doc_type,
            "confidence": confidence,
            "full_text": parsed_text,
            "fields": parsed_metadata if parsed_metadata else {}  # Model-extracted metadata
        }

    except httpx.HTTPStatusError as e:
        error_msg = f"HTTP {e.response.status_code}: {e.response.text}"
        print(f"[ERROR] OCR API HTTP error for page {page_num}: {error_msg}")
        raise RuntimeError(f"OCR API error for page {page_num}: {error_msg}") from e
    except Exception as e:
        error_msg = str(e)
        print(f"[ERROR] OCR API error for page {page_num}: {error_msg}")
        raise RuntimeError(f"OCR API error for page {page_num}: {error_msg}") from e
1494
+
1495
+
1496
+ def _calculate_ocr_confidence(response, extracted_text: str) -> float:
1497
+ """
1498
+ Calculate confidence score based on OCR response quality.
1499
+ Returns a score from 0-100, with higher scores for better extraction quality.
1500
+ """
1501
+ # Start with a higher base confidence for successful extractions
1502
+ base_confidence = 92.0
1503
+
1504
+ # Adjust confidence based on text quality heuristics
1505
+ text_length = len(extracted_text.strip())
1506
+
1507
+ if text_length == 0:
1508
+ return 0.0
1509
+ elif text_length < 10:
1510
+ # Very short text - might be error or empty
1511
+ return max(30.0, base_confidence - 40.0)
1512
+ elif text_length < 50:
1513
+ # Short text - might be incomplete
1514
+ return max(60.0, base_confidence - 20.0)
1515
+ elif text_length > 1000:
1516
+ # Long text - likely good extraction
1517
+ confidence = min(100.0, base_confidence + 5.0)
1518
+ elif text_length > 500:
1519
+ # Medium-long text - good extraction
1520
+ confidence = min(100.0, base_confidence + 3.0)
1521
+ else:
1522
+ confidence = base_confidence
1523
+
1524
+ # Check for structured content (tables, etc.) - indicates good extraction
1525
+ if '|' in extracted_text and extracted_text.count('|') > 5:
1526
+ # Table detected - boost confidence significantly
1527
+ confidence = min(100.0, confidence + 6.0)
1528
+
1529
+ # Check for meaningful content (non-whitespace ratio)
1530
+ non_whitespace = len([c for c in extracted_text if not c.isspace()])
1531
+ if text_length > 0:
1532
+ content_ratio = non_whitespace / text_length
1533
+ if content_ratio > 0.85:
1534
+ # Very high content ratio - excellent extraction
1535
+ confidence = min(100.0, confidence + 5.0)
1536
+ elif content_ratio > 0.75:
1537
+ # High content ratio - good extraction
1538
+ confidence = min(100.0, confidence + 3.0)
1539
+ elif content_ratio > 0.6:
1540
+ # Moderate content ratio - decent extraction
1541
+ confidence = min(100.0, confidence + 1.0)
1542
+ elif content_ratio < 0.3:
1543
+ # Low content ratio - mostly whitespace
1544
+ confidence = max(60.0, confidence - 15.0)
1545
+
1546
+ # Check for common OCR quality indicators
1547
+ # Presence of numbers, dates, and structured patterns indicates good extraction
1548
+ has_numbers = any(c.isdigit() for c in extracted_text)
1549
+ has_letters = any(c.isalpha() for c in extracted_text)
1550
+ has_punctuation = any(c in '.,;:!?()[]{}' for c in extracted_text)
1551
+
1552
+ if has_numbers and has_letters and has_punctuation:
1553
+ # Well-structured text with mixed content - high confidence
1554
+ confidence = min(100.0, confidence + 2.0)
1555
+
1556
+ # Cap at 100% and ensure minimum quality threshold
1557
+ return round(min(100.0, max(0.0, confidence)), 1)
1558
+
1559
+
1560
def _first_json_object(raw: str):
    """Return the first JSON object embedded anywhere in ``raw`` as a dict, or None."""
    decoder = json.JSONDecoder()
    brace = raw.find('{')
    while brace != -1:
        try:
            candidate, _ = decoder.raw_decode(raw, brace)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        brace = raw.find('{', brace + 1)
    return None


async def extract_fields_from_document(
    file_bytes: bytes,
    content_type: str,
    filename: str,
    key_fields: str = None,
) -> Dict[str, Any]:
    """
    OCR a document page by page and return text plus structured data.

    PDFs are converted to one image per page; other files are opened as an
    image, converted to RGB, downscaled to at most 1920px on the longest
    side and re-encoded as JPEG (falling back to the raw bytes if that
    fails). Each page is sent to the OCR model separately; a failing page is
    recorded with an ``error`` entry instead of aborting the whole document.

    Args:
        file_bytes: Raw bytes of the uploaded file.
        content_type: MIME type of the upload.
        filename: Original file name (not used by the extraction itself).
        key_fields: Optional comma-separated field names. When given, a
            second OCR pass on the first page asks the model for exactly
            these fields as JSON.

    Returns:
        Dict with ``doc_type``, ``confidence`` (average over successful
        pages), ``full_text``, ``fields`` (``page_N`` -> text/table/footer
        notes/confidence/doc_type), ``pages`` (raw per-page results) and,
        only when the second pass produced a JSON object, ``Fields``.

    Raises:
        RuntimeError: If a PDF is supplied but PDF support is unavailable.
    """
    # A missing content type must not crash the type sniffing below.
    normalized_type = (content_type or "").lower()

    if normalized_type == "application/pdf" or normalized_type.endswith("/pdf"):
        if not PDF_SUPPORT:
            raise RuntimeError("PDF support requires PyMuPDF. Please install it.")
        # For PDFs, convert to images
        image_bytes_list = _pdf_to_images(file_bytes)
    else:
        # For regular images, normalise to JPEG for consistency
        try:
            img = Image.open(BytesIO(file_bytes))
            if img.mode != "RGB":
                img = img.convert("RGB")

            # Resize if too large (max 1920px on longest side)
            max_size = 1920
            w, h = img.size
            if w > max_size or h > max_size:
                if w > h:
                    new_w = max_size
                    new_h = int(h * (max_size / w))
                else:
                    new_h = max_size
                    new_w = int(w * (max_size / h))
                img = img.resize((new_w, new_h), Image.LANCZOS)
                print(f"[INFO] Resized image from {w}x{h} to {new_w}x{new_h}")

            img_bytes = BytesIO()
            img.save(img_bytes, format="JPEG", quality=95)
            image_bytes_list = [img_bytes.getvalue()]
        except Exception as e:
            # Fallback: use original file bytes
            print(f"[WARNING] Could not process image with PIL: {e}. Using original bytes.")
            image_bytes_list = [file_bytes]

    total_pages = len(image_bytes_list)
    print(f"[INFO] Processing {total_pages} page(s) with OCR model...")

    # Process each page separately so one bad page cannot sink the document
    page_results = []
    for page_number, page_bytes in enumerate(image_bytes_list, start=1):
        print(f"[INFO] Processing page {page_number}/{total_pages}...")
        try:
            page_result = await _extract_text_with_ocr(page_bytes, page_number, total_pages, None)
            page_results.append({
                "page_number": page_number,
                "text": page_result.get("full_text", ""),
                "fields": page_result.get("fields", {}),
                "confidence": page_result.get("confidence", 0),
                "doc_type": page_result.get("doc_type", "other"),
            })
            print(f"[INFO] Page {page_number} processed successfully")
        except Exception as e:
            print(f"[ERROR] Failed to process page {page_number}: {e}")
            page_results.append({
                "page_number": page_number,
                "text": "",
                "fields": {},
                "confidence": 0,
                "error": str(e)
            })

    # Combine results from all pages
    combined_full_text = "\n\n".join([f"=== PAGE {p['page_number']} ===\n\n{p['text']}" for p in page_results if p.get("text")])

    # Extract user-specified fields if key_fields provided
    extracted_fields = {}
    # "Invoice Number, Invoice Date" -> ['Invoice Number', 'Invoice Date']
    field_list = [f.strip() for f in (key_fields or "").split(',') if f.strip()]
    if field_list:
        print(f"[INFO] Extracting user-specified fields: {field_list}")

        fields_json = json.dumps(field_list)
        custom_prompt = f"Extract the following fields from this image and return as JSON: {fields_json}. Return only a valid JSON object with the field names as keys and their extracted values."

        # Second OCR pass on the first page (usually has most metadata)
        if image_bytes_list:
            try:
                print("[INFO] Running second OCR pass for field extraction...")
                field_result = await _extract_text_with_ocr(image_bytes_list[0], 1, 1, custom_prompt)
                field_text = field_result.get("full_text", "")

                # Only a JSON object is a valid field mapping; arrays or
                # scalars are rejected rather than exposed as "Fields".
                parsed_fields = _first_json_object(field_text)
                if parsed_fields is not None:
                    extracted_fields = parsed_fields
                    print(f"[INFO] Successfully extracted {len(extracted_fields)} fields from second OCR pass")
                else:
                    print(f"[WARNING] Could not parse JSON from field extraction response: {field_text[:200]}")
            except Exception as e:
                print(f"[WARNING] Field extraction failed: {e}")
                extracted_fields = {}

    # Parse each page for tables and structure the output
    structured_pages = {}
    for page_result in page_results:
        page_text = page_result.get("text", "")
        if not page_text:
            continue

        parsed_data = _parse_text_with_tables(page_text, {})

        # Always keyed as page_N, even for a single page
        structured_pages[f"page_{page_result.get('page_number', 1)}"] = {
            "text": parsed_data["text"],
            "table": parsed_data["table"],
            "footer_notes": parsed_data["footer_notes"],
            "confidence": page_result.get("confidence", 0),
            "doc_type": page_result.get("doc_type", "other")
        }

    # Average confidence over pages that produced a positive score
    confidences = [p.get("confidence", 0) for p in page_results if p.get("confidence", 0) > 0]
    avg_confidence = sum(confidences) / len(confidences) if confidences else 0

    # doc_type comes from the first page that reported something specific
    doc_type = "other"
    for page_result in page_results:
        if page_result.get("doc_type") and page_result["doc_type"] != "other":
            doc_type = page_result["doc_type"]
            break

    return_obj = {
        "doc_type": doc_type,
        "confidence": avg_confidence,
        "full_text": combined_full_text,
        "fields": structured_pages,  # Structured per-page data with tables
        "pages": page_results
    }

    # Add Fields at root level only if user provided key_fields and extraction succeeded
    if extracted_fields:
        return_obj["Fields"] = extracted_fields

    return return_obj
1727
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
backend/app/otp_service.py CHANGED
@@ -1,3 +1,4 @@
 
1
  """
2
  OTP (One-Time Password) service for email-based authentication.
3
  """
@@ -195,3 +196,202 @@ async def verify_otp(email: str, otp: str, db: Session) -> User:
195
 
196
  return user
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  """
3
  OTP (One-Time Password) service for email-based authentication.
4
  """
 
196
 
197
  return user
198
 
199
+ =======
200
+ """
201
+ OTP (One-Time Password) service for email-based authentication.
202
+ """
203
+ import random
204
+ import string
205
+ from datetime import datetime, timedelta
206
+ from typing import Dict, Optional
207
+ from sqlalchemy.orm import Session
208
+ from fastapi import HTTPException
209
+ from .models import User
210
+ from .brevo_service import send_otp_email
211
+
212
+ # Store OTPs in memory (in production, use Redis or database)
213
+ otp_store: Dict[str, dict] = {}
214
+
215
+
216
def generate_otp(length: int = 6) -> str:
    """
    Generate a random numeric OTP code.

    The digits come from the operating system's CSPRNG via ``secrets``:
    an OTP is an authentication secret, and the default ``random``
    generator is predictable and not meant for security purposes.

    Args:
        length: Length of OTP (default: 6)

    Returns:
        Random OTP string consisting of ``length`` decimal digits
    """
    # Local import keeps this function self-contained within the module.
    import secrets

    return ''.join(secrets.choice(string.digits) for _ in range(length))
227
+
228
+
229
async def request_otp(email: str, db: Session) -> dict:
    """
    Create a fresh OTP for ``email``, remember it, and mail it via Brevo.

    The OTP is stored under the lower-cased address with a 10 minute expiry
    and an attempt counter. If sending the mail fails, the stored entry is
    removed again so no unusable OTP lingers.

    Args:
        email: Email address to send OTP to
        db: Database session (not used by this function)

    Returns:
        Dictionary with a confirmation message and the expiry in minutes

    Raises:
        HTTPException: 500 if the OTP email could not be sent
    """
    store_key = email.lower()
    code = generate_otp()

    # In-memory record (in production, use Redis or database with TTL)
    otp_store[store_key] = {
        'otp': code,
        'expires_at': datetime.utcnow() + timedelta(minutes=10),
        'attempts': 0,
        'max_attempts': 5
    }

    try:
        await send_otp_email(email, code)
        print(f"[INFO] OTP generated and sent to {email}")
    except Exception as e:
        # Roll back the stored OTP: the user never received it
        otp_store.pop(store_key, None)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to send OTP email: {str(e)}"
        )

    return {
        "message": "OTP sent to your email address",
        "expires_in_minutes": 10
    }
269
+
270
+
271
async def verify_otp(email: str, otp: str, db: Session) -> User:
    """
    Verify an OTP and return the matching user, creating it if needed.

    The stored OTP is consumed as soon as it has been verified, before any
    database or third-party call is awaited. This makes the code strictly
    one-time: a concurrent second request with the same code is rejected
    instead of racing through the slow enrichment calls.

    Args:
        email: Email address
        otp: OTP code to verify
        db: Database session

    Returns:
        User object (existing or newly created)

    Raises:
        HTTPException: 400 if the OTP is missing, expired, wrong, or the
            maximum number of attempts was exceeded
    """
    # Local import keeps this function self-contained within the module.
    import hmac

    email_lower = email.lower()
    stored = otp_store.get(email_lower)

    if not stored:
        raise HTTPException(
            status_code=400,
            detail="OTP not found. Please request a new OTP."
        )

    # Check if expired
    if datetime.utcnow() > stored['expires_at']:
        otp_store.pop(email_lower, None)
        raise HTTPException(
            status_code=400,
            detail="OTP has expired. Please request a new OTP."
        )

    # Check max attempts
    if stored['attempts'] >= stored['max_attempts']:
        otp_store.pop(email_lower, None)
        raise HTTPException(
            status_code=400,
            detail="Maximum verification attempts exceeded. Please request a new OTP."
        )

    # Constant-time comparison on bytes: avoids leaking how many leading
    # characters matched, and works for any user-supplied string.
    supplied = otp if isinstance(otp, str) else str(otp)
    if not hmac.compare_digest(str(stored['otp']).encode('utf-8'), supplied.encode('utf-8')):
        stored['attempts'] += 1
        remaining_attempts = stored['max_attempts'] - stored['attempts']
        raise HTTPException(
            status_code=400,
            detail=f"Invalid OTP. {remaining_attempts} attempt(s) remaining."
        )

    # OTP verified: consume it immediately so it cannot be replayed while
    # the awaits below are in flight.
    otp_store.pop(email_lower, None)

    # Get or create user
    user = db.query(User).filter(User.email == email_lower).first()

    if not user:
        user = User(
            email=email_lower,
            auth_method='otp',
            email_verified=True
        )
        db.add(user)
        db.commit()
        db.refresh(user)
        print(f"[INFO] New user created via OTP: {email_lower}")

        # Enrich contact data from Apollo.io and update Brevo + Monday.com
        try:
            from .apollo_service import enrich_contact_by_email
            from .brevo_service import create_brevo_contact, BREVO_TRIAL_LIST_ID
            from .monday_service import create_monday_lead

            # Empty dict when Apollo returned nothing, so lookups yield None
            enriched = (await enrich_contact_by_email(email_lower)) or {}

            first_name = enriched.get("first_name")
            last_name = enriched.get("last_name")
            org_name = enriched.get("organization_name")

            # Fallback to email domain if Apollo didn't provide organization
            if not org_name:
                org_domain = email_lower.split('@')[1] if '@' in email_lower else None
                org_name = org_domain.split('.')[0].capitalize() if org_domain else None

            organization_name = org_name or enriched.get("organization_name")

            # Update Brevo contact with enriched data
            await create_brevo_contact(
                email=email_lower,
                first_name=first_name,
                last_name=last_name,
                organization_name=organization_name,
                phone_number=enriched.get("phone_number"),
                linkedin_url=enriched.get("linkedin_url"),
                title=enriched.get("title"),
                headline=enriched.get("headline"),
                organization_website=enriched.get("organization_website"),
                organization_address=enriched.get("organization_address"),
                list_id=BREVO_TRIAL_LIST_ID
            )

            # Create lead in Monday.com
            await create_monday_lead(
                email=email_lower,
                first_name=first_name,
                last_name=last_name,
                phone_number=enriched.get("phone_number"),
                linkedin_url=enriched.get("linkedin_url"),
                title=enriched.get("title"),
                headline=enriched.get("headline"),
                organization_name=organization_name,
                organization_website=enriched.get("organization_website"),
                organization_address=enriched.get("organization_address"),
            )
        except Exception as e:
            # Don't fail user creation if integrations fail
            print(f"[WARNING] Failed to enrich/update contact for {email_lower}: {str(e)}")
    else:
        user.email_verified = True
        if user.auth_method != 'otp':
            user.auth_method = 'otp'
        db.commit()
        print(f"[INFO] User verified via OTP: {email_lower}")

    return user
396
+
397
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
backend/app/schemas.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from pydantic import BaseModel
2
  from typing import Dict, Optional
3
  from datetime import datetime
@@ -24,3 +25,31 @@ class ExtractionRecordBase(BaseModel):
24
 
25
  class Config:
26
  from_attributes = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  from pydantic import BaseModel
3
  from typing import Dict, Optional
4
  from datetime import datetime
 
25
 
26
  class Config:
27
  from_attributes = True
28
+ =======
29
+ from pydantic import BaseModel
30
+ from typing import Dict, Optional
31
+ from datetime import datetime
32
+
33
+
34
+ class ExtractionStage(BaseModel):
35
+ time: int
36
+ status: str
37
+ variation: str
38
+
39
+
40
+ class ExtractionRecordBase(BaseModel):
41
+ id: int
42
+ fileName: str
43
+ fileType: str
44
+ fileSize: str
45
+ extractedAt: datetime
46
+ status: str
47
+ confidence: float
48
+ fieldsExtracted: int
49
+ totalTime: int
50
+ stages: Dict[str, ExtractionStage]
51
+ errorMessage: Optional[str] = None
52
+
53
+ class Config:
54
+ from_attributes = True
55
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
backend/requirements.txt CHANGED
@@ -1,15 +1,15 @@
1
- fastapi
2
- uvicorn[standard]
3
- python-multipart
4
- pydantic[email]
5
- sqlalchemy
6
- httpx
7
- python-dotenv
8
- pymupdf
9
- pillow
10
- huggingface-hub
11
- openai
12
- firebase-admin
13
- pyjwt
14
- python-jose[cryptography]
15
  email-validator
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ python-multipart
4
+ pydantic[email]
5
+ sqlalchemy
6
+ httpx
7
+ python-dotenv
8
+ pymupdf
9
+ pillow
10
+ huggingface-hub
11
+ openai
12
+ firebase-admin
13
+ pyjwt
14
+ python-jose[cryptography]
15
  email-validator
frontend/index.html CHANGED
@@ -1,3 +1,4 @@
 
1
  <!doctype html>
2
  <html lang="en">
3
  <head>
@@ -11,3 +12,18 @@
11
  <script type="module" src="/src/main.jsx"></script>
12
  </body>
13
  </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  <!doctype html>
3
  <html lang="en">
4
  <head>
 
12
  <script type="module" src="/src/main.jsx"></script>
13
  </body>
14
  </html>
15
+ =======
16
+ <!doctype html>
17
+ <html lang="en">
18
+ <head>
19
+ <meta charset="UTF-8" />
20
+ <link rel="icon" type="image/png" href="/logo.png" />
21
+ <title>EZOFIS AI - VRP Document Intelligence</title>
22
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
23
+ </head>
24
+ <body class="bg-[#FAFAFA]">
25
+ <div id="root"></div>
26
+ <script type="module" src="/src/main.jsx"></script>
27
+ </body>
28
+ </html>
29
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/package.json CHANGED
@@ -1,26 +1,26 @@
1
- {
2
- "name": "document-capture-demo",
3
- "version": "1.0.0",
4
- "private": true,
5
- "scripts": {
6
- "dev": "vite",
7
- "build": "vite build",
8
- "preview": "vite preview"
9
- },
10
- "dependencies": {
11
- "react": "^18.3.1",
12
- "react-dom": "^18.3.1",
13
- "react-router-dom": "^6.26.2",
14
- "framer-motion": "^11.0.0",
15
- "lucide-react": "^0.471.0",
16
- "pdfjs-dist": "^4.0.379",
17
- "firebase": "^10.7.1"
18
- },
19
- "devDependencies": {
20
- "@vitejs/plugin-react": "^4.1.0",
21
- "autoprefixer": "^10.4.20",
22
- "postcss": "^8.4.47",
23
- "tailwindcss": "^3.4.14",
24
- "vite": "^5.4.0"
25
- }
26
- }
 
1
+ {
2
+ "name": "document-capture-demo",
3
+ "version": "1.0.0",
4
+ "private": true,
5
+ "scripts": {
6
+ "dev": "vite",
7
+ "build": "vite build",
8
+ "preview": "vite preview"
9
+ },
10
+ "dependencies": {
11
+ "react": "^18.3.1",
12
+ "react-dom": "^18.3.1",
13
+ "react-router-dom": "^6.26.2",
14
+ "framer-motion": "^11.0.0",
15
+ "lucide-react": "^0.471.0",
16
+ "pdfjs-dist": "^4.0.379",
17
+ "firebase": "^10.7.1"
18
+ },
19
+ "devDependencies": {
20
+ "@vitejs/plugin-react": "^4.1.0",
21
+ "autoprefixer": "^10.4.20",
22
+ "postcss": "^8.4.47",
23
+ "tailwindcss": "^3.4.14",
24
+ "vite": "^5.4.0"
25
+ }
26
+ }
frontend/src/App.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  // frontend/src/App.jsx
2
 
3
  import React, { useEffect } from "react";
@@ -104,3 +105,111 @@ export default function App() {
104
  </AuthProvider>
105
  );
106
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  // frontend/src/App.jsx
3
 
4
  import React, { useEffect } from "react";
 
105
  </AuthProvider>
106
  );
107
  }
108
+ =======
109
+ // frontend/src/App.jsx
110
+
111
+ import React, { useEffect } from "react";
112
+ import { Routes, Route, useNavigate, useSearchParams } from "react-router-dom";
113
+ import { AuthProvider, useAuth } from "./contexts/AuthContext";
114
+ import Layout from "./Layout";
115
+ import Dashboard from "./pages/Dashboard";
116
+ import History from "./pages/History";
117
+ import ShareHandler from "./pages/ShareHandler";
118
+ import LoginForm from "./components/auth/LoginForm";
119
+
120
+ // Auth callback handler component
121
+ function AuthCallback() {
122
+ const [searchParams] = useSearchParams();
123
+ const { handleAuthCallback } = useAuth();
124
+ const navigate = useNavigate();
125
+
126
+ useEffect(() => {
127
+ const token = searchParams.get("token");
128
+ if (token) {
129
+ handleAuthCallback(token);
130
+ navigate("/");
131
+ } else {
132
+ navigate("/");
133
+ }
134
+ }, [searchParams, handleAuthCallback, navigate]);
135
+
136
+ return (
137
+ <div className="min-h-screen flex items-center justify-center">
138
+ <div className="text-center">
139
+ <p className="text-slate-600">Completing authentication...</p>
140
+ </div>
141
+ </div>
142
+ );
143
+ }
144
+
145
+ // Protected route wrapper
146
+ function ProtectedRoute({ children }) {
147
+ const { isAuthenticated, loading } = useAuth();
148
+
149
+ if (loading) {
150
+ return (
151
+ <div className="min-h-screen flex items-center justify-center">
152
+ <div className="text-center">
153
+ <div className="h-16 w-16 mx-auto rounded-2xl bg-indigo-100 flex items-center justify-center mb-4 animate-pulse">
154
+ <div className="h-8 w-8 rounded-lg bg-indigo-600"></div>
155
+ </div>
156
+ <p className="text-slate-600">Loading...</p>
157
+ </div>
158
+ </div>
159
+ );
160
+ }
161
+
162
+ if (!isAuthenticated) {
163
+ return <LoginForm />;
164
+ }
165
+
166
+ return children;
167
+ }
168
+
169
+ function AppRoutes() {
170
+ return (
171
+ <Routes>
172
+ <Route
173
+ path="/auth/callback"
174
+ element={<AuthCallback />}
175
+ />
176
+ <Route
177
+ path="/share/:token"
178
+ element={
179
+ <ProtectedRoute>
180
+ <ShareHandler />
181
+ </ProtectedRoute>
182
+ }
183
+ />
184
+ <Route
185
+ path="/"
186
+ element={
187
+ <ProtectedRoute>
188
+ <Layout currentPageName="Dashboard">
189
+ <Dashboard />
190
+ </Layout>
191
+ </ProtectedRoute>
192
+ }
193
+ />
194
+ <Route
195
+ path="/history"
196
+ element={
197
+ <ProtectedRoute>
198
+ <Layout currentPageName="History">
199
+ <History />
200
+ </Layout>
201
+ </ProtectedRoute>
202
+ }
203
+ />
204
+ </Routes>
205
+ );
206
+ }
207
+
208
+ export default function App() {
209
+ return (
210
+ <AuthProvider>
211
+ <AppRoutes />
212
+ </AuthProvider>
213
+ );
214
+ }
215
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/Layout.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  // frontend/src/Layout.jsx
2
 
3
  import React, { useState } from "react";
@@ -177,3 +178,184 @@ export default function Layout({ children, currentPageName }) {
177
  </div>
178
  );
179
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  // frontend/src/Layout.jsx
3
 
4
  import React, { useState } from "react";
 
178
  </div>
179
  );
180
  }
181
+ =======
182
+ // frontend/src/Layout.jsx
183
+
184
+ import React, { useState } from "react";
185
+ import { Link } from "react-router-dom";
186
+ import { createPageUrl } from "./utils";
187
+ import {
188
+ LayoutDashboard,
189
+ History as HistoryIcon,
190
+ ChevronLeft,
191
+ Sparkles,
192
+ LogOut,
193
+ User,
194
+ } from "lucide-react";
195
+ import { cn } from "@/lib/utils";
196
+ import { useAuth } from "./contexts/AuthContext";
197
+
198
+ // Import logo - Vite will process this and handle the path correctly
199
+ // For production, the logo should be in frontend/public/logo.png
200
+ // Vite will copy it to dist/logo.png during build
201
+ const logoPath = "/logo.png";
202
+
203
+ export default function Layout({ children, currentPageName }) {
204
+ const [collapsed, setCollapsed] = useState(false);
205
+ const { user, logout } = useAuth();
206
+
207
+ const navItems = [
208
+ { name: "Dashboard", icon: LayoutDashboard, page: "Dashboard" },
209
+ { name: "History", icon: HistoryIcon, page: "History" },
210
+ ];
211
+
212
+ return (
213
+ <div className="min-h-screen bg-[#FAFAFA] flex">
214
+ {/* Sidebar */}
215
+ <aside
216
+ className={cn(
217
+ "fixed left-0 top-0 h-screen bg-white border-r border-slate-200/80 z-50 transition-all duration-300 ease-out flex flex-col",
218
+ collapsed ? "w-[72px]" : "w-[260px]"
219
+ )}
220
+ >
221
+ {/* Logo */}
222
+ <div
223
+ className={cn(
224
+ "h-16 flex items-center border-b border-slate-100 px-4",
225
+ collapsed ? "justify-center" : "justify-between"
226
+ )}
227
+ >
228
+ <Link to={createPageUrl("Dashboard")} className="flex items-center gap-3">
229
+ <div className="h-9 w-9 flex items-center justify-center flex-shrink-0">
230
+ <img
231
+ src={logoPath}
232
+ alt="EZOFIS AI Logo"
233
+ className="h-full w-full object-contain"
234
+ onError={(e) => {
235
+ // Fallback: hide image and show placeholder if logo not found
236
+ e.target.style.display = 'none';
237
+ }}
238
+ />
239
+ </div>
240
+ {!collapsed && (
241
+ <div className="flex flex-col">
242
+ <span className="font-semibold text-slate-900 tracking-tight">EZOFIS AI</span>
243
+ <span className="text-[10px] text-slate-400 font-medium tracking-wide uppercase">
244
+ VRP Intelligence
245
+ </span>
246
+ </div>
247
+ )}
248
+ </Link>
249
+ {!collapsed && (
250
+ <button
251
+ onClick={() => setCollapsed(true)}
252
+ className="h-7 w-7 rounded-lg hover:bg-slate-100 flex items-center justify-center text-slate-400 hover:text-slate-600 transition-colors"
253
+ >
254
+ <ChevronLeft className="h-4 w-4" />
255
+ </button>
256
+ )}
257
+ </div>
258
+
259
+ {/* Navigation */}
260
+ <nav className="flex-1 p-3 space-y-1">
261
+ {navItems.map((item) => {
262
+ const isActive = currentPageName === item.page;
263
+ return (
264
+ <Link
265
+ key={item.name}
266
+ to={createPageUrl(item.page)}
267
+ className={cn(
268
+ "flex items-center gap-3 px-3 py-2.5 rounded-xl transition-all duration-200 group",
269
+ isActive
270
+ ? "bg-gradient-to-r from-indigo-50 to-violet-50 text-indigo-600"
271
+ : "text-slate-500 hover:bg-slate-50 hover:text-slate-700"
272
+ )}
273
+ >
274
+ <item.icon
275
+ className={cn(
276
+ "h-5 w-5 flex-shrink-0",
277
+ isActive ? "text-indigo-600" : "text-slate-400 group-hover:text-slate-600"
278
+ )}
279
+ />
280
+ {!collapsed && (
281
+ <span className="font-medium text-sm">{item.name}</span>
282
+ )}
283
+ </Link>
284
+ );
285
+ })}
286
+ </nav>
287
+
288
+ {/* Collapse Toggle (when collapsed) */}
289
+ {collapsed && (
290
+ <button
291
+ onClick={() => setCollapsed(false)}
292
+ className="m-3 h-10 rounded-xl bg-slate-50 hover:bg-slate-100 flex items-center justify-center text-slate-400 hover:text-slate-600 transition-colors"
293
+ >
294
+ <ChevronLeft className="h-4 w-4 rotate-180" />
295
+ </button>
296
+ )}
297
+
298
+ {/* Pro Badge */}
299
+ {!collapsed && (
300
+ <div className="p-3">
301
+ <div className="p-4 rounded-2xl bg-gradient-to-br from-slate-900 to-slate-800 text-white">
302
+ <div className="flex items-center gap-2 mb-2">
303
+ <Sparkles className="h-4 w-4 text-amber-400" />
304
+ <span className="text-xs font-semibold tracking-wide">DEPLOY CUSTOM AGENT</span>
305
+ </div>
306
+ <p className="text-xs text-slate-400 mb-3">
307
+ Batch extractions, custom model, field mapping, complex lineitems, tables, workflows, &amp; API access
308
+ </p>
309
+ <button className="w-full py-2 px-3 rounded-lg bg-white text-slate-900 text-sm font-semibold hover:bg-slate-100 transition-colors">
310
+ Book a Custom Demo
311
+ </button>
312
+ </div>
313
+ </div>
314
+ )}
315
+
316
+ {/* User Profile */}
317
+ {!collapsed && user && (
318
+ <div className="p-3 border-t border-slate-200">
319
+ <div className="flex items-center gap-3 p-3 rounded-xl bg-slate-50 hover:bg-slate-100 transition-colors">
320
+ {user.picture ? (
321
+ <img
322
+ src={user.picture}
323
+ alt={user.name || user.email}
324
+ className="h-10 w-10 rounded-lg object-cover"
325
+ />
326
+ ) : (
327
+ <div className="h-10 w-10 rounded-lg bg-indigo-100 flex items-center justify-center">
328
+ <User className="h-5 w-5 text-indigo-600" />
329
+ </div>
330
+ )}
331
+ <div className="flex-1 min-w-0">
332
+ <p className="text-sm font-medium text-slate-900 truncate">
333
+ {user.name || "User"}
334
+ </p>
335
+ <p className="text-xs text-slate-500 truncate">{user.email}</p>
336
+ </div>
337
+ </div>
338
+ <button
339
+ onClick={logout}
340
+ className="mt-2 w-full flex items-center gap-2 px-3 py-2 rounded-xl text-sm text-slate-600 hover:bg-red-50 hover:text-red-600 transition-colors"
341
+ >
342
+ <LogOut className="h-4 w-4" />
343
+ <span>Sign Out</span>
344
+ </button>
345
+ </div>
346
+ )}
347
+ </aside>
348
+
349
+ {/* Main Content */}
350
+ <main
351
+ className={cn(
352
+ "flex-1 transition-all duration-300",
353
+ collapsed ? "ml-[72px]" : "ml-[260px]"
354
+ )}
355
+ >
356
+ {children}
357
+ </main>
358
+ </div>
359
+ );
360
+ }
361
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/components/ExportButtons.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  import React, { useState } from "react";
2
  import { motion, AnimatePresence } from "framer-motion";
3
  import {
@@ -690,3 +691,697 @@ ${htmlContent}
690
  </motion.div>
691
  );
692
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import React, { useState } from "react";
3
  import { motion, AnimatePresence } from "framer-motion";
4
  import {
 
691
  </motion.div>
692
  );
693
  }
694
+ =======
695
+ import React, { useState } from "react";
696
+ import { motion, AnimatePresence } from "framer-motion";
697
+ import {
698
+ Download,
699
+ Braces,
700
+ FileCode2,
701
+ Check,
702
+ Share2,
703
+ FileText,
704
+ Link2,
705
+ Mail,
706
+ } from "lucide-react";
707
+ import { Button } from "@/components/ui/button";
708
+ import {
709
+ DropdownMenu,
710
+ DropdownMenuContent,
711
+ DropdownMenuItem,
712
+ DropdownMenuSeparator,
713
+ DropdownMenuTrigger,
714
+ } from "@/components/ui/dropdown-menu";
715
+ import { cn } from "@/lib/utils";
716
+ import ShareModal from "@/components/ShareModal";
717
+ import ShareLinkModal from "@/components/ShareLinkModal";
718
+ import { shareExtraction, createShareLink } from "@/services/api";
719
+
720
+ // Helper functions from ExtractionOutput
721
+ function prepareFieldsForOutput(fields, format = "json") {
722
+ if (!fields || typeof fields !== "object") {
723
+ return fields;
724
+ }
725
+
726
+ const output = { ...fields };
727
+
728
+ // Extract Fields from root level if it exists
729
+ const rootFields = output.Fields;
730
+ // Remove Fields from output temporarily (will be added back at top)
731
+ delete output.Fields;
732
+
733
+ // Remove full_text from top-level if pages array exists (to avoid duplication)
734
+ if (output.pages && Array.isArray(output.pages) && output.pages.length > 0) {
735
+ delete output.full_text;
736
+
737
+ // Clean up each page: remove full_text from page.fields (it duplicates page.text)
738
+ output.pages = output.pages.map(page => {
739
+ const cleanedPage = { ...page };
740
+ if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
741
+ const cleanedFields = { ...cleanedPage.fields };
742
+ // Remove full_text from page fields (duplicates page.text)
743
+ delete cleanedFields.full_text;
744
+ cleanedPage.fields = cleanedFields;
745
+ }
746
+ return cleanedPage;
747
+ });
748
+ }
749
+
750
+ // For JSON and XML: restructure pages into separate top-level fields (page_1, page_2, etc.)
751
+ if ((format === "json" || format === "xml") && output.pages && Array.isArray(output.pages)) {
752
+ // Get top-level field keys (these are merged from all pages - avoid duplicating in page fields)
753
+ const topLevelKeys = new Set(Object.keys(output).filter(k => k !== "pages" && k !== "full_text" && k !== "Fields"));
754
+
755
+ output.pages.forEach((page, idx) => {
756
+ const pageNum = page.page_number || idx + 1;
757
+ const pageFields = page.fields || {};
758
+
759
+ // Remove duplicate fields from page.fields:
760
+ // 1. Remove full_text (duplicates page.text)
761
+ // 2. Remove fields that match top-level fields (already shown at root)
762
+ const cleanedPageFields = {};
763
+ for (const [key, value] of Object.entries(pageFields)) {
764
+ // Skip full_text and fields that match top-level exactly
765
+ if (key !== "full_text" && (!topLevelKeys.has(key) || (value !== output[key]))) {
766
+ cleanedPageFields[key] = value;
767
+ }
768
+ }
769
+
770
+ const pageObj = {
771
+ text: page.text || "",
772
+ confidence: page.confidence || 0,
773
+ doc_type: page.doc_type || "other"
774
+ };
775
+
776
+ // Add table and footer_notes if they exist
777
+ if (page.table && Array.isArray(page.table) && page.table.length > 0) {
778
+ pageObj.table = page.table;
779
+ }
780
+ if (page.footer_notes && Array.isArray(page.footer_notes) && page.footer_notes.length > 0) {
781
+ pageObj.footer_notes = page.footer_notes;
782
+ }
783
+
784
+ // Only add fields if there are unique page-specific fields
785
+ if (Object.keys(cleanedPageFields).length > 0) {
786
+ pageObj.fields = cleanedPageFields;
787
+ }
788
+
789
+ output[`page_${pageNum}`] = pageObj;
790
+ });
791
+ // Remove pages array - we now have page_1, page_2, etc. as separate fields
792
+ delete output.pages;
793
+ }
794
+
795
+ // Handle page_X structure (from backend) - remove Fields from page objects if they exist
796
+ if (output && typeof output === "object") {
797
+ const pageKeys = Object.keys(output).filter(k => k.startsWith("page_"));
798
+ for (const pageKey of pageKeys) {
799
+ const pageData = output[pageKey];
800
+ if (pageData && typeof pageData === "object") {
801
+ // Remove Fields from page objects (it's now at root level)
802
+ delete pageData.Fields;
803
+ delete pageData.metadata;
804
+ }
805
+ }
806
+ }
807
+
808
+ // Rebuild output with Fields at the top (only if it exists and is not empty)
809
+ const finalOutput = {};
810
+ if (rootFields && typeof rootFields === "object" && Object.keys(rootFields).length > 0) {
811
+ finalOutput.Fields = rootFields;
812
+ }
813
+
814
+ // Add all other keys
815
+ Object.keys(output).forEach(key => {
816
+ finalOutput[key] = output[key];
817
+ });
818
+
819
+ return finalOutput;
820
+ }
821
+
822
+ function escapeXML(str) {
823
+ return str
824
+ .replace(/&/g, "&amp;")
825
+ .replace(/</g, "&lt;")
826
+ .replace(/>/g, "&gt;")
827
+ .replace(/"/g, "&quot;")
828
+ .replace(/'/g, "&apos;");
829
+ }
830
+
831
+ function objectToXML(obj, rootName = "extraction") {
832
+ // Prepare fields - remove full_text if pages exist
833
+ const preparedObj = prepareFieldsForOutput(obj, "xml");
834
+
835
+ let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;
836
+
837
+ const convert = (obj, indent = " ") => {
838
+ for (const [key, value] of Object.entries(obj)) {
839
+ if (value === null || value === undefined) continue;
840
+
841
+ // Skip full_text if pages exist (already handled in prepareFieldsForOutput)
842
+ if (key === "full_text" && obj.pages && Array.isArray(obj.pages) && obj.pages.length > 0) {
843
+ continue;
844
+ }
845
+
846
+ if (Array.isArray(value)) {
847
+ value.forEach((item) => {
848
+ xml += `${indent}<${key}>\n`;
849
+ if (typeof item === "object") {
850
+ convert(item, indent + " ");
851
+ } else {
852
+ xml += `${indent} ${escapeXML(String(item))}\n`;
853
+ }
854
+ xml += `${indent}</${key}>\n`;
855
+ });
856
+ } else if (typeof value === "object") {
857
+ xml += `${indent}<${key}>\n`;
858
+ convert(value, indent + " ");
859
+ xml += `${indent}</${key}>\n`;
860
+ } else {
861
+ xml += `${indent}<${key}>${escapeXML(String(value))}</${key}>\n`;
862
+ }
863
+ }
864
+ };
865
+
866
+ convert(preparedObj);
867
+ xml += `</${rootName}>`;
868
+ return xml;
869
+ }
870
+
871
+ export default function ExportButtons({ isComplete, extractionResult }) {
872
+ const [downloading, setDownloading] = useState(null);
873
+ const [copied, setCopied] = useState(false);
874
+ const [isShareModalOpen, setIsShareModalOpen] = useState(false);
875
+ const [isShareLinkModalOpen, setIsShareLinkModalOpen] = useState(false);
876
+ const [shareLink, setShareLink] = useState("");
877
+ const [isGeneratingLink, setIsGeneratingLink] = useState(false);
878
+
879
+ // Helper function to extract text from fields (same as in ExtractionOutput)
880
+ const extractTextFromFields = (fields) => {
881
+ if (!fields || typeof fields !== "object") {
882
+ return "";
883
+ }
884
+
885
+ // Check for page_X structure first (preferred format)
886
+ const pageKeys = Object.keys(fields).filter(key => key.startsWith("page_"));
887
+ if (pageKeys.length > 0) {
888
+ // Get text from first page (or combine all pages)
889
+ const pageTexts = pageKeys.map(key => {
890
+ const page = fields[key];
891
+ if (page && page.text) {
892
+ return page.text;
893
+ }
894
+ return "";
895
+ }).filter(text => text);
896
+
897
+ if (pageTexts.length > 0) {
898
+ return pageTexts.join("\n\n");
899
+ }
900
+ }
901
+
902
+ // Fallback to full_text
903
+ if (fields.full_text) {
904
+ return fields.full_text;
905
+ }
906
+
907
+ return "";
908
+ };
909
+
910
+ // Helper function to escape HTML
911
+ const escapeHtml = (text) => {
912
+ if (!text) return '';
913
+ const div = document.createElement('div');
914
+ div.textContent = text;
915
+ return div.innerHTML;
916
+ };
917
+
918
+ // Helper function to convert pipe-separated tables to HTML tables
919
+ const convertPipeTablesToHTML = (text) => {
920
+ if (!text) return text;
921
+
922
+ const lines = text.split('\n');
923
+ const result = [];
924
+ let i = 0;
925
+
926
+ while (i < lines.length) {
927
+ const line = lines[i];
928
+
929
+ // Check if this line looks like a table row (has multiple pipes)
930
+ if (line.includes('|') && line.split('|').length >= 3) {
931
+ // Check if it's a separator line (only |, -, :, spaces)
932
+ const isSeparator = /^[\s|\-:]+$/.test(line.trim());
933
+
934
+ if (!isSeparator) {
935
+ // Start of a table - collect all table rows
936
+ const tableRows = [];
937
+ let j = i;
938
+
939
+ // Collect header row
940
+ const headerLine = lines[j];
941
+ const headerCells = headerLine.split('|').map(cell => cell.trim()).filter(cell => cell || cell === '');
942
+ // Remove empty cells at start/end
943
+ if (headerCells.length > 0 && !headerCells[0]) headerCells.shift();
944
+ if (headerCells.length > 0 && !headerCells[headerCells.length - 1]) headerCells.pop();
945
+
946
+ if (headerCells.length >= 2) {
947
+ tableRows.push(headerCells);
948
+ j++;
949
+
950
+ // Skip separator line if present
951
+ if (j < lines.length && /^[\s|\-:]+$/.test(lines[j].trim())) {
952
+ j++;
953
+ }
954
+
955
+ // Collect data rows
956
+ while (j < lines.length) {
957
+ const rowLine = lines[j];
958
+ if (!rowLine.trim()) break; // Empty line ends table
959
+
960
+ // Check if it's still a table row
961
+ if (rowLine.includes('|') && rowLine.split('|').length >= 2) {
962
+ const isRowSeparator = /^[\s|\-:]+$/.test(rowLine.trim());
963
+ if (!isRowSeparator) {
964
+ const rowCells = rowLine.split('|').map(cell => cell.trim());
965
+ // Remove empty cells at start/end
966
+ if (rowCells.length > 0 && !rowCells[0]) rowCells.shift();
967
+ if (rowCells.length > 0 && !rowCells[rowCells.length - 1]) rowCells.pop();
968
+ tableRows.push(rowCells);
969
+ j++;
970
+ } else {
971
+ j++;
972
+ }
973
+ } else {
974
+ break; // Not a table row anymore
975
+ }
976
+ }
977
+
978
+ // Convert to HTML table
979
+ if (tableRows.length > 0) {
980
+ let htmlTable = '<table class="border-collapse border border-gray-300 w-full my-4">\n<thead>\n<tr>';
981
+
982
+ // Header row
983
+ tableRows[0].forEach(cell => {
984
+ htmlTable += `<th class="border border-gray-300 px-4 py-2 bg-gray-100 font-semibold text-left">${escapeHtml(cell)}</th>`;
985
+ });
986
+ htmlTable += '</tr>\n</thead>\n<tbody>\n';
987
+
988
+ // Data rows
989
+ for (let rowIdx = 1; rowIdx < tableRows.length; rowIdx++) {
990
+ htmlTable += '<tr>';
991
+ tableRows[rowIdx].forEach((cell, colIdx) => {
992
+ // Use header cell count to ensure alignment
993
+ const cellContent = cell || '';
994
+ htmlTable += `<td class="border border-gray-300 px-4 py-2">${escapeHtml(cellContent)}</td>`;
995
+ });
996
+ htmlTable += '</tr>\n';
997
+ }
998
+
999
+ htmlTable += '</tbody>\n</table>';
1000
+ result.push(htmlTable);
1001
+ i = j;
1002
+ continue;
1003
+ }
1004
+ }
1005
+ }
1006
+ }
1007
+
1008
+ // Not a table row, add as-is
1009
+ result.push(line);
1010
+ i++;
1011
+ }
1012
+
1013
+ return result.join('\n');
1014
+ };
1015
+
1016
+ // Helper function to render markdown to HTML (same as in ExtractionOutput)
1017
+ const renderMarkdownToHTML = (text) => {
1018
+ if (!text) return "";
1019
+
1020
+ let html = text;
1021
+
1022
+ // FIRST: Convert pipe-separated tables to HTML tables
1023
+ html = convertPipeTablesToHTML(html);
1024
+
1025
+ // Convert LaTeX-style superscripts/subscripts FIRST
1026
+ html = html.replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, '<sup>$1</sup>');
1027
+ html = html.replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, '<sup>$1</sup>');
1028
+ html = html.replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, '<sub>$1</sub>');
1029
+ html = html.replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, '<sub>$1</sub>');
1030
+
1031
+ // Protect HTML table blocks
1032
+ const htmlBlocks = [];
1033
+ let htmlBlockIndex = 0;
1034
+
1035
+ html = html.replace(/<table[\s\S]*?<\/table>/gi, (match) => {
1036
+ const placeholder = `__HTML_BLOCK_${htmlBlockIndex}__`;
1037
+ htmlBlocks[htmlBlockIndex] = match;
1038
+ htmlBlockIndex++;
1039
+ return placeholder;
1040
+ });
1041
+
1042
+ // Convert markdown headers
1043
+ html = html.replace(/^### (.*$)/gim, '<h3>$1</h3>');
1044
+ html = html.replace(/^## (.*$)/gim, '<h2>$1</h2>');
1045
+ html = html.replace(/^# (.*$)/gim, '<h1>$1</h1>');
1046
+
1047
+ // Convert markdown bold/italic
1048
+ html = html.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
1049
+ html = html.replace(/\*(.*?)\*/g, '<em>$1</em>');
1050
+
1051
+ // Convert markdown links
1052
+ html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>');
1053
+
1054
+ // Process line breaks
1055
+ const parts = html.split(/(__HTML_BLOCK_\d+__)/);
1056
+ const processedParts = parts.map((part) => {
1057
+ if (part.match(/^__HTML_BLOCK_\d+__$/)) {
1058
+ const blockIndex = parseInt(part.match(/\d+/)[0]);
1059
+ return htmlBlocks[blockIndex];
1060
+ } else {
1061
+ let processed = part;
1062
+ processed = processed.replace(/\n\n+/g, '</p><p>');
1063
+ processed = processed.replace(/([^\n>])\n([^\n<])/g, '$1<br>$2');
1064
+ if (processed.trim() && !processed.trim().startsWith('<')) {
1065
+ processed = '<p>' + processed + '</p>';
1066
+ }
1067
+ return processed;
1068
+ }
1069
+ });
1070
+
1071
+ html = processedParts.join('');
1072
+ html = html.replace(/<p><\/p>/g, '');
1073
+ html = html.replace(/<p>\s*<br>\s*<\/p>/g, '');
1074
+ html = html.replace(/<p>\s*<\/p>/g, '');
1075
+
1076
+ return html;
1077
+ };
1078
+
1079
+ const handleDownload = async (format) => {
1080
+ if (!extractionResult || !extractionResult.fields) {
1081
+ console.error("No extraction data available");
1082
+ return;
1083
+ }
1084
+
1085
+ setDownloading(format);
1086
+
1087
+ try {
1088
+ const fields = extractionResult.fields;
1089
+ let content = "";
1090
+ let filename = "";
1091
+ let mimeType = "";
1092
+
1093
+ if (format === "json") {
1094
+ const preparedFields = prepareFieldsForOutput(fields, "json");
1095
+ content = JSON.stringify(preparedFields, null, 2);
1096
+ filename = `extraction_${new Date().toISOString().split('T')[0]}.json`;
1097
+ mimeType = "application/json";
1098
+ } else if (format === "xml") {
1099
+ content = objectToXML(fields);
1100
+ filename = `extraction_${new Date().toISOString().split('T')[0]}.xml`;
1101
+ mimeType = "application/xml";
1102
+ } else if (format === "docx") {
1103
+ // For DOCX, create a Word-compatible HTML document that preserves layout
1104
+ // Extract text and convert to HTML (same as text viewer)
1105
+ const textContent = extractTextFromFields(fields);
1106
+ const htmlContent = renderMarkdownToHTML(textContent);
1107
+
1108
+ // Create a Word-compatible HTML document with proper MIME type
1109
+ // Word can open HTML files with .docx extension if we use the right MIME type
1110
+ const wordHTML = `<!DOCTYPE html>
1111
+ <html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns="http://www.w3.org/TR/REC-html40">
1112
+ <head>
1113
+ <meta charset="UTF-8">
1114
+ <meta name="ProgId" content="Word.Document">
1115
+ <meta name="Generator" content="Microsoft Word">
1116
+ <meta name="Originator" content="Microsoft Word">
1117
+ <!--[if gte mso 9]><xml>
1118
+ <w:WordDocument>
1119
+ <w:View>Print</w:View>
1120
+ <w:Zoom>100</w:Zoom>
1121
+ <w:DoNotOptimizeForBrowser/>
1122
+ </w:WordDocument>
1123
+ </xml><![endif]-->
1124
+ <title>Document Extraction</title>
1125
+ <style>
1126
+ @page {
1127
+ size: 8.5in 11in;
1128
+ margin: 1in;
1129
+ }
1130
+ body {
1131
+ font-family: 'Calibri', 'Arial', sans-serif;
1132
+ font-size: 11pt;
1133
+ line-height: 1.6;
1134
+ margin: 0;
1135
+ color: #333;
1136
+ }
1137
+ h1 {
1138
+ font-size: 18pt;
1139
+ font-weight: bold;
1140
+ color: #0f172a;
1141
+ margin-top: 24pt;
1142
+ margin-bottom: 12pt;
1143
+ page-break-after: avoid;
1144
+ }
1145
+ h2 {
1146
+ font-size: 16pt;
1147
+ font-weight: 600;
1148
+ color: #0f172a;
1149
+ margin-top: 20pt;
1150
+ margin-bottom: 10pt;
1151
+ page-break-after: avoid;
1152
+ }
1153
+ h3 {
1154
+ font-size: 14pt;
1155
+ font-weight: 600;
1156
+ color: #1e293b;
1157
+ margin-top: 16pt;
1158
+ margin-bottom: 8pt;
1159
+ page-break-after: avoid;
1160
+ }
1161
+ p {
1162
+ margin-top: 6pt;
1163
+ margin-bottom: 6pt;
1164
+ }
1165
+ table {
1166
+ width: 100%;
1167
+ border-collapse: collapse;
1168
+ margin: 12pt 0;
1169
+ font-size: 10pt;
1170
+ page-break-inside: avoid;
1171
+ }
1172
+ table th {
1173
+ background-color: #f8fafc;
1174
+ border: 1pt solid #cbd5e1;
1175
+ padding: 6pt;
1176
+ text-align: left;
1177
+ font-weight: 600;
1178
+ color: #0f172a;
1179
+ }
1180
+ table td {
1181
+ border: 1pt solid #cbd5e1;
1182
+ padding: 6pt;
1183
+ color: #334155;
1184
+ }
1185
+ table tr:nth-child(even) {
1186
+ background-color: #f8fafc;
1187
+ }
1188
+ sup {
1189
+ font-size: 0.75em;
1190
+ vertical-align: super;
1191
+ line-height: 0;
1192
+ }
1193
+ sub {
1194
+ font-size: 0.75em;
1195
+ vertical-align: sub;
1196
+ line-height: 0;
1197
+ }
1198
+ strong {
1199
+ font-weight: 600;
1200
+ }
1201
+ em {
1202
+ font-style: italic;
1203
+ }
1204
+ a {
1205
+ color: #4f46e5;
1206
+ text-decoration: underline;
1207
+ }
1208
+ </style>
1209
+ </head>
1210
+ <body>
1211
+ ${htmlContent}
1212
+ </body>
1213
+ </html>`;
1214
+
1215
+ content = wordHTML;
1216
+ filename = `extraction_${new Date().toISOString().split('T')[0]}.doc`;
1217
+ mimeType = "application/msword";
1218
+ }
1219
+
1220
+ // Create blob and download
1221
+ const blob = new Blob([content], { type: mimeType });
1222
+ const url = URL.createObjectURL(blob);
1223
+ const link = document.createElement("a");
1224
+ link.href = url;
1225
+ link.download = filename;
1226
+ document.body.appendChild(link);
1227
+ link.click();
1228
+ document.body.removeChild(link);
1229
+ URL.revokeObjectURL(url);
1230
+
1231
+ setDownloading(null);
1232
+ } catch (error) {
1233
+ console.error("Download error:", error);
1234
+ setDownloading(null);
1235
+ }
1236
+ };
1237
+
1238
/**
 * Opens the share-link modal and requests a share link for the current
 * extraction.
 *
 * Does nothing when `extractionResult` has no `id`. While the request is in
 * flight the modal is shown in its loading state; on failure the link is left
 * empty so the modal renders its "no link" state.
 */
const handleCopyLink = async () => {
  const extractionId = extractionResult?.id;
  if (!extractionId) {
    return;
  }

  // Show the modal immediately (loading state) with any previous link cleared.
  setIsGeneratingLink(true);
  setIsShareLinkModalOpen(true);
  setShareLink("");

  try {
    const response = await createShareLink(extractionId);
    if (!response.success || !response.share_link) {
      throw new Error("Failed to generate share link");
    }
    setShareLink(response.share_link);
  } catch (err) {
    console.error("Failed to create share link:", err);
    // Keep the modal open; an empty link makes it show the error/empty state.
    setShareLink("");
  } finally {
    setIsGeneratingLink(false);
  }
};
1260
+
1261
/**
 * Shares an extraction with the given recipient(s) by delegating to
 * `shareExtraction`.
 *
 * Passed to ShareModal as `onShare`. The resolved value is returned so the
 * caller can read fields such as `message`; previously the result was
 * discarded and callers always received `undefined`.
 *
 * @param {*} extractionId - Identifier forwarded unchanged to `shareExtraction`.
 * @param {*} recipientEmail - Recipient(s) forwarded unchanged to `shareExtraction`.
 * @returns {Promise<*>} Whatever `shareExtraction` resolves to; rejections propagate.
 */
const handleShare = async (extractionId, recipientEmail) => {
  return shareExtraction(extractionId, recipientEmail);
};
1264
+
1265
+ if (!isComplete) return null;
1266
+
1267
+ return (
1268
+ <motion.div
1269
+ initial={{ opacity: 0, y: 20 }}
1270
+ animate={{ opacity: 1, y: 0 }}
1271
+ className="flex items-center gap-3"
1272
+ >
1273
+ {/* Export Options Dropdown */}
1274
+ <DropdownMenu>
1275
+ <DropdownMenuTrigger asChild>
1276
+ <Button
1277
+ variant="ghost"
1278
+ className="h-11 w-11 rounded-xl hover:bg-slate-100"
1279
+ disabled={downloading !== null}
1280
+ >
1281
+ {downloading ? (
1282
+ <motion.div
1283
+ animate={{ rotate: 360 }}
1284
+ transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
1285
+ >
1286
+ <Download className="h-4 w-4" />
1287
+ </motion.div>
1288
+ ) : (
1289
+ <Share2 className="h-4 w-4" />
1290
+ )}
1291
+ </Button>
1292
+ </DropdownMenuTrigger>
1293
+ <DropdownMenuContent align="end" className="w-56 rounded-xl p-2">
1294
+ <DropdownMenuItem
1295
+ className="rounded-lg cursor-pointer"
1296
+ onClick={() => setIsShareModalOpen(true)}
1297
+ >
1298
+ <Mail className="h-4 w-4 mr-2 text-indigo-600" />
1299
+ Share output
1300
+ </DropdownMenuItem>
1301
+ <DropdownMenuItem
1302
+ className="rounded-lg cursor-pointer"
1303
+ onClick={handleCopyLink}
1304
+ >
1305
+ <Link2 className="h-4 w-4 mr-2 text-indigo-600" />
1306
+ Copy share link
1307
+ </DropdownMenuItem>
1308
+ <DropdownMenuSeparator />
1309
+ <DropdownMenuItem
1310
+ className="rounded-lg cursor-pointer"
1311
+ onClick={() => handleDownload("docx")}
1312
+ disabled={downloading === "docx"}
1313
+ >
1314
+ {downloading === "docx" ? (
1315
+ <motion.div
1316
+ animate={{ rotate: 360 }}
1317
+ transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
1318
+ className="h-4 w-4 mr-2"
1319
+ >
1320
+ <Download className="h-4 w-4" />
1321
+ </motion.div>
1322
+ ) : (
1323
+ <FileText className="h-4 w-4 mr-2 text-blue-600" />
1324
+ )}
1325
+ Download Docx
1326
+ </DropdownMenuItem>
1327
+ <DropdownMenuItem
1328
+ className="rounded-lg cursor-pointer"
1329
+ onClick={() => handleDownload("json")}
1330
+ disabled={downloading === "json"}
1331
+ >
1332
+ {downloading === "json" ? (
1333
+ <motion.div
1334
+ animate={{ rotate: 360 }}
1335
+ transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
1336
+ className="h-4 w-4 mr-2"
1337
+ >
1338
+ <Download className="h-4 w-4" />
1339
+ </motion.div>
1340
+ ) : (
1341
+ <Braces className="h-4 w-4 mr-2 text-indigo-600" />
1342
+ )}
1343
+ Download JSON
1344
+ </DropdownMenuItem>
1345
+ <DropdownMenuItem
1346
+ className="rounded-lg cursor-pointer"
1347
+ onClick={() => handleDownload("xml")}
1348
+ disabled={downloading === "xml"}
1349
+ >
1350
+ {downloading === "xml" ? (
1351
+ <motion.div
1352
+ animate={{ rotate: 360 }}
1353
+ transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
1354
+ className="h-4 w-4 mr-2"
1355
+ >
1356
+ <Download className="h-4 w-4" />
1357
+ </motion.div>
1358
+ ) : (
1359
+ <FileCode2 className="h-4 w-4 mr-2 text-slate-600" />
1360
+ )}
1361
+ Download XML
1362
+ </DropdownMenuItem>
1363
+ </DropdownMenuContent>
1364
+ </DropdownMenu>
1365
+
1366
+ {/* Share Modal */}
1367
+ <ShareModal
1368
+ isOpen={isShareModalOpen}
1369
+ onClose={() => setIsShareModalOpen(false)}
1370
+ onShare={handleShare}
1371
+ extractionId={extractionResult?.id}
1372
+ />
1373
+
1374
+ {/* Share Link Modal */}
1375
+ <ShareLinkModal
1376
+ isOpen={isShareLinkModalOpen}
1377
+ onClose={() => {
1378
+ setIsShareLinkModalOpen(false);
1379
+ setShareLink("");
1380
+ }}
1381
+ shareLink={shareLink}
1382
+ isLoading={isGeneratingLink}
1383
+ />
1384
+ </motion.div>
1385
+ );
1386
+ }
1387
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/components/ShareLinkModal.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  import React, { useState, useEffect } from "react";
2
  import { motion, AnimatePresence } from "framer-motion";
3
  import { X, Copy, Check, Loader2 } from "lucide-react";
@@ -139,3 +140,146 @@ export default function ShareLinkModal({ isOpen, onClose, shareLink, isLoading }
139
  );
140
  }
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import React, { useState, useEffect } from "react";
3
  import { motion, AnimatePresence } from "framer-motion";
4
  import { X, Copy, Check, Loader2 } from "lucide-react";
 
140
  );
141
  }
142
 
143
+ =======
144
+ import React, { useState, useEffect } from "react";
145
+ import { motion, AnimatePresence } from "framer-motion";
146
+ import { X, Copy, Check, Loader2 } from "lucide-react";
147
+ import { Button } from "@/components/ui/button";
148
+ import { Input } from "@/components/ui/input";
149
+
150
+ export default function ShareLinkModal({ isOpen, onClose, shareLink, isLoading }) {
151
+ const [copied, setCopied] = useState(false);
152
+
153
+ useEffect(() => {
154
+ if (!isOpen) {
155
+ setCopied(false);
156
+ }
157
+ }, [isOpen]);
158
+
159
+ const handleCopy = async () => {
160
+ if (!shareLink) return;
161
+
162
+ try {
163
+ await navigator.clipboard.writeText(shareLink);
164
+ setCopied(true);
165
+ setTimeout(() => setCopied(false), 2000);
166
+ } catch (err) {
167
+ // Fallback for older browsers
168
+ const textArea = document.createElement("textarea");
169
+ textArea.value = shareLink;
170
+ textArea.style.position = "fixed";
171
+ textArea.style.opacity = "0";
172
+ document.body.appendChild(textArea);
173
+ textArea.select();
174
+ try {
175
+ document.execCommand("copy");
176
+ setCopied(true);
177
+ setTimeout(() => setCopied(false), 2000);
178
+ } catch (fallbackErr) {
179
+ console.error("Failed to copy:", fallbackErr);
180
+ }
181
+ document.body.removeChild(textArea);
182
+ }
183
+ };
184
+
185
+ if (!isOpen) return null;
186
+
187
+ return (
188
+ <AnimatePresence>
189
+ <div className="fixed inset-0 z-50 flex items-center justify-center">
190
+ {/* Backdrop */}
191
+ <motion.div
192
+ initial={{ opacity: 0 }}
193
+ animate={{ opacity: 1 }}
194
+ exit={{ opacity: 0 }}
195
+ className="absolute inset-0 bg-black/50 backdrop-blur-sm"
196
+ onClick={onClose}
197
+ />
198
+
199
+ {/* Modal */}
200
+ <motion.div
201
+ initial={{ opacity: 0, scale: 0.95, y: 20 }}
202
+ animate={{ opacity: 1, scale: 1, y: 0 }}
203
+ exit={{ opacity: 0, scale: 0.95, y: 20 }}
204
+ className="relative z-10 w-full max-w-md mx-4 bg-white rounded-2xl shadow-2xl overflow-hidden"
205
+ onClick={(e) => e.stopPropagation()}
206
+ >
207
+ {/* Header */}
208
+ <div className="px-6 py-4 border-b border-slate-200 flex items-center justify-between">
209
+ <h2 className="text-xl font-semibold text-slate-900">Copy Share Link</h2>
210
+ <button
211
+ onClick={onClose}
212
+ disabled={isLoading}
213
+ className="p-2 rounded-lg hover:bg-slate-100 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
214
+ >
215
+ <X className="h-5 w-5 text-slate-500" />
216
+ </button>
217
+ </div>
218
+
219
+ {/* Content */}
220
+ <div className="px-6 py-6">
221
+ {isLoading ? (
222
+ <div className="text-center py-8">
223
+ <Loader2 className="h-8 w-8 mx-auto mb-4 text-indigo-600 animate-spin" />
224
+ <p className="text-sm text-slate-600">Generating share link...</p>
225
+ </div>
226
+ ) : shareLink ? (
227
+ <div className="space-y-4">
228
+ <div>
229
+ <label className="block text-sm font-medium text-slate-700 mb-2">
230
+ Share Link
231
+ </label>
232
+ <div className="flex gap-2">
233
+ <Input
234
+ type="text"
235
+ value={shareLink}
236
+ readOnly
237
+ className="flex-1 h-12 rounded-xl border-slate-200 bg-slate-50 text-sm font-mono"
238
+ />
239
+ <Button
240
+ onClick={handleCopy}
241
+ className="h-12 px-4 rounded-xl bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700"
242
+ >
243
+ {copied ? (
244
+ <>
245
+ <Check className="h-4 w-4 mr-2" />
246
+ Copied!
247
+ </>
248
+ ) : (
249
+ <>
250
+ <Copy className="h-4 w-4 mr-2" />
251
+ Copy
252
+ </>
253
+ )}
254
+ </Button>
255
+ </div>
256
+ </div>
257
+ <p className="text-xs text-slate-500">
258
+ Share this link with anyone you want to give access to this extraction. They'll need to sign in to view it.
259
+ </p>
260
+ </div>
261
+ ) : (
262
+ <div className="text-center py-8">
263
+ <p className="text-sm text-slate-600">No share link available</p>
264
+ </div>
265
+ )}
266
+
267
+ <div className="pt-4 mt-6 border-t border-slate-200">
268
+ <Button
269
+ type="button"
270
+ variant="outline"
271
+ onClick={onClose}
272
+ disabled={isLoading}
273
+ className="w-full h-11 rounded-xl"
274
+ >
275
+ Close
276
+ </Button>
277
+ </div>
278
+ </div>
279
+ </motion.div>
280
+ </div>
281
+ </AnimatePresence>
282
+ );
283
+ }
284
+
285
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/components/ShareModal.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  import React, { useState } from "react";
2
  import { motion, AnimatePresence } from "framer-motion";
3
  import { X, Mail, Send, Loader2 } from "lucide-react";
@@ -195,3 +196,202 @@ export default function ShareModal({ isOpen, onClose, onShare, extractionId }) {
195
  );
196
  }
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import React, { useState } from "react";
3
  import { motion, AnimatePresence } from "framer-motion";
4
  import { X, Mail, Send, Loader2 } from "lucide-react";
 
196
  );
197
  }
198
 
199
+ =======
200
+ import React, { useState } from "react";
201
+ import { motion, AnimatePresence } from "framer-motion";
202
+ import { X, Mail, Send, Loader2 } from "lucide-react";
203
+ import { Button } from "@/components/ui/button";
204
+ import { Input } from "@/components/ui/input";
205
+
206
+ export default function ShareModal({ isOpen, onClose, onShare, extractionId }) {
207
+ const [email, setEmail] = useState("");
208
+ const [isLoading, setIsLoading] = useState(false);
209
+ const [error, setError] = useState("");
210
+ const [success, setSuccess] = useState(false);
211
+ const [successMessage, setSuccessMessage] = useState("");
212
+
213
+ const handleSubmit = async (e) => {
214
+ e.preventDefault();
215
+ setError("");
216
+ setSuccess(false);
217
+
218
+ // Parse and validate multiple emails (comma or semicolon separated)
219
+ if (!email.trim()) {
220
+ setError("Please enter at least one recipient email address");
221
+ return;
222
+ }
223
+
224
+ // Split by comma or semicolon, trim each email, and filter out empty strings
225
+ const emailList = email
226
+ .split(/[,;]/)
227
+ .map((e) => e.trim())
228
+ .filter((e) => e.length > 0);
229
+
230
+ if (emailList.length === 0) {
231
+ setError("Please enter at least one recipient email address");
232
+ return;
233
+ }
234
+
235
+ // Validate each email
236
+ const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
237
+ const invalidEmails = emailList.filter((e) => !emailRegex.test(e));
238
+
239
+ if (invalidEmails.length > 0) {
240
+ setError(`Invalid email address(es): ${invalidEmails.join(", ")}`);
241
+ return;
242
+ }
243
+
244
+ setIsLoading(true);
245
+ try {
246
+ const result = await onShare(extractionId, emailList);
247
+ setSuccessMessage(result?.message || `Successfully shared with ${emailList.length} recipient(s)`);
248
+ setSuccess(true);
249
+ setEmail("");
250
+ // Close modal after 2 seconds
251
+ setTimeout(() => {
252
+ setSuccess(false);
253
+ setSuccessMessage("");
254
+ onClose();
255
+ }, 2000);
256
+ } catch (err) {
257
+ setError(err.message || "Failed to share extraction. Please try again.");
258
+ } finally {
259
+ setIsLoading(false);
260
+ }
261
+ };
262
+
263
+ const handleClose = () => {
264
+ if (!isLoading) {
265
+ setEmail("");
266
+ setError("");
267
+ setSuccess(false);
268
+ onClose();
269
+ }
270
+ };
271
+
272
+ if (!isOpen) return null;
273
+
274
+ return (
275
+ <AnimatePresence>
276
+ <div className="fixed inset-0 z-50 flex items-center justify-center">
277
+ {/* Backdrop */}
278
+ <motion.div
279
+ initial={{ opacity: 0 }}
280
+ animate={{ opacity: 1 }}
281
+ exit={{ opacity: 0 }}
282
+ className="absolute inset-0 bg-black/50 backdrop-blur-sm"
283
+ onClick={handleClose}
284
+ />
285
+
286
+ {/* Modal */}
287
+ <motion.div
288
+ initial={{ opacity: 0, scale: 0.95, y: 20 }}
289
+ animate={{ opacity: 1, scale: 1, y: 0 }}
290
+ exit={{ opacity: 0, scale: 0.95, y: 20 }}
291
+ className="relative z-10 w-full max-w-md mx-4 bg-white rounded-2xl shadow-2xl overflow-hidden"
292
+ onClick={(e) => e.stopPropagation()}
293
+ >
294
+ {/* Header */}
295
+ <div className="px-6 py-4 border-b border-slate-200 flex items-center justify-between">
296
+ <h2 className="text-xl font-semibold text-slate-900">Share Output</h2>
297
+ <button
298
+ onClick={handleClose}
299
+ disabled={isLoading}
300
+ className="p-2 rounded-lg hover:bg-slate-100 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
301
+ >
302
+ <X className="h-5 w-5 text-slate-500" />
303
+ </button>
304
+ </div>
305
+
306
+ {/* Content */}
307
+ <div className="px-6 py-6">
308
+ {success ? (
309
+ <motion.div
310
+ initial={{ opacity: 0, scale: 0.9 }}
311
+ animate={{ opacity: 1, scale: 1 }}
312
+ className="text-center py-8"
313
+ >
314
+ <div className="w-16 h-16 mx-auto mb-4 rounded-full bg-emerald-100 flex items-center justify-center">
315
+ <Send className="h-8 w-8 text-emerald-600" />
316
+ </div>
317
+ <h3 className="text-lg font-semibold text-slate-900 mb-2">
318
+ Share Sent Successfully!
319
+ </h3>
320
+ <p className="text-sm text-slate-600">
321
+ {successMessage || "The recipient(s) will receive an email with a link to view the extraction."}
322
+ </p>
323
+ </motion.div>
324
+ ) : (
325
+ <form onSubmit={handleSubmit} className="space-y-4">
326
+ <div>
327
+ <label
328
+ htmlFor="recipient-email"
329
+ className="block text-sm font-medium text-slate-700 mb-2"
330
+ >
331
+ Recipient Email(s)
332
+ </label>
333
+ <p className="text-xs text-slate-500 mb-2">
334
+ Separate multiple emails with commas or semicolons
335
+ </p>
336
+ <div className="relative">
337
+ <Mail className="absolute left-3 top-1/2 -translate-y-1/2 h-5 w-5 text-slate-400" />
338
+ <Input
339
+ id="recipient-email"
340
+ type="text"
341
+ value={email}
342
+ onChange={(e) => setEmail(e.target.value)}
343
+ placeholder="Enter email addresses (comma or semicolon separated)"
344
+ className="pl-10 h-12 rounded-xl border-slate-200 focus:border-indigo-500 focus:ring-indigo-500"
345
+ disabled={isLoading}
346
+ autoFocus
347
+ />
348
+ </div>
349
+ {error && (
350
+ <motion.p
351
+ initial={{ opacity: 0, y: -10 }}
352
+ animate={{ opacity: 1, y: 0 }}
353
+ className="mt-2 text-sm text-red-600"
354
+ >
355
+ {error}
356
+ </motion.p>
357
+ )}
358
+ </div>
359
+
360
+ <div className="pt-4 flex gap-3">
361
+ <Button
362
+ type="button"
363
+ variant="outline"
364
+ onClick={handleClose}
365
+ disabled={isLoading}
366
+ className="flex-1 h-11 rounded-xl"
367
+ >
368
+ Cancel
369
+ </Button>
370
+ <Button
371
+ type="submit"
372
+ disabled={isLoading || !email.trim()}
373
+ className="flex-1 h-11 rounded-xl bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700"
374
+ >
375
+ {isLoading ? (
376
+ <>
377
+ <Loader2 className="h-4 w-4 mr-2 animate-spin" />
378
+ Sending...
379
+ </>
380
+ ) : (
381
+ <>
382
+ <Send className="h-4 w-4 mr-2" />
383
+ Send
384
+ </>
385
+ )}
386
+ </Button>
387
+ </div>
388
+ </form>
389
+ )}
390
+ </div>
391
+ </motion.div>
392
+ </div>
393
+ </AnimatePresence>
394
+ );
395
+ }
396
+
397
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/components/auth/LoginForm.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  import React, { useState } from "react";
2
  import { motion } from "framer-motion";
3
  import { Button } from "@/components/ui/button";
@@ -510,3 +511,517 @@ export default function LoginForm() {
510
  </div>
511
  );
512
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import React, { useState } from "react";
3
  import { motion } from "framer-motion";
4
  import { Button } from "@/components/ui/button";
 
511
  </div>
512
  );
513
  }
514
+ =======
515
+ import React, { useState } from "react";
516
+ import { motion } from "framer-motion";
517
+ import { Button } from "@/components/ui/button";
518
+ import { Input } from "@/components/ui/input";
519
+ import { Separator } from "@/components/ui/separator";
520
+ import {
521
+ Zap,
522
+ Target,
523
+ Upload,
524
+ CheckCircle2,
525
+ ArrowRight,
526
+ Mail,
527
+ Sparkles,
528
+ Shield,
529
+ Globe,
530
+ AlertCircle,
531
+ Loader2,
532
+ } from "lucide-react";
533
+ import { useAuth } from "@/contexts/AuthContext";
534
+
535
+ export default function LoginForm() {
536
+ const { firebaseLogin, requestOTP, verifyOTP } = useAuth();
537
+ const [email, setEmail] = useState("");
538
+ const [showOtp, setShowOtp] = useState(false);
539
+ const [otp, setOtp] = useState(["", "", "", "", "", ""]);
540
+ const [loading, setLoading] = useState(false);
541
+ const [error, setError] = useState("");
542
+
543
// Business email validation: consumer mailbox domains that are rejected.
const PERSONAL_EMAIL_DOMAINS = [
  "gmail.com",
  "yahoo.com",
  "hotmail.com",
  "outlook.com",
  "aol.com",
  "icloud.com",
  "mail.com",
  "protonmail.com",
  "yandex.com",
  "zoho.com",
  "gmx.com",
  "live.com",
  "msn.com",
];

/**
 * Reports whether an address looks like a business (non-consumer) email.
 *
 * The domain is everything after the LAST "@", compared case-insensitively
 * against PERSONAL_EMAIL_DOMAINS after trimming surrounding whitespace.
 *
 * @param {string} email - Address as typed by the user.
 * @returns {boolean} `false` for non-strings, addresses without "@", an empty
 *   local part or empty domain, or a listed personal domain; `true` otherwise.
 */
const isBusinessEmail = (email) => {
  if (typeof email !== "string") return false;

  const address = email.trim();
  const atIndex = address.lastIndexOf("@");
  // Need at least one character on each side of the "@".
  if (atIndex <= 0 || atIndex === address.length - 1) return false;

  const domain = address.slice(atIndex + 1).toLowerCase();
  return !PERSONAL_EMAIL_DOMAINS.includes(domain);
};
565
+
566
/**
 * Click handler for "Continue with Google": clears any previous error, shows
 * the loading state and awaits `firebaseLogin()`. A failure is surfaced through
 * the form's error message; the loading state is always cleared afterwards.
 */
const handleGoogleLogin = async () => {
  setLoading(true);
  setError("");

  try {
    await firebaseLogin();
  } catch (loginError) {
    const message = loginError.message || "Failed to sign in with Google";
    setError(message);
  } finally {
    setLoading(false);
  }
};
577
+
578
/**
 * Submit handler for the email form.
 *
 * Rejects an empty address or one that fails `isBusinessEmail`; otherwise
 * requests an OTP for the address and switches the form to the OTP step.
 *
 * @param {Event} e - Form submit event; its default action is prevented.
 */
const handleEmailSubmit = async (e) => {
  e.preventDefault();
  setLoading(true);
  setError("");

  // Report a validation problem and drop the loading state again.
  const failValidation = (message) => {
    setError(message);
    setLoading(false);
  };

  if (!email) {
    failValidation("Please enter your email address");
    return;
  }

  if (!isBusinessEmail(email)) {
    failValidation("Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, etc.) are not permitted.");
    return;
  }

  try {
    await requestOTP(email);
    setShowOtp(true);
  } catch (requestError) {
    setError(requestError.message || "Failed to send OTP");
  } finally {
    setLoading(false);
  }
};
604
+
605
/**
 * Change handler for a single OTP box.
 *
 * Accepts only an empty string or one digit. Stores it at `index`, clears the
 * error, and moves focus to the next box when a digit was entered.
 *
 * @param {number} index - Position of the edited box (0-5).
 * @param {string} value - New content of that box.
 */
const handleOtpChange = (index, value) => {
  const isEmptyOrSingleDigit = value.length <= 1 && /^\d*$/.test(value);
  if (!isEmptyOrSingleDigit) {
    return;
  }

  const updatedOtp = otp.slice();
  updatedOtp[index] = value;
  setOtp(updatedOtp);
  setError("");

  // Auto-advance to the following box, except from the last one.
  if (value && index < 5) {
    document.getElementById(`otp-${index + 1}`)?.focus();
  }
};
619
+
620
/**
 * Paste handler for the OTP boxes.
 *
 * Keeps only the digits of the pasted text (at most six), writes them into the
 * boxes starting at `startIndex`, and then focuses the box after the last one
 * filled (capped at the final box). Pastes without digits change nothing.
 *
 * @param {ClipboardEvent} e - Paste event; its default action is prevented.
 * @param {number} [startIndex=0] - Box in which the paste happened.
 */
const handleOtpPaste = (e, startIndex = 0) => {
  e.preventDefault();

  const digits = e.clipboardData
    .getData("text")
    .replace(/\D/g, "")
    .slice(0, 6);
  if (digits.length === 0) {
    return;
  }

  const filledOtp = otp.slice();
  [...digits].forEach((digit, offset) => {
    const slot = startIndex + offset;
    // Digits that would land past the sixth box are dropped.
    if (slot < 6) {
      filledOtp[slot] = digit;
    }
  });
  setOtp(filledOtp);
  setError("");

  const focusIndex = Math.min(startIndex + digits.length, 5);
  document.getElementById(`otp-${focusIndex}`)?.focus();
};
641
+
642
/**
 * Key handler for an OTP box: Backspace in an already-empty box (other than
 * the first) moves focus to the previous box.
 *
 * @param {number} index - Position of the box receiving the key press.
 * @param {KeyboardEvent} e - The keydown event.
 */
const handleOtpKeyDown = (index, e) => {
  const shouldStepBack = e.key === "Backspace" && !otp[index] && index > 0;
  if (!shouldStepBack) {
    return;
  }
  document.getElementById(`otp-${index - 1}`)?.focus();
};
648
+
649
/**
 * Submit handler for the OTP step.
 *
 * Joins the six boxes into one code and, if it has exactly six characters,
 * passes it with the email to `verifyOTP`. On failure the error is shown and
 * all boxes are cleared.
 *
 * @param {Event} e - Form submit event; its default action is prevented.
 */
const handleOtpVerify = async (e) => {
  e.preventDefault();
  setLoading(true);
  setError("");

  const code = otp.join("");
  if (code.length !== 6) {
    setError("Please enter a valid 6-digit OTP");
    setLoading(false);
    return;
  }

  try {
    // Nothing further happens here on success; per the original note the
    // redirect is handled by AuthContext (not visible in this chunk).
    await verifyOTP(email, code);
  } catch (verifyError) {
    setError(verifyError.message || "Invalid OTP. Please try again.");
    setOtp(Array(6).fill(""));
  } finally {
    setLoading(false);
  }
};
671
+
672
+ const features = [
673
+ {
674
+ icon: Zap,
675
+ title: "Lightning Fast",
676
+ description: "Process documents in seconds and get outputs for ERP ingestion",
677
+ color: "text-amber-500",
678
+ bg: "bg-amber-50",
679
+ },
680
+ {
681
+ icon: Target,
682
+ title: "100% Accuracy",
683
+ description: "Industry-leading extraction with Visual Reasoning Processor",
684
+ color: "text-emerald-500",
685
+ bg: "bg-emerald-50",
686
+ },
687
+ {
688
+ icon: Globe,
689
+ title: "Any Format, Any Language",
690
+ description: "PDF, images, scanned docs — multi-lingual support included",
691
+ color: "text-blue-500",
692
+ bg: "bg-blue-50",
693
+ },
694
+ ];
695
+
696
+ const supportedFormats = [
697
+ { ext: "PDF", color: "bg-red-500" },
698
+ { ext: "PNG", color: "bg-blue-500" },
699
+ { ext: "JPG", color: "bg-green-500" },
700
+ { ext: "TIFF", color: "bg-purple-500" },
701
+ ];
702
+
703
+ return (
704
+ <div className="min-h-screen bg-gradient-to-br from-slate-50 via-white to-blue-50 flex">
705
+ {/* Left Side - Product Showcase */}
706
+ <div className="hidden lg:flex lg:w-[56%] flex-col justify-between p-8 relative overflow-hidden">
707
+ {/* Background Elements */}
708
+ <div className="absolute top-0 right-0 w-96 h-96 bg-blue-100/40 rounded-full blur-3xl -translate-y-1/2 translate-x-1/2" />
709
+ <div className="absolute bottom-0 left-0 w-80 h-80 bg-emerald-100/40 rounded-full blur-3xl translate-y-1/2 -translate-x-1/2" />
710
+
711
+ {/* Logo & Brand */}
712
+ <motion.div
713
+ initial={{ opacity: 0, y: -20 }}
714
+ animate={{ opacity: 1, y: 0 }}
715
+ className="relative z-10 mb-6"
716
+ >
717
+ <div className="flex items-center gap-3">
718
+ <div className="h-12 w-12 flex items-center justify-center flex-shrink-0">
719
+ <img
720
+ src="/logo.png"
721
+ alt="EZOFIS AI Logo"
722
+ className="h-full w-full object-contain"
723
+ onError={(e) => {
724
+ // Fallback: hide image if logo not found
725
+ e.target.style.display = 'none';
726
+ }}
727
+ />
728
+ </div>
729
+ <div>
730
+ <h1 className="text-2xl font-bold text-slate-900 tracking-tight">EZOFISOCR</h1>
731
+ <p className="text-sm text-slate-500 font-medium">VRP Intelligence</p>
732
+ </div>
733
+ </div>
734
+ </motion.div>
735
+
736
+ {/* Main Content */}
737
+ <motion.div
738
+ initial={{ opacity: 0, y: 20 }}
739
+ animate={{ opacity: 1, y: 0 }}
740
+ transition={{ delay: 0.1 }}
741
+ className="relative z-10 space-y-5 flex-1 flex flex-col justify-center ml-24 xl:ml-36"
742
+ >
743
+ <div className="space-y-3">
744
+ <h2 className="text-3xl xl:text-4xl font-bold text-slate-900 leading-tight">
745
+ Pure Agentic
746
+ <span className="block text-transparent bg-clip-text bg-gradient-to-r from-blue-600 to-indigo-600">
747
+ Document Intelligence
748
+ </span>
749
+ </h2>
750
+ <p className="text-base text-slate-600 max-w-lg leading-relaxed">
751
+ Deterministic, layout-aware extraction (without LLM) using our proprietary{" "}
752
+ <span className="font-semibold text-slate-800">Visual Reasoning Processor (VRP)</span>
753
+ </p>
754
+ </div>
755
+
756
+ {/* Product Preview Card */}
757
+ <motion.div
758
+ initial={{ opacity: 0, scale: 0.95 }}
759
+ animate={{ opacity: 1, scale: 1 }}
760
+ transition={{ delay: 0.3 }}
761
+ className="bg-white rounded-2xl border border-slate-200/80 shadow-xl shadow-slate-200/50 p-4 max-w-lg"
762
+ >
763
+ <div className="border-2 border-dashed border-slate-200 rounded-xl p-5 text-center bg-slate-50/50">
764
+ <div className="w-12 h-12 rounded-full bg-slate-100 flex items-center justify-center mx-auto mb-3">
765
+ <Upload className="w-5 h-5 text-slate-400" />
766
+ </div>
767
+ <p className="text-slate-700 font-medium mb-1 text-sm">Drop a document to extract data</p>
768
+ <p className="text-xs text-slate-400">Invoices, purchase orders, delivery notes, receipts, and operational documents</p>
769
+
770
+ <div className="flex items-center justify-center gap-2 mt-3">
771
+ {supportedFormats.map((format, i) => (
772
+ <span key={i} className={`${format.color} text-white text-xs font-bold px-2 py-1 rounded`}>
773
+ {format.ext}
774
+ </span>
775
+ ))}
776
+ </div>
777
+ </div>
778
+
779
+ <div className="flex items-center justify-between mt-3 pt-3 border-t border-slate-100">
780
+ <div className="flex items-center gap-2">
781
+ <div className="w-2 h-2 rounded-full bg-emerald-500 animate-pulse" />
782
+ <span className="text-xs text-slate-600">Ready to extract</span>
783
+ </div>
784
+ <div className="flex items-center gap-1 text-emerald-600">
785
+ <CheckCircle2 className="w-3.5 h-3.5" />
786
+ <span className="text-xs font-semibold">99.8% Accuracy</span>
787
+ </div>
788
+ </div>
789
+ </motion.div>
790
+
791
+ {/* Features */}
792
+ <div className="grid gap-3">
793
+ {features.map((feature, index) => (
794
+ <motion.div
795
+ key={feature.title}
796
+ initial={{ opacity: 0, x: -20 }}
797
+ animate={{ opacity: 1, x: 0 }}
798
+ transition={{ delay: 0.4 + index * 0.1 }}
799
+ className="flex items-start gap-3 group"
800
+ >
801
+ <div
802
+ className={`w-9 h-9 rounded-xl ${feature.bg} flex items-center justify-center flex-shrink-0 group-hover:scale-110 transition-transform`}
803
+ >
804
+ <feature.icon className={`w-4 h-4 ${feature.color}`} />
805
+ </div>
806
+ <div>
807
+ <h3 className="font-semibold text-slate-900 text-sm">{feature.title}</h3>
808
+ <p className="text-xs text-slate-500">{feature.description}</p>
809
+ </div>
810
+ </motion.div>
811
+ ))}
812
+ </div>
813
+ </motion.div>
814
+
815
+ {/* Trust Badge */}
816
+ <motion.div
817
+ initial={{ opacity: 0 }}
818
+ animate={{ opacity: 1 }}
819
+ transition={{ delay: 0.6 }}
820
+ className="relative z-10 flex items-center gap-3 text-xs text-slate-500 mt-6"
821
+ >
822
+ <Shield className="w-4 h-4" />
823
+ <span>Enterprise-grade security • SOC 2 Compliant • GDPR Ready</span>
824
+ </motion.div>
825
+ </div>
826
+
827
+ {/* Right Side - Sign In Form */}
828
+ <div className="w-full lg:w-[44%] flex items-center justify-center p-6 sm:p-10">
829
+ <motion.div
830
+ initial={{ opacity: 0, y: 20 }}
831
+ animate={{ opacity: 1, y: 0 }}
832
+ transition={{ delay: 0.2 }}
833
+ className="w-full max-w-md"
834
+ >
835
+ {/* Mobile Logo */}
836
+ <div className="lg:hidden flex items-center justify-center gap-3 mb-8">
837
+ <div className="h-12 w-12 flex items-center justify-center flex-shrink-0">
838
+ <img
839
+ src="/logo.png"
840
+ alt="EZOFIS AI Logo"
841
+ className="h-full w-full object-contain"
842
+ onError={(e) => {
843
+ // Fallback: hide image if logo not found
844
+ e.target.style.display = 'none';
845
+ }}
846
+ />
847
+ </div>
848
+ <div>
849
+ <h1 className="text-2xl font-bold text-slate-900 tracking-tight">EZOFISOCR</h1>
850
+ <p className="text-sm text-slate-500 font-medium">VRP Intelligence</p>
851
+ </div>
852
+ </div>
853
+
854
+ <div className="bg-white rounded-3xl border border-slate-200/80 shadow-2xl shadow-slate-200/50 p-8 sm:p-10">
855
+ <div className="text-center mb-8">
856
+ <h2 className="text-2xl font-bold text-slate-900 mb-2">
857
+ {showOtp ? "Enter verification code" : "Secure Access"}
858
+ </h2>
859
+ <p className="text-slate-500">
860
+ {showOtp ? `We sent a code to ${email}` : "Access your document intelligence workspace"}
861
+ </p>
862
+ </div>
863
+
864
+ {/* Error Message */}
865
+ {error && (
866
+ <motion.div
867
+ initial={{ opacity: 0, y: -10 }}
868
+ animate={{ opacity: 1, y: 0 }}
869
+ className="mb-6 p-3 bg-red-50 border border-red-200 rounded-xl flex items-start gap-2 text-sm text-red-700"
870
+ >
871
+ <AlertCircle className="h-4 w-4 flex-shrink-0 mt-0.5" />
872
+ <p>{error}</p>
873
+ </motion.div>
874
+ )}
875
+
876
+ {!showOtp ? (
877
+ <>
878
+ {/* Google Sign In */}
879
+ <Button
880
+ onClick={handleGoogleLogin}
881
+ disabled={loading}
882
+ variant="outline"
883
+ className="w-full h-12 text-base font-medium border-slate-200 hover:bg-slate-50 hover:border-slate-300 transition-all group"
884
+ >
885
+ {loading ? (
886
+ <Loader2 className="w-5 h-5 mr-3 animate-spin" />
887
+ ) : (
888
+ <svg className="w-5 h-5 mr-3" viewBox="0 0 24 24">
889
+ <path fill="#4285F4" d="M22.56 12.25c0-.78-.07-1.53-.2-2.25H12v4.26h5.92c-.26 1.37-1.04 2.53-2.21 3.31v2.77h3.57c2.08-1.92 3.28-4.74 3.28-8.09z" />
890
+ <path fill="#34A853" d="M12 23c2.97 0 5.46-.98 7.28-2.66l-3.57-2.77c-.98.66-2.23 1.06-3.71 1.06-2.86 0-5.29-1.93-6.16-4.53H2.18v2.84C3.99 20.53 7.7 23 12 23z" />
891
+ <path fill="#FBBC05" d="M5.84 14.09c-.22-.66-.35-1.36-.35-2.09s.13-1.43.35-2.09V7.07H2.18C1.43 8.55 1 10.22 1 12s.43 3.45 1.18 4.93l2.85-2.22.81-.62z" />
892
+ <path fill="#EA4335" d="M12 5.38c1.62 0 3.06.56 4.21 1.64l3.15-3.15C17.45 2.09 14.97 1 12 1 7.7 1 3.99 3.47 2.18 7.07l3.66 2.84c.87-2.6 3.3-4.53 6.16-4.53z" />
893
+ </svg>
894
+ )}
895
+ Continue with Google
896
+ <ArrowRight className="w-4 h-4 ml-auto opacity-0 -translate-x-2 group-hover:opacity-100 group-hover:translate-x-0 transition-all" />
897
+ </Button>
898
+
899
+ <div className="relative my-8">
900
+ <Separator />
901
+ <span className="absolute left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2 bg-white px-4 text-sm text-slate-400">
902
+ or continue with email
903
+ </span>
904
+ </div>
905
+
906
+ {/* Email Input */}
907
+ <form onSubmit={handleEmailSubmit} className="space-y-4">
908
+ <div className="relative">
909
+ <Mail className="absolute left-4 top-1/2 -translate-y-1/2 w-5 h-5 text-slate-400" />
910
+ <Input
911
+ type="email"
912
+ placeholder="name@company.com"
913
+ value={email}
914
+ onChange={(e) => {
915
+ setEmail(e.target.value);
916
+ setError("");
917
+ }}
918
+ className="h-12 pl-12 text-base border-slate-200 focus:border-blue-500 focus:ring-blue-500"
919
+ />
920
+ </div>
921
+ <Button
922
+ type="submit"
923
+ disabled={loading}
924
+ className="w-full h-12 text-base font-medium bg-gradient-to-r from-blue-600 to-indigo-600 hover:from-blue-700 hover:to-indigo-700 shadow-lg shadow-blue-500/25 transition-all"
925
+ >
926
+ {loading ? (
927
+ <>
928
+ <Loader2 className="w-4 h-4 mr-2 animate-spin" />
929
+ Sending...
930
+ </>
931
+ ) : (
932
+ <>
933
+ Continue with Email
934
+ <ArrowRight className="w-4 h-4 ml-2" />
935
+ </>
936
+ )}
937
+ </Button>
938
+ </form>
939
+ </>
940
+ ) : (
941
+ /* OTP Input */
942
+ <form onSubmit={handleOtpVerify} className="space-y-6">
943
+ <div className="flex justify-center gap-2">
944
+ {otp.map((digit, index) => (
945
+ <Input
946
+ key={index}
947
+ id={`otp-${index}`}
948
+ type="text"
949
+ inputMode="numeric"
950
+ maxLength={1}
951
+ value={digit}
952
+ onChange={(e) => handleOtpChange(index, e.target.value)}
953
+ onKeyDown={(e) => handleOtpKeyDown(index, e)}
954
+ onPaste={(e) => handleOtpPaste(e, index)}
955
+ className="w-12 h-14 text-center text-xl font-semibold border-slate-200 focus:border-blue-500 focus:ring-blue-500"
956
+ />
957
+ ))}
958
+ </div>
959
+
960
+ <Button
961
+ type="submit"
962
+ disabled={loading || otp.join("").length !== 6}
963
+ className="w-full h-12 text-base font-medium bg-gradient-to-r from-blue-600 to-indigo-600 hover:from-blue-700 hover:to-indigo-700 shadow-lg shadow-blue-500/25"
964
+ >
965
+ {loading ? (
966
+ <>
967
+ <Loader2 className="w-4 h-4 mr-2 animate-spin" />
968
+ Verifying...
969
+ </>
970
+ ) : (
971
+ <>
972
+ Verify & Sign In
973
+ <ArrowRight className="w-4 h-4 ml-2" />
974
+ </>
975
+ )}
976
+ </Button>
977
+
978
+ <button
979
+ type="button"
980
+ onClick={() => {
981
+ setShowOtp(false);
982
+ setOtp(["", "", "", "", "", ""]);
983
+ setError("");
984
+ }}
985
+ className="w-full text-sm text-slate-500 hover:text-slate-700 transition-colors"
986
+ >
987
+ ← Back to sign in options
988
+ </button>
989
+ </form>
990
+ )}
991
+
992
+ {/* Notice */}
993
+ <div className="mt-8 pt-6 border-t border-slate-100">
994
+ <div className="flex items-start gap-2 text-xs text-slate-400 mb-4">
995
+ <Shield className="w-4 h-4 flex-shrink-0 mt-0.5" />
996
+ <span>Only business email addresses are allowed</span>
997
+ </div>
998
+ <p className="text-xs text-slate-400 text-center leading-relaxed">
999
+ By signing in, you agree to our{" "}
1000
+ <a href="#" className="text-blue-600 hover:underline">
1001
+ Terms of Service
1002
+ </a>{" "}
1003
+ and{" "}
1004
+ <a href="#" className="text-blue-600 hover:underline">
1005
+ Privacy Policy
1006
+ </a>
1007
+ </p>
1008
+ </div>
1009
+ </div>
1010
+
1011
+ {/* Mobile Features */}
1012
+ <div className="lg:hidden mt-8 space-y-4">
1013
+ {features.map((feature) => (
1014
+ <div key={feature.title} className="flex items-center gap-3 text-sm">
1015
+ <div className={`w-8 h-8 rounded-lg ${feature.bg} flex items-center justify-center`}>
1016
+ <feature.icon className={`w-4 h-4 ${feature.color}`} />
1017
+ </div>
1018
+ <span className="text-slate-600">{feature.title}</span>
1019
+ </div>
1020
+ ))}
1021
+ </div>
1022
+ </motion.div>
1023
+ </div>
1024
+ </div>
1025
+ );
1026
+ }
1027
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/components/ocr/DocumentPreview.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  import React, { useState, useEffect, useRef } from "react";
2
  import { motion } from "framer-motion";
3
  import { FileText, ZoomIn, ZoomOut, RotateCw } from "lucide-react";
@@ -227,3 +228,234 @@ export default function DocumentPreview({ file, isProcessing, isFromHistory = fa
227
  </div>
228
  );
229
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import React, { useState, useEffect, useRef } from "react";
3
  import { motion } from "framer-motion";
4
  import { FileText, ZoomIn, ZoomOut, RotateCw } from "lucide-react";
 
228
  </div>
229
  );
230
  }
231
+ =======
232
+ import React, { useState, useEffect, useRef } from "react";
233
+ import { motion } from "framer-motion";
234
+ import { FileText, ZoomIn, ZoomOut, RotateCw } from "lucide-react";
235
+ import { Button } from "@/components/ui/button";
236
+
237
+ export default function DocumentPreview({ file, isProcessing, isFromHistory = false }) {
238
+ const [previewUrls, setPreviewUrls] = useState([]);
239
+ const [zoom, setZoom] = useState(100);
240
+ const [rotation, setRotation] = useState(0);
241
+ const objectUrlsRef = useRef([]);
242
+
243
+ useEffect(() => {
244
+ if (!file) {
245
+ // Cleanup previous URLs
246
+ objectUrlsRef.current.forEach((url) => {
247
+ if (url && url.startsWith("blob:")) {
248
+ URL.revokeObjectURL(url);
249
+ }
250
+ });
251
+ objectUrlsRef.current = [];
252
+ setPreviewUrls([]);
253
+ return;
254
+ }
255
+
256
+ const loadPreview = async () => {
257
+ const urls = [];
258
+ const newObjectUrls = [];
259
+
260
+ // Check if it's a PDF
261
+ if (file.type === "application/pdf" || file.name?.toLowerCase().endsWith(".pdf")) {
262
+ try {
263
+ // Use pdf.js to render PDF pages
264
+ const pdfjsLib = await import("pdfjs-dist");
265
+
266
+ // Configure worker - use jsdelivr CDN which is more reliable
267
+ // This will use the same version as the installed package
268
+ const version = pdfjsLib.version || "4.0.379";
269
+ pdfjsLib.GlobalWorkerOptions.workerSrc = `https://cdn.jsdelivr.net/npm/pdfjs-dist@${version}/build/pdf.worker.min.mjs`;
270
+
271
+ const arrayBuffer = await file.arrayBuffer();
272
+ const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;
273
+ const numPages = pdf.numPages;
274
+
275
+ for (let pageNum = 1; pageNum <= numPages; pageNum++) {
276
+ const page = await pdf.getPage(pageNum);
277
+ const viewport = page.getViewport({ scale: 2.0 });
278
+
279
+ const canvas = document.createElement("canvas");
280
+ const context = canvas.getContext("2d");
281
+ canvas.height = viewport.height;
282
+ canvas.width = viewport.width;
283
+
284
+ await page.render({
285
+ canvasContext: context,
286
+ viewport: viewport,
287
+ }).promise;
288
+
289
+ urls.push(canvas.toDataURL("image/jpeg", 0.95));
290
+ }
291
+ } catch (error) {
292
+ console.error("Error loading PDF:", error);
293
+ // Fallback: show error message
294
+ urls.push(null);
295
+ }
296
+ } else {
297
+ // For images, create object URL
298
+ const url = URL.createObjectURL(file);
299
+ urls.push(url);
300
+ newObjectUrls.push(url);
301
+ }
302
+
303
+ // Cleanup old object URLs
304
+ objectUrlsRef.current.forEach((url) => {
305
+ if (url && url.startsWith("blob:")) {
306
+ URL.revokeObjectURL(url);
307
+ }
308
+ });
309
+ objectUrlsRef.current = newObjectUrls;
310
+ setPreviewUrls(urls);
311
+ };
312
+
313
+ loadPreview();
314
+
315
+ // Cleanup function - revoke object URLs when component unmounts or file changes
316
+ return () => {
317
+ objectUrlsRef.current.forEach((url) => {
318
+ if (url && url.startsWith("blob:")) {
319
+ URL.revokeObjectURL(url);
320
+ }
321
+ });
322
+ objectUrlsRef.current = [];
323
+ };
324
+ }, [file]);
325
+
326
+ return (
327
+ <div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden">
328
+ {/* Header */}
329
+ <div className="flex items-center justify-between px-5 py-4 border-b border-slate-100">
330
+ <div className="flex items-center gap-3">
331
+ <div className="h-8 w-8 rounded-lg bg-indigo-50 flex items-center justify-center">
332
+ <FileText className="h-4 w-4 text-indigo-600" />
333
+ </div>
334
+ <div>
335
+ <h3 className="font-semibold text-slate-800 text-sm">Document Preview</h3>
336
+ <p className="text-xs text-slate-400">{file?.name || "No file selected"}</p>
337
+ </div>
338
+ </div>
339
+
340
+ {file && (
341
+ <div className="flex items-center gap-1">
342
+ <Button
343
+ variant="ghost"
344
+ size="icon"
345
+ className="h-8 w-8 text-slate-400 hover:text-slate-600"
346
+ onClick={() => setZoom(Math.max(50, zoom - 25))}
347
+ >
348
+ <ZoomOut className="h-4 w-4" />
349
+ </Button>
350
+ <span className="text-xs text-slate-500 w-12 text-center">{zoom}%</span>
351
+ <Button
352
+ variant="ghost"
353
+ size="icon"
354
+ className="h-8 w-8 text-slate-400 hover:text-slate-600"
355
+ onClick={() => setZoom(Math.min(200, zoom + 25))}
356
+ >
357
+ <ZoomIn className="h-4 w-4" />
358
+ </Button>
359
+ <div className="w-px h-4 bg-slate-200 mx-2" />
360
+ <Button
361
+ variant="ghost"
362
+ size="icon"
363
+ className="h-8 w-8 text-slate-400 hover:text-slate-600"
364
+ onClick={() => setRotation((rotation + 90) % 360)}
365
+ >
366
+ <RotateCw className="h-4 w-4" />
367
+ </Button>
368
+ </div>
369
+ )}
370
+ </div>
371
+
372
+ {/* Preview Area */}
373
+ <div className="flex-1 p-6 bg-slate-50/50 overflow-auto">
374
+ {!file ? (
375
+ <div className="h-full flex items-center justify-center">
376
+ <div className="text-center">
377
+ <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
378
+ <FileText className="h-10 w-10 text-slate-300" />
379
+ </div>
380
+ <p className="text-slate-400 text-sm">Upload a document to preview</p>
381
+ </div>
382
+ </div>
383
+ ) : previewUrls.length === 0 ? (
384
+ <div className="h-full flex items-center justify-center">
385
+ <div className="text-center">
386
+ <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
387
+ <FileText className="h-10 w-10 text-slate-300" />
388
+ </div>
389
+ <p className="text-slate-400 text-sm">Loading preview...</p>
390
+ </div>
391
+ </div>
392
+ ) : (
393
+ <div className="space-y-4">
394
+ {previewUrls.map((url, index) => (
395
+ <motion.div
396
+ key={index}
397
+ initial={{ opacity: 0, y: 20 }}
398
+ animate={{ opacity: 1, y: 0 }}
399
+ transition={{ delay: index * 0.1 }}
400
+ className="relative bg-white rounded-xl shadow-sm border border-slate-200 overflow-hidden flex items-center justify-center"
401
+ style={{
402
+ minHeight: "400px",
403
+ }}
404
+ >
405
+ {url ? (
406
+ <img
407
+ src={url}
408
+ alt={`Page ${index + 1}`}
409
+ className="w-full h-auto"
410
+ style={{
411
+ transform: `scale(${zoom / 100}) rotate(${rotation}deg)`,
412
+ maxWidth: "100%",
413
+ objectFit: "contain",
414
+ transition: "transform 0.2s ease",
415
+ }}
416
+ />
417
+ ) : (
418
+ <div className="p-8 text-center">
419
+ <p className="text-slate-400 text-sm">
420
+ {isFromHistory
421
+ ? "Original document not available for historical extractions"
422
+ : "Unable to load preview"}
423
+ </p>
424
+ </div>
425
+ )}
426
+
427
+ {/* Processing overlay */}
428
+ {isProcessing && (
429
+ <motion.div
430
+ initial={{ opacity: 0 }}
431
+ animate={{ opacity: 1 }}
432
+ className="absolute inset-0 bg-indigo-600/5 backdrop-blur-[1px] pointer-events-none"
433
+ >
434
+ <motion.div
435
+ initial={{ top: 0 }}
436
+ animate={{ top: "100%" }}
437
+ transition={{
438
+ duration: 2,
439
+ repeat: Infinity,
440
+ ease: "linear",
441
+ }}
442
+ className="absolute left-0 right-0 h-1 bg-gradient-to-r from-transparent via-indigo-500 to-transparent"
443
+ />
444
+ </motion.div>
445
+ )}
446
+
447
+ {/* Page number */}
448
+ {previewUrls.length > 1 && (
449
+ <div className="absolute bottom-3 right-3 text-xs text-slate-400 bg-white/90 px-2 py-1 rounded">
450
+ Page {index + 1}
451
+ </div>
452
+ )}
453
+ </motion.div>
454
+ ))}
455
+ </div>
456
+ )}
457
+ </div>
458
+ </div>
459
+ );
460
+ }
461
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/components/ocr/ExtractionOutput.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  import React, { useState, useEffect, useRef } from "react";
2
  import { motion, AnimatePresence } from "framer-motion";
3
  import {
@@ -1199,3 +1200,1206 @@ export default function ExtractionOutput({ hasFile, isProcessing, isComplete, ex
1199
  </div>
1200
  );
1201
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import React, { useState, useEffect, useRef } from "react";
3
  import { motion, AnimatePresence } from "framer-motion";
4
  import {
 
1200
  </div>
1201
  );
1202
  }
1203
+ =======
1204
+ import React, { useState, useEffect, useRef } from "react";
1205
+ import { motion, AnimatePresence } from "framer-motion";
1206
+ import {
1207
+ Code2,
1208
+ Copy,
1209
+ Check,
1210
+ Braces,
1211
+ FileCode2,
1212
+ FileText,
1213
+ Sparkles,
1214
+ ChevronDown,
1215
+ Upload,
1216
+ } from "lucide-react";
1217
+ import { Button } from "@/components/ui/button";
1218
+ import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
1219
+ import { cn } from "@/lib/utils";
1220
+
1221
// Helpers to convert pipe-separated (Markdown-style) tables to HTML tables

/**
 * Tell whether a line is a table separator row, i.e. it consists only of
 * pipes, dashes, colons and whitespace (for example `|---|:--:|`).
 *
 * @param {string} line - Raw line of text.
 * @returns {boolean} True when the trimmed line is a separator row.
 */
function isPipeTableSeparator(line) {
  return /^[\s|\-:]+$/.test(line.trim());
}

/**
 * Split a pipe-delimited row into trimmed cells.
 * The empty cell produced by a leading or trailing pipe is dropped.
 *
 * @param {string} line - Raw table row such as `| a | b |`.
 * @returns {string[]} Cell texts in column order.
 */
function splitPipeTableRow(line) {
  const cells = line.split('|').map((cell) => cell.trim());
  if (cells.length > 0 && !cells[0]) cells.shift();
  if (cells.length > 0 && !cells[cells.length - 1]) cells.pop();
  return cells;
}

/**
 * Build the HTML markup for one table.
 *
 * @param {string[][]} tableRows - First entry is the header row, the rest are data rows.
 * @returns {string} `<table>` markup with every cell text HTML-escaped.
 */
function buildPipeTableHtml(tableRows) {
  const [headerCells, ...dataRows] = tableRows;

  const headerHtml = headerCells
    .map(
      (cell) =>
        `<th class="border border-gray-300 px-4 py-2 bg-gray-100 font-semibold text-left">${escapeHtml(cell)}</th>`
    )
    .join('');

  const bodyHtml = dataRows
    .map((row) => {
      // Pad short rows to the header width so columns stay aligned; rows that
      // are wider than the header keep their extra cells.
      const missing = Math.max(0, headerCells.length - row.length);
      const cells = [...row, ...Array(missing).fill('')];
      const cellsHtml = cells
        .map((cell) => `<td class="border border-gray-300 px-4 py-2">${escapeHtml(cell || '')}</td>`)
        .join('');
      return `<tr>${cellsHtml}</tr>\n`;
    })
    .join('');

  return (
    '<table class="border-collapse border border-gray-300 w-full my-4">\n<thead>\n<tr>' +
    `${headerHtml}</tr>\n</thead>\n<tbody>\n${bodyHtml}</tbody>\n</table>`
  );
}

/**
 * Replace every pipe-separated table found in `text` with an HTML `<table>`.
 *
 * A table starts at a non-separator line that splits into at least three
 * pipe-delimited parts and yields two or more header cells. An optional
 * separator line after the header is skipped. The table ends at the first
 * blank line or the first line without a pipe. All other lines are returned
 * unchanged.
 *
 * @param {string} text - Source text, possibly containing pipe tables.
 * @returns {string} Text with tables converted; falsy input is returned as-is.
 */
function convertPipeTablesToHTML(text) {
  if (!text) return text;

  const lines = text.split('\n');
  const result = [];
  let i = 0;

  while (i < lines.length) {
    const line = lines[i];
    const looksLikeRow = line.includes('|') && line.split('|').length >= 3;

    if (looksLikeRow && !isPipeTableSeparator(line)) {
      const headerCells = splitPipeTableRow(line);

      if (headerCells.length >= 2) {
        const tableRows = [headerCells];
        let j = i + 1;

        // Skip separator line if present
        if (j < lines.length && isPipeTableSeparator(lines[j])) {
          j++;
        }

        // Collect data rows until a blank line or a line without any pipe
        while (j < lines.length) {
          const rowLine = lines[j];
          if (!rowLine.trim() || !rowLine.includes('|')) break;
          // Separator rows inside the table body are skipped, not rendered
          if (!isPipeTableSeparator(rowLine)) {
            tableRows.push(splitPipeTableRow(rowLine));
          }
          j++;
        }

        result.push(buildPipeTableHtml(tableRows));
        i = j;
        continue;
      }
    }

    // Not a table row, add as-is
    result.push(line);
    i++;
  }

  return result.join('\n');
}
1318
+
1319
/**
 * Escape text so it can be inserted into HTML markup as literal text.
 *
 * Implemented with plain string replacement, so it does not need a DOM and
 * also works during server-side rendering, in workers and in tests. Quotes
 * are escaped as well, which makes the result safe inside quoted attribute
 * values.
 *
 * @param {string} text - Raw text.
 * @returns {string} HTML-escaped text; an empty string for any falsy input.
 */
function escapeHtml(text) {
  if (!text) return '';
  return String(text)
    .replace(/&/g, '&amp;') // must run first so later entities are not double-escaped
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
1326
+
1327
/**
 * Convert extracted text containing lightweight Markdown, pipe tables,
 * LaTeX-style super/subscripts and embedded HTML tables into an HTML string.
 *
 * Processing order:
 *  1. pipe tables -> `<table>` (via convertPipeTablesToHTML)
 *  2. `$^{x}$` / `$_{x}$` -> `<sup>` / `<sub>`
 *  3. `<table>` blocks are swapped for placeholders so the Markdown rules
 *     below cannot alter them
 *  4. headers, bold/italic, links
 *  5. paragraphs / line breaks, then the table blocks are restored
 *
 * NOTE(review): apart from link URLs, raw HTML in `text` is passed through
 * unchanged (this is how embedded tables survive). The result is therefore
 * NOT sanitised; run it through a sanitiser before injecting untrusted
 * content into the DOM.
 *
 * @param {string} text - Source text.
 * @returns {string} HTML markup, or an empty string for falsy input.
 */
function renderMarkdownToHTML(text) {
  if (!text) return "";

  let html = text;

  // FIRST: Convert pipe-separated tables to HTML tables
  html = convertPipeTablesToHTML(html);

  // Convert LaTeX-style superscripts/subscripts before protecting tables,
  // so they are converted everywhere, including inside tables.

  // Superscripts: $^{text}$ or $^text$ to <sup>text</sup>
  html = html.replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, '<sup>$1</sup>');
  html = html.replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, '<sup>$1</sup>');

  // Subscripts: $_{text}$ or $_text$ to <sub>text</sub>
  html = html.replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, '<sub>$1</sub>');
  html = html.replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, '<sub>$1</sub>');

  // Protect existing HTML table blocks: replace each with a placeholder and
  // process Markdown only in the remaining text.
  const htmlBlocks = [];
  html = html.replace(/<table[\s\S]*?<\/table>/gi, (match) => {
    const placeholder = `__HTML_BLOCK_${htmlBlocks.length}__`;
    htmlBlocks.push(match);
    return placeholder;
  });

  // Convert markdown headers
  html = html.replace(/^### (.*$)/gim, '<h3>$1</h3>');
  html = html.replace(/^## (.*$)/gim, '<h2>$1</h2>');
  html = html.replace(/^# (.*$)/gim, '<h1>$1</h1>');

  // Convert markdown bold/italic
  html = html.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
  html = html.replace(/\*(.*?)\*/g, '<em>$1</em>');

  // Convert markdown links. The URL comes from document content, so:
  //  - script-capable schemes are rejected and only the link text is kept;
  //  - double quotes are escaped so the URL cannot break out of the href attribute.
  const unsafeScheme = /^(?:javascript|data|vbscript):/i;
  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, label, url) => {
    // Browsers ignore whitespace/control characters inside a scheme, so strip them before testing
    const normalizedUrl = url.replace(/[\u0000-\u0020\u007F]+/g, '');
    if (unsafeScheme.test(normalizedUrl)) {
      return label;
    }
    const safeUrl = url.replace(/"/g, '&quot;');
    return `<a href="${safeUrl}" target="_blank" rel="noopener noreferrer">${label}</a>`;
  });

  // Convert line breaks to paragraphs (but preserve structure around HTML blocks)
  const parts = html.split(/(__HTML_BLOCK_\d+__)/);
  const processedParts = parts.map((part) => {
    if (part.match(/^__HTML_BLOCK_\d+__$/)) {
      // Restore HTML block
      const blockIndex = Number.parseInt(part.match(/\d+/)[0], 10);
      return htmlBlocks[blockIndex];
    }

    let processed = part;

    // Convert double line breaks to paragraph breaks
    processed = processed.replace(/\n\n+/g, '</p><p>');
    // Convert single line breaks to <br> (but not directly next to a tag)
    processed = processed.replace(/([^\n>])\n([^\n<])/g, '$1<br>$2');

    // Wrap in paragraph if there's content that does not already start with a tag
    if (processed.trim() && !processed.trim().startsWith('<')) {
      processed = '<p>' + processed + '</p>';
    }

    return processed;
  });

  html = processedParts.join('');

  // Process LaTeX notation in restored HTML blocks (tables) as well.
  // This handles any LaTeX that might be in table cells.
  html = html.replace(/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*\^\s*\{([^}]+)\}\s*\$([^<]*)(<\/td>|<\/th>)/gi,
    (match, openTag, before, supText, after, closeTag) => {
      return openTag + before + '<sup>' + supText + '</sup>' + after + closeTag;
    });
  html = html.replace(/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*\^\s*([^\s$<>]+)\s*\$([^<]*)(<\/td>|<\/th>)/gi,
    (match, openTag, before, supText, after, closeTag) => {
      return openTag + before + '<sup>' + supText + '</sup>' + after + closeTag;
    });
  html = html.replace(/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*_\s*\{([^}]+)\}\s*\$([^<]*)(<\/td>|<\/th>)/gi,
    (match, openTag, before, subText, after, closeTag) => {
      return openTag + before + '<sub>' + subText + '</sub>' + after + closeTag;
    });
  html = html.replace(/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*_\s*([^\s$<>]+)\s*\$([^<]*)(<\/td>|<\/th>)/gi,
    (match, openTag, before, subText, after, closeTag) => {
      return openTag + before + '<sub>' + subText + '</sub>' + after + closeTag;
    });

  // Clean up empty paragraphs and fix paragraph structure
  html = html.replace(/<p><\/p>/g, '');
  html = html.replace(/<p>\s*<br>\s*<\/p>/g, '');
  html = html.replace(/<p>\s*<\/p>/g, '');

  // Ensure proper spacing around HTML blocks
  html = html.replace(/(<\/table>)\s*(<h[1-3])/g, '$1</p><p>$2');
  html = html.replace(/(<\/h[1-3]>)\s*(<table)/g, '$1<p>$2');
  html = html.replace(/(<\/table>)\s*(<p>)/g, '$1$2');

  return html;
}
1432
+
1433
+ // Mock extracted data
1434
+ const mockData = {
1435
+ document: {
1436
+ type: "Invoice",
1437
+ confidence: 0.98,
1438
+ },
1439
+ vendor: {
1440
+ name: "Acme Corporation",
1441
+ address: "123 Business Ave, Suite 400",
1442
+ city: "San Francisco",
1443
+ state: "CA",
1444
+ zip: "94102",
1445
+ phone: "+1 (555) 123-4567",
1446
+ },
1447
+ invoice: {
1448
+ number: "INV-2024-0847",
1449
+ date: "2024-01-15",
1450
+ due_date: "2024-02-14",
1451
+ po_number: "PO-9823",
1452
+ },
1453
+ items: [
1454
+ { description: "Professional Services", quantity: 40, unit_price: 150.0, total: 6000.0 },
1455
+ { description: "Software License", quantity: 5, unit_price: 299.99, total: 1499.95 },
1456
+ { description: "Support Package", quantity: 1, unit_price: 500.0, total: 500.0 },
1457
+ ],
1458
+ totals: {
1459
+ subtotal: 7999.95,
1460
+ tax_rate: 0.0875,
1461
+ tax_amount: 699.99,
1462
+ total: 8699.94,
1463
+ },
1464
+ };
1465
+
1466
+ const mockXML = `<?xml version="1.0" encoding="UTF-8"?>
1467
+ <extraction>
1468
+ <document type="Invoice" confidence="0.98"/>
1469
+ <vendor>
1470
+ <name>Acme Corporation</name>
1471
+ <address>123 Business Ave, Suite 400</address>
1472
+ <city>San Francisco</city>
1473
+ <state>CA</state>
1474
+ <zip>94102</zip>
1475
+ </vendor>
1476
+ <invoice>
1477
+ <number>INV-2024-0847</number>
1478
+ <date>2024-01-15</date>
1479
+ <due_date>2024-02-14</due_date>
1480
+ </invoice>
1481
+ <items>
1482
+ <item>
1483
+ <description>Professional Services</description>
1484
+ <quantity>40</quantity>
1485
+ <total>6000.00</total>
1486
+ </item>
1487
+ </items>
1488
+ <totals>
1489
+ <subtotal>7999.95</subtotal>
1490
+ <tax>699.99</tax>
1491
+ <total>8699.94</total>
1492
+ </totals>
1493
+ </extraction>`;
1494
+
1495
+ const mockText = `INVOICE
1496
+
1497
+ ACME CORPORATION
1498
+ 123 Business Ave, Suite 400
1499
+ San Francisco, CA 94102
1500
+ Phone: +1 (555) 123-4567
1501
+
1502
+ Invoice Number: INV-2024-0847
1503
+ Invoice Date: January 15, 2024
1504
+ Due Date: February 14, 2024
1505
+ PO Number: PO-9823
1506
+
1507
+ BILL TO:
1508
+ Customer Name
1509
+ 456 Client Street
1510
+ New York, NY 10001
1511
+
1512
+ ITEMS:
1513
+ ─────────────────────────────────────────────────────────
1514
+ Description Qty Unit Price Total
1515
+ ─────────────────────────────────────────────────────────
1516
+ Professional Services 40 $150.00 $6,000.00
1517
+ Software License 5 $299.99 $1,499.95
1518
+ Support Package 1 $500.00 $500.00
1519
+ ─────────────────────────────────────────────────────────
1520
+
1521
+ Subtotal: $7,999.95
1522
+ Tax (8.75%): $699.99
1523
+ ─────────────────────────
1524
+ TOTAL: $8,699.94
1525
+
1526
+ Payment Terms: Net 30
1527
+ Thank you for your business!`;
1528
+
1529
/**
 * Prepare extracted fields for JSON/XML output: remove duplicated text and
 * restructure per-page data.
 *
 * - A root-level `Fields` object is moved to the top of the result (and
 *   omitted when missing or empty).
 * - When a non-empty `pages` array exists, the root `full_text` and each
 *   page's `fields.full_text` are dropped because they duplicate `page.text`.
 * - For the "json" and "xml" formats the `pages` array is replaced by
 *   `page_1`, `page_2`, ... entries; page fields identical to a root-level
 *   field are omitted.
 * - `Fields` and `metadata` are removed from every `page_*` entry.
 *
 * The input is never modified: nested objects that need changes are copied first.
 *
 * @param {object} fields - Extraction result.
 * @param {string} [format="json"] - Target format; "json" and "xml" trigger the page restructuring.
 * @returns {object} New object ready for serialisation; non-object input is returned unchanged.
 */
function prepareFieldsForOutput(fields, format = "json") {
  if (!fields || typeof fields !== "object") {
    return fields;
  }

  const output = { ...fields };

  // Pull Fields out of the root; it is re-added first when the result is rebuilt
  const rootFields = output.Fields;
  delete output.Fields;

  // Remove full_text from top-level if pages array exists (to avoid duplication)
  if (output.pages && Array.isArray(output.pages) && output.pages.length > 0) {
    delete output.full_text;

    // Clean up each page: remove full_text from page.fields (it duplicates page.text)
    output.pages = output.pages.map((page) => {
      const cleanedPage = { ...page };
      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
        const cleanedFields = { ...cleanedPage.fields };
        delete cleanedFields.full_text;
        cleanedPage.fields = cleanedFields;
      }
      return cleanedPage;
    });
  }

  // For JSON and XML: restructure pages into separate top-level fields (page_1, page_2, etc.)
  if ((format === "json" || format === "xml") && output.pages && Array.isArray(output.pages)) {
    // Root-level keys, used to avoid repeating the same value inside page fields
    const topLevelKeys = new Set(
      Object.keys(output).filter((k) => k !== "pages" && k !== "full_text" && k !== "Fields")
    );

    output.pages.forEach((page, idx) => {
      const pageNum = page.page_number || idx + 1;
      const pageFields = page.fields || {};

      // Keep only page-specific fields: skip full_text and anything whose
      // value is identical to the root-level field of the same name
      const cleanedPageFields = {};
      for (const [key, value] of Object.entries(pageFields)) {
        if (key !== "full_text" && (!topLevelKeys.has(key) || value !== output[key])) {
          cleanedPageFields[key] = value;
        }
      }

      const pageObj = {
        text: page.text || "",
        confidence: page.confidence || 0,
        doc_type: page.doc_type || "other",
      };

      // Add table and footer_notes if they exist
      if (page.table && Array.isArray(page.table) && page.table.length > 0) {
        pageObj.table = page.table;
      }
      if (page.footer_notes && Array.isArray(page.footer_notes) && page.footer_notes.length > 0) {
        pageObj.footer_notes = page.footer_notes;
      }

      // Only add fields if there are unique page-specific fields
      if (Object.keys(cleanedPageFields).length > 0) {
        pageObj.fields = cleanedPageFields;
      }

      output[`page_${pageNum}`] = pageObj;
    });

    // Remove pages array - we now have page_1, page_2, etc. as separate fields
    delete output.pages;
  }

  // Handle page_X structure (from backend): drop Fields/metadata from page objects.
  // `output` is only a shallow copy, so each page object is copied before the
  // keys are removed; deleting in place would mutate the caller's data.
  for (const pageKey of Object.keys(output)) {
    if (!pageKey.startsWith("page_")) continue;

    const pageData = output[pageKey];
    if (pageData && typeof pageData === "object" && !Array.isArray(pageData)) {
      const cleanedPage = { ...pageData };
      delete cleanedPage.Fields;
      delete cleanedPage.metadata;
      output[pageKey] = cleanedPage;
    }
  }

  // Rebuild output with Fields at the top (only if it exists and is not empty)
  const finalOutput = {};
  if (rootFields && typeof rootFields === "object" && Object.keys(rootFields).length > 0) {
    finalOutput.Fields = rootFields;
  }

  // Add all other keys
  Object.keys(output).forEach((key) => {
    finalOutput[key] = output[key];
  });

  return finalOutput;
}
1631
+
1632
/**
 * Serialize an extraction result into an XML document string.
 *
 * The input is first passed through `prepareFieldsForOutput(obj, "xml")`, then
 * walked recursively: objects become nested elements, each array item becomes
 * a repeated element named after its key, and primitives become escaped text.
 *
 * @param {object} obj - Extracted fields to serialize.
 * @param {string} [rootName="extraction"] - Name of the root element.
 * @returns {string} The XML document, starting with the XML declaration.
 */
function objectToXML(obj, rootName = "extraction") {
  const preparedObj = prepareFieldsForOutput(obj, "xml");

  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;

  // NOTE(review): keys are emitted verbatim as element names; keys containing
  // spaces or other characters illegal in XML names yield malformed XML.
  const convert = (node, indent = " ") => {
    for (const [key, value] of Object.entries(node)) {
      if (value === null || value === undefined) continue;

      // full_text is redundant when a per-page breakdown is present.
      if (key === "full_text" && Array.isArray(node.pages) && node.pages.length > 0) {
        continue;
      }

      if (Array.isArray(value)) {
        value.forEach((item) => {
          // Skip empty slots the same way null properties are skipped;
          // previously a null item reached Object.entries(null) and threw.
          if (item === null || item === undefined) return;

          xml += `${indent}<${key}>\n`;
          if (typeof item === "object") {
            convert(item, indent + " ");
          } else {
            xml += `${indent} ${escapeXML(String(item))}\n`;
          }
          xml += `${indent}</${key}>\n`;
        });
      } else if (typeof value === "object") {
        xml += `${indent}<${key}>\n`;
        convert(value, indent + " ");
        xml += `${indent}</${key}>\n`;
      } else {
        xml += `${indent}<${key}>${escapeXML(String(value))}</${key}>\n`;
      }
    }
  };

  // Only walk real objects so an empty/absent result still yields a valid,
  // empty root element instead of throwing.
  if (preparedObj && typeof preparedObj === "object") {
    convert(preparedObj);
  }

  xml += `</${rootName}>`;
  return xml;
}
1671
+
1672
/**
 * Escape the five XML special characters in a string.
 *
 * @param {string} str - Raw text.
 * @returns {string} Text with &, <, >, " and ' replaced by their entities.
 */
function escapeXML(str) {
  const entityFor = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };
  // One pass over the input: each special character maps straight to its
  // entity, so already-produced entities are never escaped a second time.
  return str.replace(/[&<>"']/g, (ch) => entityFor[ch]);
}
1680
+
1681
/**
 * Pull the plain document text out of an extraction result.
 *
 * Prefers the `page_N` structure: the `text` of every page that has one is
 * joined with blank lines, in ascending page-number order. Falls back to
 * `full_text` when no page carries text.
 *
 * @param {object} fields - Extracted fields.
 * @returns {string} The combined text, or "" when none is available.
 */
function extractTextFromFields(fields) {
  if (!fields || typeof fields !== "object") {
    return "";
  }

  // Numeric suffix of a "page_N" key; keys without one sort last.
  const pageNumberOf = (key) => {
    const parsed = Number.parseInt(key.slice("page_".length), 10);
    return Number.isNaN(parsed) ? Number.POSITIVE_INFINITY : parsed;
  };

  // Sort by page number instead of trusting key insertion order: a payload
  // whose keys arrive alphabetically (page_1, page_10, page_2, ...) would
  // otherwise have its pages concatenated out of order.
  const pageTexts = Object.keys(fields)
    .filter((key) => key.startsWith("page_"))
    .sort((a, b) => {
      const left = pageNumberOf(a);
      const right = pageNumberOf(b);
      if (left === right) return 0;
      return left < right ? -1 : 1;
    })
    .map((key) => fields[key]?.text)
    .filter(Boolean);

  if (pageTexts.length > 0) {
    return pageTexts.join("\n\n");
  }

  // Fallback to full_text
  if (fields.full_text) {
    return fields.full_text;
  }

  return "";
}
1711
+
1712
/**
 * Render extracted fields as readable plain text.
 *
 * When the result carries document text (per-page text or `full_text`, as
 * resolved by `extractTextFromFields`), that text is returned as-is.
 * Otherwise every field is printed as an indented `key: value` outline.
 *
 * @param {object} fields - Extracted fields.
 * @returns {string} Display text; "No data extracted." when nothing is usable.
 */
function fieldsToText(fields) {
  if (!fields || typeof fields !== "object") {
    return "No data extracted.";
  }

  // Extract text from page structure or full_text
  const extractedText = extractTextFromFields(fields);
  if (extractedText) {
    return extractedText;
  }

  // Fallback: format all fields normally
  let text = "";
  const formatValue = (key, value, indent = "") => {
    if (Array.isArray(value)) {
      text += `${indent}${key}:\n`;
      value.forEach((item, idx) => {
        // typeof null is "object"; without the null check a null entry
        // reached Object.entries(null) and threw.
        if (typeof item === "object" && item !== null) {
          text += `${indent} Item ${idx + 1}:\n`;
          Object.entries(item).forEach(([k, v]) => formatValue(k, v, indent + " "));
        } else {
          text += `${indent} - ${item}\n`;
        }
      });
    } else if (typeof value === "object" && value !== null) {
      text += `${indent}${key}:\n`;
      Object.entries(value).forEach(([k, v]) => formatValue(k, v, indent + " "));
    } else {
      text += `${indent}${key}: ${value}\n`;
    }
  };

  Object.entries(fields).forEach(([key, value]) => {
    formatValue(key, value);
    text += "\n";
  });

  return text.trim() || "No data extracted.";
}
1801
+
1802
+ export default function ExtractionOutput({ hasFile, isProcessing, isComplete, extractionResult, onNewUpload }) {
1803
+ const [activeTab, setActiveTab] = useState("json");
1804
+ const [copied, setCopied] = useState(false);
1805
+ const [statusMessage, setStatusMessage] = useState("Preparing document...");
1806
+
1807
+ // Get fields from extraction result, default to empty object
1808
+ const fields = extractionResult?.fields || {};
1809
+ const confidence = extractionResult?.confidence || 0;
1810
+ const fieldsExtracted = extractionResult?.fieldsExtracted || 0;
1811
+ const totalTime = extractionResult?.totalTime || 0;
1812
+
1813
+ // Dynamic status messages that rotate during processing
1814
+ const statusMessages = [
1815
+ "Preparing document...",
1816
+ "Converting pages to images...",
1817
+ "Visual Reasoning...",
1818
+ "Reading text from document...",
1819
+ "Identifying document structure...",
1820
+ "Extracting tables and data...",
1821
+ "Analyzing content...",
1822
+ "Processing pages...",
1823
+ "Organizing extracted information...",
1824
+ "Finalizing results...",
1825
+ ];
1826
+
1827
+ // Rotate status messages during processing
1828
+ const messageIndexRef = useRef(0);
1829
+
1830
+ useEffect(() => {
1831
+ if (!isProcessing) {
1832
+ setStatusMessage("Analyzing document structure");
1833
+ messageIndexRef.current = 0;
1834
+ return;
1835
+ }
1836
+
1837
+ setStatusMessage(statusMessages[0]);
1838
+ messageIndexRef.current = 0;
1839
+
1840
+ const interval = setInterval(() => {
1841
+ messageIndexRef.current = (messageIndexRef.current + 1) % statusMessages.length;
1842
+ setStatusMessage(statusMessages[messageIndexRef.current]);
1843
+ }, 2500); // Change message every 2.5 seconds
1844
+
1845
+ return () => clearInterval(interval);
1846
+ }, [isProcessing]);
1847
+
1848
+ // Initialize expanded sections based on available fields
1849
+ const [expandedSections, setExpandedSections] = useState(() =>
1850
+ Object.keys(fields).slice(0, 5) // Expand first 5 sections by default
1851
+ );
1852
+
1853
/**
 * Convert an HTML string to plain text while keeping its visual layout:
 * headings and paragraphs are separated by blank lines, lists become bullet
 * lines, and tables become padded, pipe-separated rows.
 *
 * @param {string} html - HTML markup to flatten.
 * @returns {string} Plain text; "" for empty input.
 */
const htmlToFormattedText = (html) => {
  if (!html) return "";

  // Parse inside a <template>: its content lives in an inert document, so the
  // markup (which originates from extracted document text) cannot load
  // resources or fire event handlers merely by being parsed.
  const template = document.createElement("template");
  template.innerHTML = html;

  const BULLET = " • ";
  const tagOf = (element) => element.tagName?.toLowerCase();
  const squash = (value) => value.trim().replace(/\s+/g, " ");

  const processChildren = (children) => children.map(processNode).join("");

  // Flatten one DOM node (and its subtree) to text.
  const processNode = (node) => {
    if (node.nodeType === Node.TEXT_NODE) {
      return node.textContent;
    }
    if (node.nodeType !== Node.ELEMENT_NODE) {
      return "";
    }

    const children = Array.from(node.childNodes);

    switch (tagOf(node)) {
      case "h1":
      case "h2":
        return "\n\n" + processChildren(children).trim() + "\n\n";
      case "h3":
        return "\n" + processChildren(children).trim() + "\n";
      case "p":
        return processChildren(children) + "\n\n";
      case "br":
        return "\n";
      case "table":
        return "\n" + processTable(node) + "\n\n";
      case "ul":
      case "ol":
        return "\n" + processList(node) + "\n\n";
      case "li":
        // Only reached for an <li> that is not inside a list element.
        return BULLET + processChildren(children).trim() + "\n";
      default:
        // Inline formatting (strong, em, sup, sub, ...) contributes only text.
        return processChildren(children);
    }
  };

  // Render a table as aligned columns with a rule under a header row.
  const processTable = (table) => {
    // table.rows / row.cells cover only this table's own rows and cells, so a
    // nested table is rendered once (inside its cell), not a second time here.
    const rows = Array.from(table.rows);
    if (rows.length === 0) return "";

    const grid = rows.map((row) =>
      Array.from(row.cells).map((cell) => squash(processChildren(Array.from(cell.childNodes))))
    );

    // Every column is at least 10 characters wide.
    const columnCount = Math.max(...grid.map((cells) => cells.length));
    const columnWidths = Array.from({ length: columnCount }, (_, col) =>
      Math.max(10, ...grid.map((cells) => (cells[col] ?? "").length))
    );

    let tableText = "";
    grid.forEach((cells, rowIndex) => {
      tableText += cells.map((cell, col) => cell.padEnd(columnWidths[col] || 10)).join(" | ") + "\n";

      // Add separator after header row
      const isHeaderRow = rowIndex === 0 && Array.from(rows[0].cells).some((cell) => tagOf(cell) === "th");
      if (isHeaderRow) {
        tableText += columnWidths.map((width) => "-".repeat(width)).join("-|-") + "\n";
      }
    });

    return tableText;
  };

  // Render the direct <li> children of a list as bullet lines. Nested lists
  // are emitted as part of their parent item, so descendants are not
  // collected here (that duplicated every nested item).
  const processList = (list) =>
    Array.from(list.children)
      .filter((child) => tagOf(child) === "li")
      .map((item) => BULLET + processChildren(Array.from(item.childNodes)).trim())
      .join("\n");

  // Clean up extra newlines
  return processChildren(Array.from(template.content.childNodes))
    .replace(/\n{3,}/g, "\n\n")
    .trim();
};
1973
+
1974
/**
 * Copy the content of the active tab (JSON, XML or text) to the clipboard and
 * show the "Copied" indicator for two seconds once the copy has succeeded.
 */
const handleCopy = async () => {
  let content = "";
  if (activeTab === "json") {
    content = JSON.stringify(prepareFieldsForOutput(fields, "json"), null, 2);
  } else if (activeTab === "xml") {
    content = objectToXML(fields);
  } else {
    // Text tab: start from the same text the tab renders (fieldsToText), so
    // the clipboard matches the view even when the result has no page text,
    // then flatten the rendered HTML back to plain text with layout.
    const htmlContent = renderMarkdownToHTML(fieldsToText(fields));
    content = htmlToFormattedText(htmlContent);
  }

  try {
    // writeText returns a promise that rejects when permission is denied;
    // navigator.clipboard itself is undefined in insecure contexts.
    await navigator.clipboard.writeText(content);
  } catch (err) {
    console.error("Failed to copy extraction output to clipboard:", err);
    return; // do not claim success
  }

  setCopied(true);
  setTimeout(() => setCopied(false), 2000);
};
1992
+
1993
+ // Get prepared fields for display
1994
+ const preparedFields = React.useMemo(() => {
1995
+ return prepareFieldsForOutput(fields, "json");
1996
+ }, [fields]);
1997
+
1998
+ // Update expanded sections when fields change
1999
+ React.useEffect(() => {
2000
+ if (extractionResult?.fields) {
2001
+ setExpandedSections(Object.keys(extractionResult.fields).slice(0, 5));
2002
+ }
2003
+ }, [extractionResult]);
2004
+
2005
/**
 * Flip one section between expanded and collapsed.
 *
 * @param {string} section - Identifier of the section to toggle.
 */
const toggleSection = (section) => {
  setExpandedSections((current) => {
    // Already expanded: drop it. Otherwise append it to the expanded list.
    if (current.includes(section)) {
      return current.filter((name) => name !== section);
    }
    return current.concat([section]);
  });
};
2010
+
2011
+ const renderValue = (value) => {
2012
+ if (typeof value === "number") {
2013
+ return <span className="text-amber-600">{value}</span>;
2014
+ }
2015
+ if (typeof value === "string") {
2016
+ return <span className="text-emerald-600">"{value}"</span>;
2017
+ }
2018
+ return String(value);
2019
+ };
2020
+
2021
+ const renderSection = (key, value, level = 0) => {
2022
+ const isExpanded = expandedSections.includes(key);
2023
+ const isObject = typeof value === "object" && value !== null;
2024
+ const isArray = Array.isArray(value);
2025
+
2026
+ if (!isObject) {
2027
+ return (
2028
+ <div
2029
+ key={key}
2030
+ className="flex items-start gap-2 py-1"
2031
+ style={{ paddingLeft: level * 16 }}
2032
+ >
2033
+ <span className="text-violet-500">"{key}"</span>
2034
+ <span className="text-slate-400">:</span>
2035
+ {renderValue(value)}
2036
+ </div>
2037
+ );
2038
+ }
2039
+
2040
+ return (
2041
+ <div key={key}>
2042
+ <button
2043
+ onClick={() => toggleSection(key)}
2044
+ className="flex items-center gap-2 py-1 hover:bg-slate-50 w-full text-left rounded"
2045
+ style={{ paddingLeft: level * 16 }}
2046
+ >
2047
+ <ChevronDown
2048
+ className={cn(
2049
+ "h-3 w-3 text-slate-400 transition-transform",
2050
+ !isExpanded && "-rotate-90"
2051
+ )}
2052
+ />
2053
+ <span className="text-violet-500">"{key}"</span>
2054
+ <span className="text-slate-400">:</span>
2055
+ <span className="text-slate-400">{isArray ? "[" : "{"}</span>
2056
+ {!isExpanded && (
2057
+ <span className="text-slate-300 text-xs">
2058
+ {isArray ? `${value.length} items` : `${Object.keys(value).length} fields`}
2059
+ </span>
2060
+ )}
2061
+ </button>
2062
+ <AnimatePresence>
2063
+ {isExpanded && (
2064
+ <motion.div
2065
+ initial={{ height: 0, opacity: 0 }}
2066
+ animate={{ height: "auto", opacity: 1 }}
2067
+ exit={{ height: 0, opacity: 0 }}
2068
+ transition={{ duration: 0.2 }}
2069
+ className="overflow-hidden"
2070
+ >
2071
+ {isArray ? (
2072
+ value.map((item, idx) => (
2073
+ <div key={idx} className="border-l border-slate-100 ml-4">
2074
+ {Object.entries(item).map(([k, v]) => renderSection(k, v, level + 2))}
2075
+ {idx < value.length - 1 && <div className="h-2" />}
2076
+ </div>
2077
+ ))
2078
+ ) : (
2079
+ Object.entries(value).map(([k, v]) => renderSection(k, v, level + 1))
2080
+ )}
2081
+ <div style={{ paddingLeft: level * 16 }} className="text-slate-400">
2082
+ {isArray ? "]" : "}"}
2083
+ </div>
2084
+ </motion.div>
2085
+ )}
2086
+ </AnimatePresence>
2087
+ </div>
2088
+ );
2089
+ };
2090
+
2091
+ return (
2092
+ <div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden">
2093
+ {/* Header */}
2094
+ <div className="flex items-center justify-between px-5 py-4 border-b border-slate-100">
2095
+ <div className="flex items-center gap-3">
2096
+ <div className="h-8 w-8 rounded-lg bg-emerald-50 flex items-center justify-center">
2097
+ <Code2 className="h-4 w-4 text-emerald-600" />
2098
+ </div>
2099
+ <div>
2100
+ <h3 className="font-semibold text-slate-800 text-sm">Extracted Data</h3>
2101
+ <p className="text-xs text-slate-400">
2102
+ {isComplete
2103
+ ? `${fieldsExtracted} field${fieldsExtracted !== 1 ? 's' : ''} extracted`
2104
+ : "Waiting for extraction"}
2105
+ </p>
2106
+ </div>
2107
+ {isComplete && onNewUpload && (
2108
+ <Button
2109
+ variant="ghost"
2110
+ size="sm"
2111
+ onClick={onNewUpload}
2112
+ className="h-8 ml-auto text-xs gap-1.5 text-indigo-600 hover:text-indigo-700 hover:bg-indigo-50"
2113
+ title="Upload new document"
2114
+ >
2115
+ <Upload className="h-3.5 w-3.5" />
2116
+ New
2117
+ </Button>
2118
+ )}
2119
+ </div>
2120
+
2121
+ {isComplete && (
2122
+ <div className="flex items-center gap-2">
2123
+ <Tabs value={activeTab} onValueChange={setActiveTab}>
2124
+ <TabsList className="h-8 bg-slate-100 p-0.5">
2125
+ <TabsTrigger value="text" className="h-7 text-xs gap-1.5">
2126
+ <FileText className="h-3 w-3" />
2127
+ Text
2128
+ </TabsTrigger>
2129
+ <TabsTrigger value="json" className="h-7 text-xs gap-1.5">
2130
+ <Braces className="h-3 w-3" />
2131
+ JSON
2132
+ </TabsTrigger>
2133
+ <TabsTrigger value="xml" className="h-7 text-xs gap-1.5">
2134
+ <FileCode2 className="h-3 w-3" />
2135
+ XML
2136
+ </TabsTrigger>
2137
+ </TabsList>
2138
+ </Tabs>
2139
+ <Button
2140
+ variant="ghost"
2141
+ size="sm"
2142
+ onClick={handleCopy}
2143
+ className="h-8 text-xs gap-1.5"
2144
+ >
2145
+ {copied ? (
2146
+ <>
2147
+ <Check className="h-3 w-3 text-emerald-500" />
2148
+ Copied
2149
+ </>
2150
+ ) : (
2151
+ <>
2152
+ <Copy className="h-3 w-3" />
2153
+ Copy
2154
+ </>
2155
+ )}
2156
+ </Button>
2157
+ </div>
2158
+ )}
2159
+ </div>
2160
+
2161
+ {/* Output Area */}
2162
+ <div className="flex-1 overflow-auto">
2163
+ {!hasFile ? (
2164
+ <div className="h-full flex items-center justify-center p-6">
2165
+ <div className="text-center">
2166
+ <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
2167
+ <Code2 className="h-10 w-10 text-slate-300" />
2168
+ </div>
2169
+ <p className="text-slate-400 text-sm">Extracted data will appear here</p>
2170
+ </div>
2171
+ </div>
2172
+ ) : isProcessing ? (
2173
+ <div className="h-full flex items-center justify-center p-6">
2174
+ <div className="text-center">
2175
+ <motion.div
2176
+ animate={{ rotate: 360 }}
2177
+ transition={{ duration: 2, repeat: Infinity, ease: "linear" }}
2178
+ className="h-16 w-16 mx-auto rounded-2xl bg-gradient-to-br from-indigo-100 to-violet-100 flex items-center justify-center mb-4"
2179
+ >
2180
+ <Sparkles className="h-8 w-8 text-indigo-500" />
2181
+ </motion.div>
2182
+ <p className="text-slate-700 font-medium mb-1">Extracting data...</p>
2183
+ <p className="text-slate-400 text-sm">{statusMessage}</p>
2184
+
2185
+ <div className="mt-6 flex items-center justify-center gap-1">
2186
+ {[0, 1, 2].map((i) => (
2187
+ <motion.div
2188
+ key={i}
2189
+ animate={{ scale: [1, 1.2, 1] }}
2190
+ transition={{
2191
+ duration: 0.6,
2192
+ repeat: Infinity,
2193
+ delay: i * 0.2,
2194
+ }}
2195
+ className="h-2 w-2 rounded-full bg-indigo-400"
2196
+ />
2197
+ ))}
2198
+ </div>
2199
+ </div>
2200
+ </div>
2201
+ ) : isComplete && Object.keys(fields).length === 0 ? (
2202
+ <div className="h-full flex items-center justify-center p-6">
2203
+ <div className="text-center">
2204
+ <div className="h-20 w-20 mx-auto rounded-2xl bg-amber-100 flex items-center justify-center mb-4">
2205
+ <Code2 className="h-10 w-10 text-amber-600" />
2206
+ </div>
2207
+ <p className="text-slate-600 font-medium mb-1">No data extracted</p>
2208
+ <p className="text-slate-400 text-sm">The document may not contain extractable fields</p>
2209
+ </div>
2210
+ </div>
2211
+ ) : (
2212
+ <div className="p-4 font-mono text-sm">
2213
+ {activeTab === "text" ? (
2214
+ <div
2215
+ className="text-sm text-slate-700 leading-relaxed"
2216
+ style={{
2217
+ fontFamily: 'system-ui, -apple-system, sans-serif'
2218
+ }}
2219
+ >
2220
+ <div
2221
+ className="markdown-content"
2222
+ dangerouslySetInnerHTML={{ __html: renderMarkdownToHTML(fieldsToText(fields)) }}
2223
+ style={{
2224
+ lineHeight: '1.6'
2225
+ }}
2226
+ />
2227
+ <style>{`
2228
+ .markdown-content h1 {
2229
+ font-size: 1.5rem;
2230
+ font-weight: 700;
2231
+ color: #0f172a;
2232
+ margin-top: 1.5rem;
2233
+ margin-bottom: 1rem;
2234
+ line-height: 1.3;
2235
+ }
2236
+ .markdown-content h2 {
2237
+ font-size: 1.25rem;
2238
+ font-weight: 600;
2239
+ color: #0f172a;
2240
+ margin-top: 1.25rem;
2241
+ margin-bottom: 0.75rem;
2242
+ line-height: 1.3;
2243
+ }
2244
+ .markdown-content h3 {
2245
+ font-size: 1.125rem;
2246
+ font-weight: 600;
2247
+ color: #1e293b;
2248
+ margin-top: 1rem;
2249
+ margin-bottom: 0.5rem;
2250
+ line-height: 1.3;
2251
+ }
2252
+ .markdown-content p {
2253
+ margin-top: 0.75rem;
2254
+ margin-bottom: 0.75rem;
2255
+ color: #334155;
2256
+ }
2257
+ .markdown-content table {
2258
+ width: 100%;
2259
+ border-collapse: collapse;
2260
+ margin: 1.5rem 0;
2261
+ font-size: 0.875rem;
2262
+ box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1);
2263
+ }
2264
+ .markdown-content table caption {
2265
+ font-weight: 600;
2266
+ margin-bottom: 0.5rem;
2267
+ text-align: left;
2268
+ }
2269
+ .markdown-content table th {
2270
+ background-color: #f8fafc;
2271
+ border: 1px solid #cbd5e1;
2272
+ padding: 0.75rem;
2273
+ text-align: left;
2274
+ font-weight: 600;
2275
+ color: #0f172a;
2276
+ }
2277
+ .markdown-content table td {
2278
+ border: 1px solid #cbd5e1;
2279
+ padding: 0.75rem;
2280
+ color: #334155;
2281
+ }
2282
+ .markdown-content table tr:nth-child(even) {
2283
+ background-color: #f8fafc;
2284
+ }
2285
+ .markdown-content table tr:hover {
2286
+ background-color: #f1f5f9;
2287
+ }
2288
+ .markdown-content strong {
2289
+ font-weight: 600;
2290
+ color: #0f172a;
2291
+ }
2292
+ .markdown-content em {
2293
+ font-style: italic;
2294
+ }
2295
+ .markdown-content a {
2296
+ color: #4f46e5;
2297
+ text-decoration: underline;
2298
+ }
2299
+ .markdown-content a:hover {
2300
+ color: #4338ca;
2301
+ }
2302
+ .markdown-content sup {
2303
+ font-size: 0.75em;
2304
+ vertical-align: super;
2305
+ line-height: 0;
2306
+ position: relative;
2307
+ top: -0.5em;
2308
+ }
2309
+ .markdown-content sub {
2310
+ font-size: 0.75em;
2311
+ vertical-align: sub;
2312
+ line-height: 0;
2313
+ position: relative;
2314
+ bottom: -0.25em;
2315
+ }
2316
+ .markdown-content ul, .markdown-content ol {
2317
+ margin: 0.75rem 0;
2318
+ padding-left: 1.5rem;
2319
+ }
2320
+ .markdown-content li {
2321
+ margin: 0.25rem 0;
2322
+ }
2323
+ `}</style>
2324
+ </div>
2325
+ ) : activeTab === "json" ? (
2326
+ <div className="space-y-1">
2327
+ <span className="text-slate-400">{"{"}</span>
2328
+ {Object.keys(preparedFields).length > 0 ? (
2329
+ Object.entries(preparedFields).map(([key, value]) =>
2330
+ renderSection(key, value, 1)
2331
+ )
2332
+ ) : (
2333
+ <div className="pl-4 text-slate-400 italic">No fields extracted</div>
2334
+ )}
2335
+ <span className="text-slate-400">{"}"}</span>
2336
+ </div>
2337
+ ) : (
2338
+ <pre className="text-sm text-slate-600 whitespace-pre-wrap">
2339
+ {objectToXML(fields).split("\n").map((line, i) => (
2340
+ <div key={i} className="hover:bg-slate-50 px-2 -mx-2 rounded">
2341
+ {line.includes("<") ? (
2342
+ <>
2343
+ {line.split(/(<\/?[\w\s=".-]+>)/g).map((part, j) => {
2344
+ if (part.startsWith("</")) {
2345
+ return (
2346
+ <span key={j} className="text-rose-500">
2347
+ {part}
2348
+ </span>
2349
+ );
2350
+ }
2351
+ if (part.startsWith("<")) {
2352
+ return (
2353
+ <span key={j} className="text-indigo-500">
2354
+ {part}
2355
+ </span>
2356
+ );
2357
+ }
2358
+ return (
2359
+ <span key={j} className="text-slate-700">
2360
+ {part}
2361
+ </span>
2362
+ );
2363
+ })}
2364
+ </>
2365
+ ) : (
2366
+ line
2367
+ )}
2368
+ </div>
2369
+ ))}
2370
+ </pre>
2371
+ )}
2372
+ </div>
2373
+ )}
2374
+ </div>
2375
+
2376
+ {/* Confidence Footer */}
2377
+ {isComplete && extractionResult && (
2378
+ <div className="px-5 py-3 border-t border-slate-100 bg-slate-50/50">
2379
+ <div className="flex items-center justify-between text-xs">
2380
+ <div className="flex items-center gap-4">
2381
+ <div className="flex items-center gap-1.5">
2382
+ <div className={cn(
2383
+ "h-2 w-2 rounded-full",
2384
+ confidence >= 90 ? "bg-emerald-500" : confidence >= 70 ? "bg-amber-500" : "bg-red-500"
2385
+ )} />
2386
+ <span className="text-slate-500">Confidence:</span>
2387
+ <span className="font-semibold text-slate-700">
2388
+ {confidence > 0 ? `${confidence.toFixed(1)}%` : "N/A"}
2389
+ </span>
2390
+ </div>
2391
+ <div className="flex items-center gap-1.5">
2392
+ <span className="text-slate-500">Fields:</span>
2393
+ <span className="font-semibold text-slate-700">{fieldsExtracted}</span>
2394
+ </div>
2395
+ </div>
2396
+ <span className="text-slate-400">
2397
+ Processed in {totalTime >= 1000 ? `${(totalTime / 1000).toFixed(1)}s` : `${totalTime}ms`}
2398
+ </span>
2399
+ </div>
2400
+ </div>
2401
+ )}
2402
+ </div>
2403
+ );
2404
+ }
2405
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/components/ocr/ProcessingStatus.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  import React from "react";
2
  import { motion } from "framer-motion";
3
  import {
@@ -116,3 +117,123 @@ export default function ProcessingStatus({ isProcessing, isComplete, currentStag
116
  </motion.div>
117
  );
118
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import React from "react";
3
  import { motion } from "framer-motion";
4
  import {
 
117
  </motion.div>
118
  );
119
  }
120
+ =======
121
+ import React from "react";
122
+ import { motion } from "framer-motion";
123
+ import {
124
+ FileSearch,
125
+ Cpu,
126
+ TableProperties,
127
+ CheckCircle2,
128
+ Loader2,
129
+ } from "lucide-react";
130
+ import { cn } from "@/lib/utils";
131
+
132
+ const steps = [
133
+ { id: "upload", label: "Received", icon: FileSearch },
134
+ { id: "analyze", label: "Analysis", icon: Cpu },
135
+ { id: "extract", label: "Extraction", icon: TableProperties },
136
+ { id: "complete", label: "Done", icon: CheckCircle2 },
137
+ ];
138
+
139
+ export default function ProcessingStatus({ isProcessing, isComplete, currentStage }) {
140
+ const getCurrentStep = () => {
141
+ if (isComplete) return 4; // Done
142
+ if (!isProcessing) return 0; // Not started
143
+
144
+ // Use provided currentStage or default based on isProcessing
145
+ if (currentStage === "extraction") return 3; // Extraction
146
+ if (currentStage === "analysis") return 2; // Analysis
147
+ if (currentStage === "received") return 1; // Received
148
+
149
+ // Default: if processing, start at Analysis
150
+ return 2; // Analysis
151
+ };
152
+
153
+ const currentStep = getCurrentStep();
154
+
155
+ if (!isProcessing && !isComplete) return null;
156
+
157
+ return (
158
+ <motion.div
159
+ initial={{ opacity: 0, y: -10 }}
160
+ animate={{ opacity: 1, y: 0 }}
161
+ className="bg-white rounded-xl border border-slate-200 px-4 py-3"
162
+ >
163
+ <div className="flex items-center justify-between gap-2">
164
+ {steps.map((step, index) => {
165
+ const isActive = index + 1 === currentStep;
166
+ const isCompleted = index + 1 < currentStep || isComplete;
167
+ const Icon = step.icon;
168
+
169
+ return (
170
+ <React.Fragment key={step.id}>
171
+ <div className="flex items-center gap-2">
172
+ <motion.div
173
+ initial={false}
174
+ animate={{
175
+ scale: (isActive && !isComplete) ? 1.05 : 1,
176
+ backgroundColor: isCompleted
177
+ ? "rgb(16 185 129)"
178
+ : (isActive && !isComplete)
179
+ ? "rgb(99 102 241)"
180
+ : "rgb(241 245 249)",
181
+ }}
182
+ className={cn(
183
+ "h-8 w-8 rounded-lg flex items-center justify-center transition-colors",
184
+ (isCompleted || isActive) && "shadow-md"
185
+ )}
186
+ style={{
187
+ boxShadow: (isActive && !isComplete)
188
+ ? "0 4px 8px -2px rgba(99, 102, 241, 0.3)"
189
+ : isCompleted
190
+ ? "0 4px 8px -2px rgba(16, 185, 129, 0.3)"
191
+ : "none",
192
+ }}
193
+ >
194
+ {(isActive && !isComplete) ? (
195
+ <motion.div
196
+ animate={{ rotate: 360 }}
197
+ transition={{ duration: 1.5, repeat: Infinity, ease: "linear" }}
198
+ >
199
+ <Loader2 className="h-4 w-4 text-white" />
200
+ </motion.div>
201
+ ) : isCompleted ? (
202
+ <CheckCircle2 className="h-4 w-4 text-white" />
203
+ ) : (
204
+ <Icon className={cn("h-4 w-4 text-slate-400")} />
205
+ )}
206
+ </motion.div>
207
+ <span
208
+ className={cn(
209
+ "text-xs font-medium hidden sm:inline",
210
+ isActive ? "text-indigo-600" : isCompleted ? "text-emerald-600" : "text-slate-400"
211
+ )}
212
+ >
213
+ {step.label}
214
+ </span>
215
+ </div>
216
+
217
+ {index < steps.length - 1 && (
218
+ <div className="flex-1 h-0.5 mx-1 relative overflow-hidden rounded-full bg-slate-100">
219
+ <motion.div
220
+ initial={{ width: 0 }}
221
+ animate={{
222
+ width: isCompleted ? "100%" : isActive ? "50%" : "0%",
223
+ }}
224
+ transition={{ duration: 0.5 }}
225
+ className={cn(
226
+ "absolute inset-y-0 left-0",
227
+ isCompleted ? "bg-emerald-500" : "bg-indigo-500"
228
+ )}
229
+ />
230
+ </div>
231
+ )}
232
+ </React.Fragment>
233
+ );
234
+ })}
235
+ </div>
236
+ </motion.div>
237
+ );
238
+ }
239
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/components/ocr/UpgradeModal.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  import React from "react";
2
  import { motion } from "framer-motion";
3
  import { cn } from "@/lib/utils";
@@ -211,3 +212,218 @@ export default function UpgradeModal({ open, onClose }) {
211
  );
212
  }
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import React from "react";
3
  import { motion } from "framer-motion";
4
  import { cn } from "@/lib/utils";
 
212
  );
213
  }
214
 
215
+ =======
216
+ import React from "react";
217
+ import { motion } from "framer-motion";
218
+ import { cn } from "@/lib/utils";
219
+ import {
220
+ X,
221
+ Sparkles,
222
+ Zap,
223
+ Shield,
224
+ Cloud,
225
+ BarChart3,
226
+ Bot,
227
+ Globe,
228
+ Lock,
229
+ Rocket,
230
+ Users,
231
+ CheckCircle2,
232
+ ArrowRight
233
+ } from "lucide-react";
234
+ import { Button } from "@/components/ui/button";
235
+
236
// Upgrade options shown as cards in the modal.
// Each row is [icon, title, description, color, cta, gradient]; the rows are
// expanded into objects with those field names so the render code can read
// them by name.
const features = [
  [
    Zap,
    "Production-Scale Processing",
    "Remove trial limits and run live AP and operations workflows",
    "amber",
    "Explore with a demo",
    "from-amber-500 to-orange-500",
  ],
  [
    Bot,
    "Advanced Agentic Processing",
    "You can customize your own agentic pipeline with your own data",
    "indigo",
    "Talk to Sales",
    "from-indigo-500 to-violet-500",
  ],
  [
    Cloud,
    "API Access",
    "Integrate EZOFIS into your workflow with our REST API",
    "blue",
    "Talk to a Techie!",
    "from-blue-500 to-cyan-500",
  ],
].map(([icon, title, description, color, cta, gradient]) => ({
  icon,
  title,
  description,
  color,
  cta,
  gradient,
}));
262
+
263
+ export default function UpgradeModal({ open, onClose }) {
264
+ if (!open) return null;
265
+
266
+ return (
267
+ <div className="fixed inset-0 z-50 flex items-center justify-center">
268
+ {/* Backdrop */}
269
+ <motion.div
270
+ initial={{ opacity: 0 }}
271
+ animate={{ opacity: 1 }}
272
+ exit={{ opacity: 0 }}
273
+ className="absolute inset-0 bg-black/50 backdrop-blur-sm"
274
+ onClick={onClose}
275
+ />
276
+
277
+ {/* Modal */}
278
+ <motion.div
279
+ initial={{ opacity: 0, scale: 0.95, y: 20 }}
280
+ animate={{ opacity: 1, scale: 1, y: 0 }}
281
+ exit={{ opacity: 0, scale: 0.95, y: 20 }}
282
+ className="relative z-10 w-full max-w-6xl max-h-[90vh] mx-4 bg-white rounded-2xl shadow-2xl overflow-hidden flex flex-col"
283
+ onClick={(e) => e.stopPropagation()}
284
+ >
285
+ {/* Header */}
286
+ <div className="sticky top-0 bg-gradient-to-r from-indigo-600 via-violet-600 to-purple-600 text-white px-8 py-6 z-10">
287
+ <button
288
+ onClick={onClose}
289
+ className="absolute right-6 top-6 h-8 w-8 rounded-lg bg-white/10 hover:bg-white/20 flex items-center justify-center transition-colors"
290
+ >
291
+ <X className="h-4 w-4" />
292
+ </button>
293
+
294
+ <motion.div
295
+ initial={{ opacity: 0, y: 20 }}
296
+ animate={{ opacity: 1, y: 0 }}
297
+ className="text-center"
298
+ >
299
+ <div className="inline-flex items-center gap-2 px-4 py-1.5 rounded-full bg-white/10 backdrop-blur-sm mb-4">
300
+ <Sparkles className="h-4 w-4" />
301
+ <span className="text-sm font-medium">Trial Limit Reached</span>
302
+ </div>
303
+ <h2 className="text-3xl font-bold mb-2">You've processed 2 documents</h2>
304
+ <p className="text-white/80 text-lg">Continue with production-ready document intelligence</p>
305
+ </motion.div>
306
+ </div>
307
+
308
+ {/* Stats Bar */}
309
+ <div className="grid grid-cols-3 gap-6 px-8 py-6 bg-slate-50 border-b border-slate-200">
310
+ {[
311
+ { label: "Accuracy Rate", value: "99.8%", icon: CheckCircle2 },
312
+ { label: "Processing Speed", value: "< 10s", icon: Zap },
313
+ { label: "Operational Users", value: "10,000+", icon: Users }
314
+ ].map((stat, i) => (
315
+ <motion.div
316
+ key={stat.label}
317
+ initial={{ opacity: 0, y: 20 }}
318
+ animate={{ opacity: 1, y: 0 }}
319
+ transition={{ delay: i * 0.1 }}
320
+ className="text-center"
321
+ >
322
+ <div className="flex items-center justify-center gap-2 mb-1">
323
+ <stat.icon className="h-4 w-4 text-indigo-600" />
324
+ <span className="text-2xl font-bold text-slate-900">{stat.value}</span>
325
+ </div>
326
+ <p className="text-sm text-slate-500">{stat.label}</p>
327
+ </motion.div>
328
+ ))}
329
+ </div>
330
+
331
+ {/* Features Grid - Scrollable */}
332
+ <div className="flex-1 overflow-auto px-8 py-8">
333
+ <div className="text-center mb-8">
334
+ <h3 className="text-2xl font-bold text-slate-900 mb-2">
335
+ Continue to Production Use
336
+ </h3>
337
+
338
+ </div>
339
+
340
+ <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
341
+ {features.map((feature, index) => (
342
+ <motion.div
343
+ key={feature.title}
344
+ initial={{ opacity: 0, y: 20 }}
345
+ animate={{ opacity: 1, y: 0 }}
346
+ transition={{ delay: 0.2 + index * 0.1 }}
347
+ className="group relative bg-white rounded-2xl border border-slate-200 p-6 hover:shadow-xl hover:shadow-slate-200/50 transition-all duration-300 hover:-translate-y-1 overflow-hidden"
348
+ >
349
+ {/* Gradient Background on Hover */}
350
+ <div className={`absolute inset-0 bg-gradient-to-br ${feature.gradient} opacity-0 group-hover:opacity-5 transition-opacity duration-300`} />
351
+
352
+ <div className="relative">
353
+ <div className={cn(
354
+ "h-12 w-12 rounded-xl flex items-center justify-center mb-4 group-hover:scale-110 transition-transform duration-300",
355
+ feature.color === "amber" && "bg-amber-50",
356
+ feature.color === "indigo" && "bg-indigo-50",
357
+ feature.color === "blue" && "bg-blue-50",
358
+ feature.color === "emerald" && "bg-emerald-50",
359
+ feature.color === "slate" && "bg-slate-50",
360
+ feature.color === "purple" && "bg-purple-50"
361
+ )}>
362
+ <feature.icon className={cn(
363
+ "h-6 w-6",
364
+ feature.color === "amber" && "text-amber-600",
365
+ feature.color === "indigo" && "text-indigo-600",
366
+ feature.color === "blue" && "text-blue-600",
367
+ feature.color === "emerald" && "text-emerald-600",
368
+ feature.color === "slate" && "text-slate-600",
369
+ feature.color === "purple" && "text-purple-600"
370
+ )} />
371
+ </div>
372
+ <h4 className="font-semibold text-slate-900 mb-2">{feature.title}</h4>
373
+ <p className="text-sm text-slate-600 mb-4 leading-relaxed">{feature.description}</p>
374
+
375
+ <Button
376
+ variant="ghost"
377
+ size="sm"
378
+ className={cn(
379
+ "w-full h-9 border transition-all group-hover:shadow-md",
380
+ feature.color === "amber" && "text-amber-600 hover:bg-amber-50 border-amber-200 hover:border-amber-300",
381
+ feature.color === "indigo" && "text-indigo-600 hover:bg-indigo-50 border-indigo-200 hover:border-indigo-300",
382
+ feature.color === "blue" && "text-blue-600 hover:bg-blue-50 border-blue-200 hover:border-blue-300",
383
+ feature.color === "emerald" && "text-emerald-600 hover:bg-emerald-50 border-emerald-200 hover:border-emerald-300",
384
+ feature.color === "slate" && "text-slate-600 hover:bg-slate-50 border-slate-200 hover:border-slate-300",
385
+ feature.color === "purple" && "text-purple-600 hover:bg-purple-50 border-purple-200 hover:border-purple-300"
386
+ )}
387
+ >
388
+ {feature.cta}
389
+ <ArrowRight className="h-3.5 w-3.5 ml-2 group-hover:translate-x-1 transition-transform" />
390
+ </Button>
391
+ </div>
392
+ </motion.div>
393
+ ))}
394
+ </div>
395
+ </div>
396
+
397
+ {/* CTA Footer */}
398
+ <div className="sticky bottom-0 bg-white border-t border-slate-200 px-8 py-6">
399
+ <div className="flex items-center justify-between gap-6">
400
+ <div className="flex-1">
401
+ <h4 className="font-semibold text-slate-900 mb-1">Ready to scale?</h4>
402
+ <p className="text-sm text-slate-600">No commitment. We’ll tailor the demo to your documents and workflows.</p>
403
+ </div>
404
+ <div className="flex items-center gap-3">
405
+ <Button
406
+ variant="outline"
407
+ size="lg"
408
+ className="h-11 border-slate-300"
409
+ >
410
+ <Users className="h-4 w-4 mr-2" />
411
+ Talk to Sales
412
+ </Button>
413
+ <Button
414
+ size="lg"
415
+ className="h-11 bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700 shadow-lg shadow-indigo-500/25 hover:shadow-xl hover:shadow-indigo-500/30"
416
+ >
417
+ <Rocket className="h-4 w-4 mr-2" />
418
+ Start a production evaluation
419
+ <Sparkles className="h-4 w-4 ml-2" />
420
+ </Button>
421
+ </div>
422
+ </div>
423
+ </div>
424
+ </motion.div>
425
+ </div>
426
+ );
427
+ }
428
+
429
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/components/ocr/UploadZone.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  import React, { useState, useEffect } from "react";
2
  import { motion, AnimatePresence } from "framer-motion";
3
  import { Upload, FileText, Image, FileSpreadsheet, X, Sparkles, AlertCircle } from "lucide-react";
@@ -249,3 +250,256 @@ export default function UploadZone({ onFileSelect, selectedFile, onClear, keyFie
249
  </div>
250
  );
251
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import React, { useState, useEffect } from "react";
3
  import { motion, AnimatePresence } from "framer-motion";
4
  import { Upload, FileText, Image, FileSpreadsheet, X, Sparkles, AlertCircle } from "lucide-react";
 
250
  </div>
251
  );
252
  }
253
+ =======
254
+ import React, { useState, useEffect } from "react";
255
+ import { motion, AnimatePresence } from "framer-motion";
256
+ import { Upload, FileText, Image, FileSpreadsheet, X, Sparkles, AlertCircle } from "lucide-react";
257
+ import { cn } from "@/lib/utils";
258
+ import { Input } from "@/components/ui/input";
259
+
260
// MIME types accepted by the upload zone.
const ALLOWED_TYPES = [
  "application/pdf",
  "image/png",
  "image/jpeg",
  "image/jpg",
  "image/tiff",
  "image/tif",
];

// File extensions accepted when the MIME type is not in the list above.
const ALLOWED_EXTENSIONS = [
  ".pdf",
  ".png",
  ".jpg",
  ".jpeg",
  ".tiff",
  ".tif",
];

// Upload size cap in bytes: 4 MB (4 * 2^20).
const MAX_FILE_SIZE = 4 * 2 ** 20;
275
+
276
+ export default function UploadZone({ onFileSelect, selectedFile, onClear, keyFields = "", onKeyFieldsChange = () => {} }) {
277
+ const [isDragging, setIsDragging] = useState(false);
278
+ const [error, setError] = useState(null);
279
+
280
+ const validateFile = (file) => {
281
+ // Reset error
282
+ setError(null);
283
+
284
+ // Check file type
285
+ const fileExtension = "." + file.name.split(".").pop().toLowerCase();
286
+ const isValidType = ALLOWED_TYPES.includes(file.type) || ALLOWED_EXTENSIONS.includes(fileExtension);
287
+
288
+ if (!isValidType) {
289
+ setError("Only PDF, PNG, JPG, and TIFF files are allowed.");
290
+ return false;
291
+ }
292
+
293
+ // Check file size
294
+ if (file.size > MAX_FILE_SIZE) {
295
+ const fileSizeMB = (file.size / 1024 / 1024).toFixed(2);
296
+ setError(`File size exceeds 4 MB limit. Your file is ${fileSizeMB} MB.`);
297
+ return false;
298
+ }
299
+
300
+ return true;
301
+ };
302
+
303
+ const handleFileSelect = (file) => {
304
+ if (validateFile(file)) {
305
+ setError(null);
306
+ onFileSelect(file);
307
+ }
308
+ };
309
+
310
+ const handleDragOver = (e) => {
311
+ e.preventDefault();
312
+ setIsDragging(true);
313
+ };
314
+
315
+ const handleDragLeave = () => {
316
+ setIsDragging(false);
317
+ };
318
+
319
+ const handleDrop = (e) => {
320
+ e.preventDefault();
321
+ setIsDragging(false);
322
+ const file = e.dataTransfer.files[0];
323
+ if (file) {
324
+ handleFileSelect(file);
325
+ }
326
+ };
327
+
328
+ const getFileIcon = (type) => {
329
+ if (type?.includes("image")) return Image;
330
+ if (type?.includes("spreadsheet") || type?.includes("excel")) return FileSpreadsheet;
331
+ return FileText;
332
+ };
333
+
334
+ const FileIcon = selectedFile ? getFileIcon(selectedFile.type) : FileText;
335
+
336
+ // Clear error when file is cleared
337
+ useEffect(() => {
338
+ if (!selectedFile) {
339
+ setError(null);
340
+ }
341
+ }, [selectedFile]);
342
+
343
+ return (
344
+ <div className="w-full">
345
+ <AnimatePresence mode="wait">
346
+ {!selectedFile ? (
347
+ <motion.div
348
+ key="upload"
349
+ initial={{ opacity: 0, y: 10 }}
350
+ animate={{ opacity: 1, y: 0 }}
351
+ exit={{ opacity: 0, y: -10 }}
352
+ transition={{ duration: 0.2 }}
353
+ onDragOver={handleDragOver}
354
+ onDragLeave={handleDragLeave}
355
+ onDrop={handleDrop}
356
+ className={cn(
357
+ "relative group cursor-pointer",
358
+ "border-2 border-dashed rounded-2xl",
359
+ "transition-all duration-300 ease-out",
360
+ isDragging
361
+ ? "border-indigo-400 bg-indigo-50/50"
362
+ : "border-slate-200 hover:border-indigo-300 hover:bg-slate-50/50"
363
+ )}
364
+ >
365
+ <label className="flex flex-col items-center justify-center py-16 px-8 cursor-pointer">
366
+ <motion.div
367
+ animate={isDragging ? { scale: 1.1, y: -5 } : { scale: 1, y: 0 }}
368
+ className={cn(
369
+ "h-16 w-16 rounded-2xl flex items-center justify-center mb-6 transition-colors duration-300",
370
+ isDragging
371
+ ? "bg-indigo-100"
372
+ : "bg-gradient-to-br from-slate-100 to-slate-50 group-hover:from-indigo-100 group-hover:to-violet-50"
373
+ )}
374
+ >
375
+ <Upload
376
+ className={cn(
377
+ "h-7 w-7 transition-colors duration-300",
378
+ isDragging ? "text-indigo-600" : "text-slate-400 group-hover:text-indigo-500"
379
+ )}
380
+ />
381
+ </motion.div>
382
+
383
+ <div className="text-center">
384
+ <p className="text-lg font-semibold text-slate-700 mb-1">
385
+ {isDragging ? "Drop your file here" : "Drop your file here, or browse"}
386
+ </p>
387
+ <p className="text-sm text-slate-400">
388
+ Supports PDF, PNG, JPG, TIFF up to 4MB
389
+ </p>
390
+ </div>
391
+
392
+ <div className="flex items-center gap-2 mt-6">
393
+ <div className="flex -space-x-1">
394
+ {[
395
+ "bg-red-100 text-red-600",
396
+ "bg-blue-100 text-blue-600",
397
+ "bg-green-100 text-green-600",
398
+ "bg-amber-100 text-amber-600",
399
+ ].map((color, i) => (
400
+ <div
401
+ key={i}
402
+ className={`h-8 w-8 rounded-lg ${color.split(" ")[0]} flex items-center justify-center border-2 border-white`}
403
+ >
404
+ <FileText className={`h-4 w-4 ${color.split(" ")[1]}`} />
405
+ </div>
406
+ ))}
407
+ </div>
408
+ <span className="text-xs text-slate-400 ml-2">Multiple formats supported</span>
409
+ </div>
410
+
411
+ <input
412
+ type="file"
413
+ className="hidden"
414
+ accept=".pdf,.png,.jpg,.jpeg,.tiff,.tif"
415
+ onChange={(e) => {
416
+ const file = e.target.files[0];
417
+ if (file) {
418
+ handleFileSelect(file);
419
+ }
420
+ // Reset input so same file can be selected again after error
421
+ e.target.value = "";
422
+ }}
423
+ />
424
+ </label>
425
+
426
+ {/* Decorative gradient border on hover */}
427
+ <div className="absolute inset-0 -z-10 rounded-2xl bg-gradient-to-r from-indigo-500 via-violet-500 to-purple-500 opacity-0 group-hover:opacity-10 blur-xl transition-opacity duration-500" />
428
+ </motion.div>
429
+ ) : (
430
+ <motion.div
431
+ key="selected"
432
+ initial={{ opacity: 0, scale: 0.95 }}
433
+ animate={{ opacity: 1, scale: 1 }}
434
+ exit={{ opacity: 0, scale: 0.95 }}
435
+ className="grid grid-cols-1 lg:grid-cols-2 gap-3"
436
+ >
437
+ {/* File Info Box */}
438
+ <div className="relative bg-gradient-to-br from-indigo-50 to-violet-50 rounded-xl p-3 border border-indigo-100">
439
+ <div className="flex items-center gap-3">
440
+ <div className="h-10 w-10 rounded-lg bg-white shadow-sm flex items-center justify-center flex-shrink-0">
441
+ <FileIcon className="h-5 w-5 text-indigo-600" />
442
+ </div>
443
+ <div className="flex-1 min-w-0">
444
+ <p className="font-medium text-slate-800 truncate text-sm">{selectedFile.name}</p>
445
+ <div className="flex items-center gap-2 text-xs text-slate-500">
446
+ <span>{(selectedFile.size / 1024 / 1024).toFixed(2)} MB</span>
447
+ <span className="text-indigo-500">•</span>
448
+ <span className="text-indigo-600 flex items-center gap-1">
449
+ <Sparkles className="h-3 w-3" />
450
+ Ready for extraction
451
+ </span>
452
+ </div>
453
+ </div>
454
+ <button
455
+ onClick={onClear}
456
+ className="h-8 w-8 rounded-lg bg-white hover:bg-red-50 border border-slate-200 hover:border-red-200 flex items-center justify-center text-slate-400 hover:text-red-500 transition-colors"
457
+ >
458
+ <X className="h-4 w-4" />
459
+ </button>
460
+ </div>
461
+ </div>
462
+
463
+ {/* Key Fields Box */}
464
+ <div className="relative bg-white rounded-xl p-3 border border-slate-200">
465
+ <label className="block text-xs font-medium text-slate-600 mb-1.5">
466
+ <span className="font-bold">Key Fields</span> <span className="font-normal">(if required)</span>
467
+ </label>
468
+ <Input
469
+ type="text"
470
+ value={keyFields || ""}
471
+ onChange={(e) => {
472
+ if (onKeyFieldsChange) {
473
+ onKeyFieldsChange(e.target.value);
474
+ }
475
+ }}
476
+ placeholder="Invoice Number, Invoice Date, PO Number, Supplier Name, Total Amount, Payment terms, Additional Notes"
477
+ className="h-8 text-xs border-slate-200 focus:border-indigo-300 focus:ring-indigo-200"
478
+ />
479
+ </div>
480
+ </motion.div>
481
+ )}
482
+ </AnimatePresence>
483
+
484
+ {/* Error Message */}
485
+ {error && (
486
+ <motion.div
487
+ initial={{ opacity: 0, y: -10 }}
488
+ animate={{ opacity: 1, y: 0 }}
489
+ exit={{ opacity: 0, y: -10 }}
490
+ className="mt-3 p-3 bg-red-50 border border-red-200 rounded-xl flex items-start gap-2"
491
+ >
492
+ <AlertCircle className="h-4 w-4 text-red-600 flex-shrink-0 mt-0.5" />
493
+ <p className="text-sm text-red-700 flex-1">{error}</p>
494
+ <button
495
+ onClick={() => setError(null)}
496
+ className="text-red-600 hover:text-red-800 transition-colors"
497
+ >
498
+ <X className="h-4 w-4" />
499
+ </button>
500
+ </motion.div>
501
+ )}
502
+ </div>
503
+ );
504
+ }
505
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/components/ui/separator.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  import React from "react";
2
  import { cn } from "@/lib/utils";
3
 
@@ -14,3 +15,21 @@ export function Separator({ className, orientation = "horizontal", ...props }) {
14
  );
15
  }
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import React from "react";
3
  import { cn } from "@/lib/utils";
4
 
 
15
  );
16
  }
17
 
18
+ =======
19
+ import React from "react";
20
+ import { cn } from "@/lib/utils";
21
+
22
+ export function Separator({ className, orientation = "horizontal", ...props }) {
23
+ return (
24
+ <div
25
+ className={cn(
26
+ "shrink-0 bg-slate-200",
27
+ orientation === "horizontal" ? "h-px w-full" : "h-full w-px",
28
+ className
29
+ )}
30
+ {...props}
31
+ />
32
+ );
33
+ }
34
+
35
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/config/firebase.js CHANGED
@@ -1,3 +1,4 @@
 
1
  /**
2
  * Firebase configuration and initialization
3
  */
@@ -28,3 +29,35 @@ googleProvider.setCustomParameters({
28
 
29
  export default app;
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  /**
3
  * Firebase configuration and initialization
4
  */
 
29
 
30
  export default app;
31
 
32
+ =======
33
+ /**
34
+ * Firebase configuration and initialization
35
+ */
36
+ import { initializeApp } from 'firebase/app';
37
+ import { getAuth, GoogleAuthProvider } from 'firebase/auth';
38
+
39
// Firebase configuration from environment variables
const firebaseConfig = {
  apiKey: import.meta.env.VITE_FIREBASE_API_KEY,
  authDomain: import.meta.env.VITE_FIREBASE_AUTH_DOMAIN,
  projectId: import.meta.env.VITE_FIREBASE_PROJECT_ID,
  storageBucket: import.meta.env.VITE_FIREBASE_STORAGE_BUCKET,
  messagingSenderId: import.meta.env.VITE_FIREBASE_MESSAGING_SENDER_ID,
  appId: import.meta.env.VITE_FIREBASE_APP_ID,
};

// The Docker build only warns when these values are unset and still writes
// them (possibly empty) into .env, so a bundle can ship with blank settings.
// Surface that here instead of leaving it to an opaque failure at sign-in.
const missingFirebaseKeys = ["apiKey", "authDomain", "projectId"].filter(
  (key) => !firebaseConfig[key]
);
if (missingFirebaseKeys.length > 0) {
  console.warn(
    `Firebase configuration is missing: ${missingFirebaseKeys.join(", ")}. ` +
      "Check the VITE_FIREBASE_* environment variables."
  );
}

// Initialize Firebase
const app = initializeApp(firebaseConfig);

// Initialize Firebase Authentication and get a reference to the service
export const auth = getAuth(app);

// Configure Google Auth Provider; `prompt: 'select_account'` is passed as a
// custom parameter for the Google sign-in request.
export const googleProvider = new GoogleAuthProvider();
googleProvider.setCustomParameters({
  prompt: 'select_account'
});

export default app;
62
+
63
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/contexts/AuthContext.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  import React, { createContext, useContext, useState, useEffect } from "react";
2
  import { signInWithPopup, signOut as firebaseSignOut } from "firebase/auth";
3
  import { auth, googleProvider } from "@/config/firebase";
@@ -113,3 +114,120 @@ export function useAuth() {
113
  return context;
114
  }
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import React, { createContext, useContext, useState, useEffect } from "react";
3
  import { signInWithPopup, signOut as firebaseSignOut } from "firebase/auth";
4
  import { auth, googleProvider } from "@/config/firebase";
 
114
  return context;
115
  }
116
 
117
+ =======
118
+ import React, { createContext, useContext, useState, useEffect } from "react";
119
+ import { signInWithPopup, signOut as firebaseSignOut } from "firebase/auth";
120
+ import { auth, googleProvider } from "@/config/firebase";
121
+ import { getCurrentUser, firebaseLogin, requestOTP, verifyOTP, logout as apiLogout } from "@/services/auth";
122
+
123
+ const AuthContext = createContext(null);
124
+
125
+ export function AuthProvider({ children }) {
126
+ const [user, setUser] = useState(null);
127
+ const [loading, setLoading] = useState(true);
128
+ const [token, setToken] = useState(localStorage.getItem("auth_token"));
129
+
130
+ useEffect(() => {
131
+ // Check if user is already authenticated
132
+ if (token) {
133
+ checkAuth();
134
+ } else {
135
+ setLoading(false);
136
+ }
137
+ }, [token]);
138
+
139
+ const checkAuth = async () => {
140
+ try {
141
+ const userData = await getCurrentUser();
142
+ setUser(userData);
143
+ } catch (error) {
144
+ // Token is invalid, clear it
145
+ localStorage.removeItem("auth_token");
146
+ setToken(null);
147
+ setUser(null);
148
+ } finally {
149
+ setLoading(false);
150
+ }
151
+ };
152
+
153
+ const handleFirebaseLogin = async () => {
154
+ try {
155
+ const result = await signInWithPopup(auth, googleProvider);
156
+ const idToken = await result.user.getIdToken();
157
+ const response = await firebaseLogin(idToken);
158
+ handleAuthCallback(response.token);
159
+ } catch (error) {
160
+ if (error.code === 'auth/popup-closed' || error.code === 'auth/cancelled-popup-request') {
161
+ // User closed popup or cancelled - don't show error
162
+ return;
163
+ }
164
+ console.error("Firebase login error:", error);
165
+ throw new Error(error.message || "Firebase authentication failed");
166
+ }
167
+ };
168
+
169
+ const handleOTPRequest = async (email) => {
170
+ try {
171
+ await requestOTP(email);
172
+ } catch (error) {
173
+ console.error("OTP request error:", error);
174
+ throw error;
175
+ }
176
+ };
177
+
178
+ const handleOTPVerify = async (email, otp) => {
179
+ try {
180
+ const response = await verifyOTP(email, otp);
181
+ handleAuthCallback(response.token);
182
+ } catch (error) {
183
+ console.error("OTP verify error:", error);
184
+ throw error;
185
+ }
186
+ };
187
+
188
+ const handleLogout = async () => {
189
+ try {
190
+ // Sign out from Firebase if user was using Firebase auth
191
+ if (auth.currentUser) {
192
+ await firebaseSignOut(auth);
193
+ }
194
+ await apiLogout();
195
+ } catch (error) {
196
+ console.error("Logout error:", error);
197
+ } finally {
198
+ localStorage.removeItem("auth_token");
199
+ setToken(null);
200
+ setUser(null);
201
+ }
202
+ };
203
+
204
+ const handleAuthCallback = (newToken) => {
205
+ localStorage.setItem("auth_token", newToken);
206
+ setToken(newToken);
207
+ checkAuth();
208
+ };
209
+
210
+ const value = {
211
+ user,
212
+ token,
213
+ loading,
214
+ firebaseLogin: handleFirebaseLogin,
215
+ requestOTP: handleOTPRequest,
216
+ verifyOTP: handleOTPVerify,
217
+ logout: handleLogout,
218
+ handleAuthCallback,
219
+ isAuthenticated: !!user,
220
+ };
221
+
222
+ return <AuthContext.Provider value={value}>{children}</AuthContext.Provider>;
223
+ }
224
+
225
/**
 * Returns the value supplied by the nearest AuthProvider.
 *
 * @returns {object} The auth context value.
 * @throws {Error} When called outside an AuthProvider (the context value is falsy).
 */
export function useAuth() {
  const authValue = useContext(AuthContext);
  if (authValue) {
    return authValue;
  }
  throw new Error("useAuth must be used within an AuthProvider");
}
232
+
233
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/pages/Dashboard.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  // frontend/src/pages/Dashboard.jsx
2
 
3
  import React, { useState, useEffect } from "react";
@@ -474,3 +475,481 @@ export default function Dashboard() {
474
  </div>
475
  );
476
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  // frontend/src/pages/Dashboard.jsx
3
 
4
  import React, { useState, useEffect } from "react";
 
475
  </div>
476
  );
477
  }
478
+ =======
479
+ // frontend/src/pages/Dashboard.jsx
480
+
481
+ import React, { useState, useEffect } from "react";
482
+ import { useSearchParams } from "react-router-dom";
483
+ import { motion } from "framer-motion";
484
+ import { Sparkles, Zap, FileText, TrendingUp, Clock, AlertCircle } from "lucide-react";
485
+ import { Button } from "@/components/ui/button";
486
+ import UploadZone from "@/components/ocr/UploadZone";
487
+ import DocumentPreview from "@/components/ocr/DocumentPreview";
488
+ import ExtractionOutput from "@/components/ocr/ExtractionOutput";
489
+ import ExportButtons from "@/components/ExportButtons";
490
+ import ProcessingStatus from "@/components/ocr/ProcessingStatus";
491
+ import UpgradeModal from "@/components/ocr/UpgradeModal";
492
+ import { extractDocument, getHistory, getExtractionById } from "@/services/api";
493
+
494
+ export default function Dashboard() {
495
+ const [searchParams, setSearchParams] = useSearchParams();
496
+ const [selectedFile, setSelectedFile] = useState(null);
497
+ const [keyFields, setKeyFields] = useState("");
498
+ const [isProcessing, setIsProcessing] = useState(false);
499
+ const [isComplete, setIsComplete] = useState(false);
500
+ const [extractionResult, setExtractionResult] = useState(null);
501
+ const [error, setError] = useState(null);
502
+ const [processingStage, setProcessingStage] = useState("received"); // received, analysis, extraction, done
503
+ const [stats, setStats] = useState({ totalExtracted: 0, averageAccuracy: 0 });
504
+ const [isLoadingFromHistory, setIsLoadingFromHistory] = useState(false);
505
+ const [showUpgradeModal, setShowUpgradeModal] = useState(false);
506
+
507
+ const TRIAL_LIMIT = 2; // Maximum number of extractions allowed in trial
508
+
509
+ const handleFileSelect = (file) => {
510
+ // Check if user has reached trial limit
511
+ if (stats.totalExtracted >= TRIAL_LIMIT) {
512
+ setShowUpgradeModal(true);
513
+ return;
514
+ }
515
+ setSelectedFile(file);
516
+ setIsComplete(false);
517
+ setExtractionResult(null);
518
+ setError(null);
519
+ };
520
+
521
+ const handleClear = () => {
522
+ setSelectedFile(null);
523
+ setKeyFields("");
524
+ setIsProcessing(false);
525
+ setIsComplete(false);
526
+ setExtractionResult(null);
527
+ setError(null);
528
+ setProcessingStage("received");
529
+ };
530
+
531
+ // Load extraction from history if extractionId is in URL
532
+ useEffect(() => {
533
+ const extractionId = searchParams.get("extractionId");
534
+ console.log("Dashboard useEffect - extractionId:", extractionId, "isLoadingFromHistory:", isLoadingFromHistory, "extractionResult:", extractionResult);
535
+
536
+ if (extractionId && !isLoadingFromHistory) {
537
+ // Only load if we don't already have this extraction loaded
538
+ const currentExtractionId = extractionResult?.id;
539
+ if (currentExtractionId && currentExtractionId === parseInt(extractionId)) {
540
+ console.log("Extraction already loaded, skipping");
541
+ return;
542
+ }
543
+
544
+ const loadExtractionFromHistory = async () => {
545
+ setIsLoadingFromHistory(true);
546
+ setError(null);
547
+ try {
548
+ console.log("Loading extraction from history, ID:", extractionId);
549
+ const extraction = await getExtractionById(parseInt(extractionId));
550
+ console.log("Extraction loaded:", extraction);
551
+ console.log("Extraction fields:", extraction.fields);
552
+ console.log("Fields type:", typeof extraction.fields);
553
+ console.log("Fields keys:", extraction.fields ? Object.keys(extraction.fields) : "none");
554
+
555
+ if (!extraction) {
556
+ throw new Error("No extraction data received");
557
+ }
558
+
559
+ // Ensure fields is an object, not a string
560
+ let fieldsData = extraction.fields || {};
561
+ if (typeof fieldsData === 'string') {
562
+ try {
563
+ fieldsData = JSON.parse(fieldsData);
564
+ } catch (e) {
565
+ console.error("Failed to parse fields as JSON:", e);
566
+ fieldsData = {};
567
+ }
568
+ }
569
+
570
+ console.log("Processed fields:", fieldsData);
571
+
572
+ // Create file object from base64 if available, otherwise create empty file
573
+ let fileForPreview;
574
+ if (extraction.fileBase64) {
575
+ // Convert base64 to binary
576
+ const binaryString = atob(extraction.fileBase64);
577
+ const bytes = new Uint8Array(binaryString.length);
578
+ for (let i = 0; i < binaryString.length; i++) {
579
+ bytes[i] = binaryString.charCodeAt(i);
580
+ }
581
+ const fileBlob = new Blob([bytes], { type: extraction.fileType || "application/pdf" });
582
+ fileForPreview = new File(
583
+ [fileBlob],
584
+ extraction.fileName || "document.pdf",
585
+ { type: extraction.fileType || "application/pdf" }
586
+ );
587
+ console.log("Created file from base64:", fileForPreview.name, fileForPreview.size, "bytes");
588
+ } else {
589
+ // Fallback: create empty file if base64 not available
590
+ const fileBlob = new Blob([], { type: extraction.fileType || "application/pdf" });
591
+ fileForPreview = new File(
592
+ [fileBlob],
593
+ extraction.fileName || "document.pdf",
594
+ { type: extraction.fileType || "application/pdf" }
595
+ );
596
+ console.log("No base64 available, created empty file");
597
+ }
598
+
599
+ // Set the extraction result - match the structure from extractDocument
600
+ const result = {
601
+ id: extraction.id,
602
+ fields: fieldsData,
603
+ confidence: extraction.confidence || 0,
604
+ fieldsExtracted: extraction.fieldsExtracted || 0,
605
+ totalTime: extraction.totalTime || 0,
606
+ fileName: extraction.fileName,
607
+ fileType: extraction.fileType,
608
+ fileSize: extraction.fileSize,
609
+ };
610
+
611
+ console.log("Setting extraction result:", result);
612
+ setExtractionResult(result);
613
+ setSelectedFile(fileForPreview);
614
+ setIsComplete(true);
615
+ setIsProcessing(false);
616
+ setProcessingStage("done");
617
+
618
+ // Remove the extractionId from URL
619
+ setSearchParams({});
620
+ } catch (err) {
621
+ console.error("Failed to load extraction from history:", err);
622
+ const errorMessage = err.message || "Failed to load extraction from history";
623
+ setError(errorMessage);
624
+ // Don't clear the URL params on error so user can see what went wrong
625
+ } finally {
626
+ setIsLoadingFromHistory(false);
627
+ }
628
+ };
629
+
630
+ loadExtractionFromHistory();
631
+ }
632
+ }, [searchParams, isLoadingFromHistory, setSearchParams]);
633
+
634
+ // Fetch and calculate stats from history
635
+ useEffect(() => {
636
+ const fetchStats = async () => {
637
+ try {
638
+ const history = await getHistory();
639
+
640
+ // Calculate total extracted (only completed extractions)
641
+ const completedExtractions = history.filter(item => item.status === "completed");
642
+ const totalExtracted = completedExtractions.length;
643
+
644
+ // Calculate average accuracy from completed extractions
645
+ const accuracies = completedExtractions
646
+ .map(item => item.confidence || 0)
647
+ .filter(acc => acc > 0);
648
+
649
+ const averageAccuracy = accuracies.length > 0
650
+ ? accuracies.reduce((sum, acc) => sum + acc, 0) / accuracies.length
651
+ : 0;
652
+
653
+ setStats({
654
+ totalExtracted,
655
+ averageAccuracy: Math.round(averageAccuracy * 10) / 10 // Round to 1 decimal place
656
+ });
657
+ } catch (err) {
658
+ console.error("Failed to fetch stats:", err);
659
+ // Keep default values on error
660
+ }
661
+ };
662
+
663
+ // Fetch stats on mount and when extraction completes
664
+ fetchStats();
665
+ }, [isComplete]);
666
+
667
/**
 * Run an extraction for the currently selected file and drive the staged
 * progress indicator ("received" -> "analysis" -> "extraction" -> "done").
 *
 * Does nothing when no file is selected. Opens the upgrade modal instead of
 * extracting when the trial limit has been reached. On failure the error
 * message is stored via `setError` and the stage is reset to "received".
 *
 * @returns {Promise<void>}
 */
const handleExtract = async () => {
  if (!selectedFile) return;

  // Check if user has reached trial limit before processing
  if (stats.totalExtracted >= TRIAL_LIMIT) {
    setShowUpgradeModal(true);
    return;
  }

  setIsProcessing(true);
  setIsComplete(false);
  setError(null);
  setExtractionResult(null);
  setProcessingStage("received");

  // Cosmetic stage transitions while the request is in flight. Both timer ids
  // are kept so that neither can fire after the request has settled and
  // overwrite the stage chosen by the success/failure path below.
  const analysisTimer = setTimeout(() => {
    setProcessingStage("analysis");
  }, 100);
  const extractionTimer = setTimeout(() => {
    setProcessingStage("extraction");
  }, 2500);
  const clearStageTimers = () => {
    clearTimeout(analysisTimer);
    clearTimeout(extractionTimer);
  };

  try {
    const result = await extractDocument(selectedFile, keyFields);

    clearStageTimers();

    // Move to extraction stage if not already there, then to done
    setProcessingStage("extraction");

    // Small delay to show extraction stage, then move to done when results are rendered
    setTimeout(() => {
      setProcessingStage("done");
      setExtractionResult(result);
      setIsComplete(true);
      setIsProcessing(false);
    }, 500); // Give time to see extraction stage
  } catch (err) {
    clearStageTimers();
    console.error("Extraction error:", err);
    setError(err?.message || "Failed to extract document. Please try again.");
    setIsComplete(false);
    setProcessingStage("received");
    setIsProcessing(false);
  }
};
717
+
718
+ return (
719
+ <div className="min-h-screen bg-[#FAFAFA]">
720
+ {/* Header */}
721
+ <header className="bg-white border-b border-slate-200/80 sticky top-0 z-40 h-16">
722
+ <div className="px-8 h-full flex items-center justify-between">
723
+ <div>
724
+ <h1 className="text-xl font-bold text-slate-900 tracking-tight leading-tight">
725
+ Multi-Lingual Document Extraction
726
+ </h1>
727
+ <p className="text-sm text-slate-500 leading-tight">
728
+ Upload any document and extract structured data with VRP (No LLM)
729
+ </p>
730
+ </div>
731
+ <div className="flex items-center gap-3">
732
+ {/* Stats Pills */}
733
+ <div className="hidden lg:flex items-center gap-2">
734
+ <div className="flex items-center gap-2 px-3 py-1.5 bg-slate-100 rounded-lg">
735
+ <FileText className="h-4 w-4 text-slate-500" />
736
+ <span className="text-sm font-medium text-slate-700">
737
+ {stats.totalExtracted}/{TRIAL_LIMIT} Used
738
+ </span>
739
+ </div>
740
+ <div className="flex items-center gap-2 px-3 py-1.5 bg-emerald-50 rounded-lg">
741
+ <TrendingUp className="h-4 w-4 text-emerald-600" />
742
+ <span className="text-sm font-medium text-emerald-700">
743
+ {stats.averageAccuracy > 0 ? `${stats.averageAccuracy}%` : "0%"} Accuracy
744
+ </span>
745
+ </div>
746
+ </div>
747
+
748
+ <ExportButtons isComplete={isComplete} extractionResult={extractionResult} />
749
+ </div>
750
+ </div>
751
+ </header>
752
+
753
+ {/* Main Content */}
754
+ <div className="p-8">
755
+ {/* Upload Section */}
756
+ <motion.div
757
+ initial={{ opacity: 0, y: 20 }}
758
+ animate={{ opacity: 1, y: 0 }}
759
+ className="max-w-3xl mx-auto mb-4"
760
+ >
761
+ <UploadZone
762
+ onFileSelect={handleFileSelect}
763
+ selectedFile={selectedFile}
764
+ onClear={handleClear}
765
+ keyFields={keyFields}
766
+ onKeyFieldsChange={setKeyFields}
767
+ />
768
+
769
+ {/* Extract Button */}
770
+ {selectedFile && !isProcessing && !isComplete && (
771
+ <motion.div
772
+ initial={{ opacity: 0, y: 10 }}
773
+ animate={{ opacity: 1, y: 0 }}
774
+ className="mt-4 flex justify-center"
775
+ >
776
+ <Button
777
+ onClick={handleExtract}
778
+ size="lg"
779
+ className="h-14 px-8 rounded-2xl font-semibold text-base bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700 shadow-xl shadow-indigo-500/25 hover:shadow-2xl hover:shadow-indigo-500/30 transition-all duration-300 hover:-translate-y-0.5"
780
+ >
781
+ <Sparkles className="h-5 w-5 mr-2" />
782
+ Start Extraction
783
+ <Zap className="h-4 w-4 ml-2 opacity-70" />
784
+ </Button>
785
+ </motion.div>
786
+ )}
787
+ </motion.div>
788
+
789
+ {/* Error Message */}
790
+ {error && (
791
+ <motion.div
792
+ initial={{ opacity: 0, y: -10 }}
793
+ animate={{ opacity: 1, y: 0 }}
794
+ className="max-w-3xl mx-auto mb-6"
795
+ >
796
+ <div className="bg-red-50 border border-red-200 rounded-2xl p-4 flex items-start gap-3">
797
+ <AlertCircle className="h-5 w-5 text-red-600 flex-shrink-0 mt-0.5" />
798
+ <div className="flex-1">
799
+ <h3 className="font-semibold text-red-900 mb-1">Extraction Failed</h3>
800
+ <p className="text-sm text-red-700">{error}</p>
801
+ </div>
802
+ <button
803
+ onClick={() => setError(null)}
804
+ className="text-red-400 hover:text-red-600 transition-colors"
805
+ >
806
+ ×
807
+ </button>
808
+ </div>
809
+ </motion.div>
810
+ )}
811
+
812
+ {/* Loading from History */}
813
+ {isLoadingFromHistory && (
814
+ <motion.div
815
+ initial={{ opacity: 0, y: -10 }}
816
+ animate={{ opacity: 1, y: 0 }}
817
+ className="max-w-3xl mx-auto mb-6"
818
+ >
819
+ <div className="bg-blue-50 border border-blue-200 rounded-2xl p-4 flex items-center gap-3">
820
+ <Clock className="h-5 w-5 text-blue-600 animate-spin" />
821
+ <div className="flex-1">
822
+ <h3 className="font-semibold text-blue-900 mb-1">Loading extraction...</h3>
823
+ <p className="text-sm text-blue-700">Retrieving extraction data from history</p>
824
+ </div>
825
+ </div>
826
+ </motion.div>
827
+ )}
828
+
829
+ {/* Processing Status */}
830
+ {(isProcessing || isComplete) && !isLoadingFromHistory && (
831
+ <div className="max-w-3xl mx-auto mb-4">
832
+ <ProcessingStatus
833
+ isProcessing={isProcessing}
834
+ isComplete={isComplete}
835
+ currentStage={processingStage}
836
+ />
837
+ </div>
838
+ )}
839
+
840
+ {/* Split View */}
841
+ {selectedFile && (
842
+ <motion.div
843
+ initial={{ opacity: 0, y: 20 }}
844
+ animate={{ opacity: 1, y: 0 }}
845
+ transition={{ delay: 0.2 }}
846
+ className="grid grid-cols-1 lg:grid-cols-2 gap-4"
847
+ style={{ height: "calc(100vh - 320px)", minHeight: "450px" }}
848
+ >
849
+ <DocumentPreview
850
+ file={selectedFile}
851
+ isProcessing={isProcessing}
852
+ isFromHistory={!!extractionResult?.id}
853
+ />
854
+ <ExtractionOutput
855
+ hasFile={!!selectedFile}
856
+ isProcessing={isProcessing}
857
+ isComplete={isComplete}
858
+ extractionResult={extractionResult}
859
+ onNewUpload={handleClear}
860
+ />
861
+ </motion.div>
862
+ )}
863
+
864
+ {/* Empty State Features */}
865
+ {!selectedFile && (
866
+ <motion.div
867
+ initial={{ opacity: 0 }}
868
+ animate={{ opacity: 1 }}
869
+ transition={{ delay: 0.3 }}
870
+ className="max-w-5xl mx-auto mt-12"
871
+ >
872
+ <div className="text-center mb-10">
873
+ <h2 className="text-2xl font-bold text-slate-900 mb-2">
874
+ Pure Agentic Document Intelligence
875
+ </h2>
876
+ <p className="text-slate-500">
877
+ Extract structured data from any document without LLM using VRP (Visual Resoning Processor)
878
+ </p>
879
+ </div>
880
+
881
+ <div className="grid grid-cols-1 md:grid-cols-3 gap-6">
882
+ {[
883
+ {
884
+ icon: Zap,
885
+ title: "Lightning Fast",
886
+ description:
887
+ "Process documents faster with our agentic pipeline",
888
+ color: "amber",
889
+ },
890
+ {
891
+ icon: Sparkles,
892
+ title: `${stats.averageAccuracy > 0 ? stats.averageAccuracy : "99.8"}% Accuracy`,
893
+ description:
894
+ "Industry-leading extraction accuracy",
895
+ color: "indigo",
896
+ },
897
+ {
898
+ icon: Clock,
899
+ title: "Any Format",
900
+ description:
901
+ "Support for PDF, images, and scanned documents",
902
+ color: "emerald",
903
+ },
904
+ ].map((feature, index) => (
905
+ <motion.div
906
+ key={feature.title}
907
+ initial={{ opacity: 0, y: 20 }}
908
+ animate={{ opacity: 1, y: 0 }}
909
+ transition={{ delay: 0.4 + index * 0.1 }}
910
+ className="group bg-white rounded-2xl border border-slate-200 p-6 hover:shadow-xl hover:shadow-slate-200/50 transition-all duration-300 hover:-translate-y-1"
911
+ >
912
+ <div
913
+ className={`h-12 w-12 rounded-xl bg-${feature.color}-50 flex items-center justify-center mb-4 group-hover:scale-110 transition-transform duration-300`}
914
+ >
915
+ <feature.icon
916
+ className={`h-6 w-6 text-${feature.color}-600`}
917
+ />
918
+ </div>
919
+ <h3 className="font-semibold text-slate-900 mb-2">
920
+ {feature.title}
921
+ </h3>
922
+ <p className="text-sm text-slate-500 leading-relaxed">
923
+ {feature.description}
924
+ </p>
925
+ </motion.div>
926
+ ))}
927
+ </div>
928
+
929
+ {/* Supported Formats */}
930
+ <div className="mt-12 text-center">
931
+ <p className="text-xs text-slate-400 uppercase tracking-wider mb-4 font-medium">
932
+ Supported Formats
933
+ </p>
934
+ <div className="flex items-center justify-center gap-6 flex-wrap">
935
+ {["PDF", "PNG", "JPG", "TIFF", "JPEG"].map((format) => (
936
+ <div
937
+ key={format}
938
+ className="flex items-center gap-2 text-slate-400"
939
+ >
940
+ <FileText className="h-4 w-4" />
941
+ <span className="text-sm font-medium">{format}</span>
942
+ </div>
943
+ ))}
944
+ </div>
945
+ </div>
946
+ </motion.div>
947
+ )}
948
+ </div>
949
+
950
+ {/* Upgrade Modal */}
951
+ <UpgradeModal open={showUpgradeModal} onClose={() => setShowUpgradeModal(false)} />
952
+ </div>
953
+ );
954
+ }
955
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/pages/History.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  // frontend/src/pages/History.jsx
2
 
3
  import React, { useState, useEffect } from "react";
@@ -857,3 +858,864 @@ export default function History() {
857
  </div>
858
  );
859
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  // frontend/src/pages/History.jsx
3
 
4
  import React, { useState, useEffect } from "react";
 
858
  </div>
859
  );
860
  }
861
+ =======
862
+ // frontend/src/pages/History.jsx
863
+
864
+ import React, { useState, useEffect } from "react";
865
+ import { useNavigate, useSearchParams } from "react-router-dom";
866
+ import { motion, AnimatePresence } from "framer-motion";
867
+ import {
868
+ FileText,
869
+ Clock,
870
+ CheckCircle2,
871
+ ChevronRight,
872
+ Download,
873
+ Eye,
874
+ Trash2,
875
+ Search,
876
+ Filter,
877
+ Calendar,
878
+ Upload,
879
+ Cpu,
880
+ TableProperties,
881
+ MonitorPlay,
882
+ TrendingUp,
883
+ TrendingDown,
884
+ Minus,
885
+ AlertCircle,
886
+ X,
887
+ FileSpreadsheet,
888
+ Table2,
889
+ } from "lucide-react";
890
+ import { Button } from "@/components/ui/button";
891
+ import { Input } from "@/components/ui/input";
892
+ import { Badge } from "@/components/ui/badge";
893
+ import {
894
+ Select,
895
+ SelectContent,
896
+ SelectItem,
897
+ SelectTrigger,
898
+ SelectValue,
899
+ } from "@/components/ui/select";
900
+ import {
901
+ DropdownMenu,
902
+ DropdownMenuContent,
903
+ DropdownMenuItem,
904
+ DropdownMenuSeparator,
905
+ DropdownMenuTrigger,
906
+ } from "@/components/ui/dropdown-menu";
907
+ import { cn } from "@/lib/utils";
908
+ import { getHistory } from "@/services/api";
909
+
910
+ // minimal "toast"
911
+ const toastSuccess = (msg) => {
912
+ console.log(msg);
913
+ };
914
+
915
+ const stageConfig = {
916
+ uploading: { label: "Uploading", icon: Upload, color: "blue" },
917
+ aiAnalysis: { label: "AI Analysis", icon: Cpu, color: "violet" },
918
+ dataExtraction: { label: "Data Extraction", icon: TableProperties, color: "emerald" },
919
+ outputRendering: { label: "Output Rendering", icon: MonitorPlay, color: "amber" },
920
+ };
921
+
922
+ const variationConfig = {
923
+ fast: { icon: TrendingDown, color: "text-emerald-500", label: "Faster than avg" },
924
+ normal: { icon: Minus, color: "text-slate-400", label: "Normal" },
925
+ slow: { icon: TrendingUp, color: "text-amber-500", label: "Slower than avg" },
926
+ error: { icon: AlertCircle, color: "text-red-500", label: "Error" },
927
+ skipped: { icon: Minus, color: "text-slate-300", label: "Skipped" },
928
+ };
929
+
930
+ export default function History() {
931
+ const navigate = useNavigate();
932
+ const [searchParams, setSearchParams] = useSearchParams();
933
+ const [searchQuery, setSearchQuery] = useState("");
934
+ const [selectedStatus, setSelectedStatus] = useState("all");
935
+ const [expandedReport, setExpandedReport] = useState(null);
936
+ const [isExporting, setIsExporting] = useState(false);
937
+ const [history, setHistory] = useState([]);
938
+ const [isLoading, setIsLoading] = useState(true);
939
+ const [error, setError] = useState(null);
940
+
941
+ // Fetch history on component mount
942
+ useEffect(() => {
943
+ const fetchHistory = async () => {
944
+ setIsLoading(true);
945
+ setError(null);
946
+ try {
947
+ const data = await getHistory();
948
+ setHistory(data);
949
+
950
+ // Check if there's an extractionId in URL (from share link)
951
+ const extractionId = searchParams.get("extractionId");
952
+ if (extractionId) {
953
+ // Clear the query param and navigate to dashboard
954
+ setSearchParams({});
955
+ // Small delay to ensure history is loaded
956
+ setTimeout(() => {
957
+ navigate(`/?extractionId=${extractionId}`);
958
+ }, 100);
959
+ }
960
+ } catch (err) {
961
+ console.error("Failed to fetch history:", err);
962
+ setError(err.message || "Failed to load history");
963
+ setHistory([]); // Fallback to empty array
964
+ } finally {
965
+ setIsLoading(false);
966
+ }
967
+ };
968
+
969
+ fetchHistory();
970
+ }, [searchParams, setSearchParams, navigate]);
971
+
972
+ const filteredHistory = history.filter((item) => {
973
+ const matchesSearch = item.fileName?.toLowerCase().includes(searchQuery.toLowerCase()) ?? false;
974
+ const matchesStatus = selectedStatus === "all" || item.status === selectedStatus;
975
+ return matchesSearch && matchesStatus;
976
+ });
977
+
978
+ const formatTime = (ms) => {
979
+ if (ms >= 1000) {
980
+ return `${(ms / 1000).toFixed(2)}s`;
981
+ }
982
+ return `${ms}ms`;
983
+ };
984
+
985
+ const formatTimeForExport = (ms) => {
986
+ return ms >= 1000 ? `${(ms / 1000).toFixed(2)}s` : `${ms}ms`;
987
+ };
988
+
989
+ const formatDate = (dateString) => {
990
+ const date = new Date(dateString);
991
+ return date.toLocaleDateString("en-US", {
992
+ month: "short",
993
+ day: "numeric",
994
+ hour: "2-digit",
995
+ minute: "2-digit",
996
+ });
997
+ };
998
+
999
+ const formatDateForExport = (dateString) => {
1000
+ const date = new Date(dateString);
1001
+ return date.toISOString().replace("T", " ").slice(0, 19);
1002
+ };
1003
+
1004
+ const generateCSV = (data) => {
1005
+ const headers = [
1006
+ "File Name",
1007
+ "File Type",
1008
+ "File Size",
1009
+ "Extracted At",
1010
+ "Status",
1011
+ "Confidence (%)",
1012
+ "Fields Extracted",
1013
+ "Total Time (ms)",
1014
+ "Upload Time (ms)",
1015
+ "Upload Status",
1016
+ "Upload Variation",
1017
+ "AI Analysis Time (ms)",
1018
+ "AI Analysis Status",
1019
+ "AI Analysis Variation",
1020
+ "Data Extraction Time (ms)",
1021
+ "Data Extraction Status",
1022
+ "Data Extraction Variation",
1023
+ "Output Rendering Time (ms)",
1024
+ "Output Rendering Status",
1025
+ "Output Rendering Variation",
1026
+ "Error Message",
1027
+ ];
1028
+
1029
+ const rows = data.map((item) => [
1030
+ item.fileName,
1031
+ item.fileType,
1032
+ item.fileSize,
1033
+ formatDateForExport(item.extractedAt),
1034
+ item.status,
1035
+ item.confidence,
1036
+ item.fieldsExtracted,
1037
+ item.totalTime,
1038
+ item.stages.uploading.time,
1039
+ item.stages.uploading.status,
1040
+ item.stages.uploading.variation,
1041
+ item.stages.aiAnalysis.time,
1042
+ item.stages.aiAnalysis.status,
1043
+ item.stages.aiAnalysis.variation,
1044
+ item.stages.dataExtraction.time,
1045
+ item.stages.dataExtraction.status,
1046
+ item.stages.dataExtraction.variation,
1047
+ item.stages.outputRendering.time,
1048
+ item.stages.outputRendering.status,
1049
+ item.stages.outputRendering.variation,
1050
+ item.errorMessage || "",
1051
+ ]);
1052
+
1053
+ const csvContent = [
1054
+ headers.join(","),
1055
+ ...rows.map((row) => row.map((cell) => `"${cell}"`).join(",")),
1056
+ ].join("\n");
1057
+
1058
+ return csvContent;
1059
+ };
1060
+
1061
+ const downloadFile = (content, fileName, mimeType) => {
1062
+ const blob = new Blob([content], { type: mimeType });
1063
+ const url = URL.createObjectURL(blob);
1064
+ const link = document.createElement("a");
1065
+ link.href = url;
1066
+ link.download = fileName;
1067
+ document.body.appendChild(link);
1068
+ link.click();
1069
+ document.body.removeChild(link);
1070
+ URL.revokeObjectURL(url);
1071
+ };
1072
+
1073
+ const handleExportCSV = () => {
1074
+ setIsExporting(true);
1075
+ setTimeout(() => {
1076
+ const csvContent = generateCSV(filteredHistory);
1077
+ downloadFile(
1078
+ csvContent,
1079
+ `extraction_history_${new Date().toISOString().slice(0, 10)}.csv`,
1080
+ "text/csv;charset=utf-8;"
1081
+ );
1082
+ toastSuccess("CSV exported successfully");
1083
+ setIsExporting(false);
1084
+ }, 500);
1085
+ };
1086
+
1087
+ const generateExcelXML = (data) => {
1088
+ const headers = [
1089
+ "File Name",
1090
+ "File Type",
1091
+ "File Size",
1092
+ "Extracted At",
1093
+ "Status",
1094
+ "Confidence (%)",
1095
+ "Fields Extracted",
1096
+ "Total Time (ms)",
1097
+ "Upload Time (ms)",
1098
+ "Upload Status",
1099
+ "Upload Variation",
1100
+ "AI Analysis Time (ms)",
1101
+ "AI Analysis Status",
1102
+ "AI Analysis Variation",
1103
+ "Data Extraction Time (ms)",
1104
+ "Data Extraction Status",
1105
+ "Data Extraction Variation",
1106
+ "Output Rendering Time (ms)",
1107
+ "Output Rendering Status",
1108
+ "Output Rendering Variation",
1109
+ "Error Message",
1110
+ ];
1111
+
1112
+ const rows = data.map((item) => [
1113
+ item.fileName,
1114
+ item.fileType,
1115
+ item.fileSize,
1116
+ formatDateForExport(item.extractedAt),
1117
+ item.status,
1118
+ item.confidence,
1119
+ item.fieldsExtracted,
1120
+ item.totalTime,
1121
+ item.stages.uploading.time,
1122
+ item.stages.uploading.status,
1123
+ item.stages.uploading.variation,
1124
+ item.stages.aiAnalysis.time,
1125
+ item.stages.aiAnalysis.status,
1126
+ item.stages.aiAnalysis.variation,
1127
+ item.stages.dataExtraction.time,
1128
+ item.stages.dataExtraction.status,
1129
+ item.stages.dataExtraction.variation,
1130
+ item.stages.outputRendering.time,
1131
+ item.stages.outputRendering.status,
1132
+ item.stages.outputRendering.variation,
1133
+ item.errorMessage || "",
1134
+ ]);
1135
+
1136
+ let xml = `<?xml version="1.0" encoding="UTF-8"?>
1137
+ <?mso-application progid="Excel.Sheet"?>
1138
+ <Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"
1139
+ xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet">
1140
+ <Worksheet ss:Name="Extraction History">
1141
+ <Table>
1142
+ <Row>`;
1143
+
1144
+ headers.forEach((header) => {
1145
+ xml += `<Cell><Data ss:Type="String">${header}</Data></Cell>`;
1146
+ });
1147
+ xml += `</Row>`;
1148
+
1149
+ rows.forEach((row) => {
1150
+ xml += `<Row>`;
1151
+ row.forEach((cell) => {
1152
+ const type = typeof cell === "number" ? "Number" : "String";
1153
+ xml += `<Cell><Data ss:Type="${type}">${cell}</Data></Cell>`;
1154
+ });
1155
+ xml += `</Row>`;
1156
+ });
1157
+
1158
+ xml += `</Table></Worksheet></Workbook>`;
1159
+ return xml;
1160
+ };
1161
+
1162
+ const handleExportExcel = () => {
1163
+ setIsExporting(true);
1164
+ setTimeout(() => {
1165
+ const excelContent = generateExcelXML(filteredHistory);
1166
+ downloadFile(
1167
+ excelContent,
1168
+ `extraction_history_${new Date().toISOString().slice(0, 10)}.xls`,
1169
+ "application/vnd.ms-excel"
1170
+ );
1171
+ toastSuccess("Excel file exported successfully");
1172
+ setIsExporting(false);
1173
+ }, 500);
1174
+ };
1175
+
1176
+ const handleExportSingleReport = (item, format) => {
1177
+ if (format === "csv") {
1178
+ const csvContent = generateCSV([item]);
1179
+ downloadFile(
1180
+ csvContent,
1181
+ `${item.fileName.replace(/\.[^/.]+$/, "")}_report.csv`,
1182
+ "text/csv;charset=utf-8;"
1183
+ );
1184
+ toastSuccess("Report exported as CSV");
1185
+ } else {
1186
+ const excelContent = generateExcelXML([item]);
1187
+ downloadFile(
1188
+ excelContent,
1189
+ `${item.fileName.replace(/\.[^/.]+$/, "")}_report.xls`,
1190
+ "application/vnd.ms-excel"
1191
+ );
1192
+ toastSuccess("Report exported as Excel");
1193
+ }
1194
+ };
1195
+
1196
+ return (
1197
+ <div className="min-h-screen bg-[#FAFAFA]">
1198
+ {/* Header */}
1199
+ <header className="bg-white border-b border-slate-200/80 sticky top-0 z-40 h-16">
1200
+ <div className="px-8 h-full flex items-center">
1201
+ <div>
1202
+ <h1 className="text-xl font-bold text-slate-900 tracking-tight leading-tight">
1203
+ Extraction History
1204
+ </h1>
1205
+ <p className="text-sm text-slate-500 leading-tight">
1206
+ View detailed reports and performance metrics for all extractions
1207
+ </p>
1208
+ </div>
1209
+ </div>
1210
+ </header>
1211
+
1212
+ {/* Content */}
1213
+ <div className="p-8">
1214
+ {/* Filters */}
1215
+ <div className="flex items-center gap-4 mb-6">
1216
+ <div className="relative flex-1 max-w-md">
1217
+ <Search className="absolute left-3 top-1/2 -translate-y-1/2 h-4 w-4 text-slate-400" />
1218
+ <Input
1219
+ placeholder="Search by file name..."
1220
+ value={searchQuery}
1221
+ onChange={(e) => setSearchQuery(e.target.value)}
1222
+ className="pl-10 h-11 rounded-xl border-slate-200"
1223
+ />
1224
+ </div>
1225
+ <Select
1226
+ value={selectedStatus}
1227
+ onValueChange={(value) => setSelectedStatus(value)}
1228
+ >
1229
+ <SelectTrigger className="w-40 h-11 rounded-xl border-slate-200">
1230
+ <Filter className="h-4 w-4 mr-2 text-slate-400" />
1231
+ <SelectValue placeholder="Status" />
1232
+ </SelectTrigger>
1233
+ <SelectContent>
1234
+ <SelectItem value="all">All Status</SelectItem>
1235
+ <SelectItem value="completed">Completed</SelectItem>
1236
+ <SelectItem value="failed">Failed</SelectItem>
1237
+ </SelectContent>
1238
+ </Select>
1239
+
1240
+ {/* Export All Button */}
1241
+ <DropdownMenu>
1242
+ <DropdownMenuTrigger asChild>
1243
+ <Button
1244
+ className="h-11 px-4 rounded-xl bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700 shadow-lg shadow-indigo-500/25"
1245
+ disabled={isExporting || filteredHistory.length === 0}
1246
+ >
1247
+ {isExporting ? (
1248
+ <motion.div
1249
+ animate={{ rotate: 360 }}
1250
+ transition={{
1251
+ duration: 1,
1252
+ repeat: Infinity,
1253
+ ease: "linear",
1254
+ }}
1255
+ className="mr-2"
1256
+ >
1257
+ <Download className="h-4 w-4" />
1258
+ </motion.div>
1259
+ ) : (
1260
+ <Download className="h-4 w-4 mr-2" />
1261
+ )}
1262
+ Export All
1263
+ </Button>
1264
+ </DropdownMenuTrigger>
1265
+ <DropdownMenuContent
1266
+ align="end"
1267
+ className="w-48 rounded-xl p-2"
1268
+ >
1269
+ <DropdownMenuItem
1270
+ className="rounded-lg cursor-pointer"
1271
+ onClick={handleExportCSV}
1272
+ >
1273
+ <Table2 className="h-4 w-4 mr-2 text-emerald-600" />
1274
+ Export as CSV
1275
+ </DropdownMenuItem>
1276
+ <DropdownMenuItem
1277
+ className="rounded-lg cursor-pointer"
1278
+ onClick={handleExportExcel}
1279
+ >
1280
+ <FileSpreadsheet className="h-4 w-4 mr-2 text-green-600" />
1281
+ Export as Excel
1282
+ </DropdownMenuItem>
1283
+ <DropdownMenuSeparator />
1284
+ <div className="px-2 py-1.5 text-xs text-slate-500">
1285
+ {filteredHistory.length} records will be exported
1286
+ </div>
1287
+ </DropdownMenuContent>
1288
+ </DropdownMenu>
1289
+ </div>
1290
+
1291
+ {/* Stats Overview */}
1292
+ <div className="grid grid-cols-1 md:grid-cols-4 gap-4 mb-8">
1293
+ {(() => {
1294
+ const total = history.length;
1295
+ const completed = history.filter((h) => h.status === "completed").length;
1296
+ const successRate = total > 0 ? ((completed / total) * 100).toFixed(1) : 0;
1297
+ const avgTime = history.length > 0
1298
+ ? history.reduce((sum, h) => sum + (h.totalTime || 0), 0) / history.length
1299
+ : 0;
1300
+ const totalFields = history.reduce((sum, h) => sum + (h.fieldsExtracted || 0), 0);
1301
+
1302
+ return [
1303
+ {
1304
+ label: "Total Extractions",
1305
+ value: total.toString(),
1306
+ change: "",
1307
+ color: "indigo",
1308
+ },
1309
+ {
1310
+ label: "Success Rate",
1311
+ value: `${successRate}%`,
1312
+ change: total > 0 ? `${completed}/${total} successful` : "No data",
1313
+ color: "emerald",
1314
+ },
1315
+ {
1316
+ label: "Avg. Processing Time",
1317
+ value: avgTime >= 1000 ? `${(avgTime / 1000).toFixed(1)}s` : `${Math.round(avgTime)}ms`,
1318
+ change: "",
1319
+ color: "violet",
1320
+ },
1321
+ {
1322
+ label: "Fields Extracted",
1323
+ value: totalFields.toLocaleString(),
1324
+ change: "",
1325
+ color: "amber",
1326
+ },
1327
+ ].map((stat, index) => (
1328
+ <motion.div
1329
+ key={stat.label}
1330
+ initial={{ opacity: 0, y: 20 }}
1331
+ animate={{ opacity: 1, y: 0 }}
1332
+ transition={{ delay: index * 0.1 }}
1333
+ className="bg-white rounded-2xl border border-slate-200 p-5"
1334
+ >
1335
+ <p className="text-sm text-slate-500 mb-1">{stat.label}</p>
1336
+ <p className="text-2xl font-bold text-slate-900">{stat.value}</p>
1337
+ <p className={`text-xs text-${stat.color}-600 mt-1`}>
1338
+ {stat.change}
1339
+ </p>
1340
+ </motion.div>
1341
+ ));
1342
+ })()}
1343
+ </div>
1344
+
1345
+ {/* Loading State */}
1346
+ {isLoading && (
1347
+ <div className="text-center py-16">
1348
+ <motion.div
1349
+ animate={{ rotate: 360 }}
1350
+ transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
1351
+ className="h-16 w-16 mx-auto rounded-2xl bg-indigo-100 flex items-center justify-center mb-4"
1352
+ >
1353
+ <Cpu className="h-8 w-8 text-indigo-600" />
1354
+ </motion.div>
1355
+ <p className="text-slate-500">Loading extraction history...</p>
1356
+ </div>
1357
+ )}
1358
+
1359
+ {/* History List */}
1360
+ {!isLoading && (
1361
+ <div className="space-y-4">
1362
+ {filteredHistory.map((item, index) => (
1363
+ <motion.div
1364
+ key={item.id}
1365
+ initial={{ opacity: 0, y: 20 }}
1366
+ animate={{ opacity: 1, y: 0 }}
1367
+ transition={{ delay: index * 0.05 }}
1368
+ className="bg-white rounded-2xl border border-slate-200 overflow-hidden"
1369
+ >
1370
+ {/* Main Row */}
1371
+ <div
1372
+ className="p-5 cursor-pointer hover:bg-slate-50/50 transition-colors"
1373
+ onClick={() =>
1374
+ setExpandedReport(
1375
+ expandedReport === item.id ? null : item.id
1376
+ )
1377
+ }
1378
+ >
1379
+ <div className="flex items-center gap-4">
1380
+ {/* File Icon */}
1381
+ <div
1382
+ className={cn(
1383
+ "h-12 w-12 rounded-xl flex items-center justify-center",
1384
+ item.status === "completed" ? "bg-indigo-50" : "bg-red-50"
1385
+ )}
1386
+ >
1387
+ <FileText
1388
+ className={cn(
1389
+ "h-6 w-6",
1390
+ item.status === "completed"
1391
+ ? "text-indigo-600"
1392
+ : "text-red-500"
1393
+ )}
1394
+ />
1395
+ </div>
1396
+
1397
+ {/* File Info */}
1398
+ <div className="flex-1 min-w-0">
1399
+ <div className="flex items-center gap-2">
1400
+ <h3 className="font-semibold text-slate-900 truncate">
1401
+ {item.fileName}
1402
+ </h3>
1403
+ <Badge variant="secondary" className="text-xs">
1404
+ {item.fileType}
1405
+ </Badge>
1406
+ </div>
1407
+ <div className="flex items-center gap-4 mt-1 text-sm text-slate-500">
1408
+ <span>{item.fileSize}</span>
1409
+ <span className="flex items-center gap-1">
1410
+ <Calendar className="h-3 w-3" />
1411
+ {formatDate(item.extractedAt)}
1412
+ </span>
1413
+ </div>
1414
+ </div>
1415
+
1416
+ {/* Stats */}
1417
+ <div className="hidden md:flex items-center gap-6">
1418
+ <div className="text-center">
1419
+ <p className="text-xs text-slate-400">Time</p>
1420
+ <p className="font-semibold text-slate-700">
1421
+ {formatTime(item.totalTime)}
1422
+ </p>
1423
+ </div>
1424
+ <div className="text-center">
1425
+ <p className="text-xs text-slate-400">Fields</p>
1426
+ <p className="font-semibold text-slate-700">
1427
+ {item.fieldsExtracted}
1428
+ </p>
1429
+ </div>
1430
+ <div className="text-center">
1431
+ <p className="text-xs text-slate-400">Confidence</p>
1432
+ <p
1433
+ className={cn(
1434
+ "font-semibold",
1435
+ item.confidence >= 95
1436
+ ? "text-emerald-600"
1437
+ : item.confidence >= 90
1438
+ ? "text-amber-600"
1439
+ : "text-red-600"
1440
+ )}
1441
+ >
1442
+ {item.confidence > 0 ? `${item.confidence}%` : "-"}
1443
+ </p>
1444
+ </div>
1445
+ </div>
1446
+
1447
+ {/* Status & Actions */}
1448
+ <div className="flex items-center gap-3">
1449
+ <Badge
1450
+ className={cn(
1451
+ "capitalize",
1452
+ item.status === "completed"
1453
+ ? "bg-emerald-50 text-emerald-700 border-emerald-200"
1454
+ : "bg-red-50 text-red-700 border-red-200"
1455
+ )}
1456
+ >
1457
+ {item.status === "completed" ? (
1458
+ <CheckCircle2 className="h-3 w-3 mr-1" />
1459
+ ) : (
1460
+ <AlertCircle className="h-3 w-3 mr-1" />
1461
+ )}
1462
+ {item.status}
1463
+ </Badge>
1464
+ <ChevronRight
1465
+ className={cn(
1466
+ "h-5 w-5 text-slate-400 transition-transform",
1467
+ expandedReport === item.id && "rotate-90"
1468
+ )}
1469
+ />
1470
+ </div>
1471
+ </div>
1472
+ </div>
1473
+
1474
+ {/* Expanded Report */}
1475
+ <AnimatePresence>
1476
+ {expandedReport === item.id && (
1477
+ <motion.div
1478
+ initial={{ height: 0, opacity: 0 }}
1479
+ animate={{ height: "auto", opacity: 1 }}
1480
+ exit={{ height: 0, opacity: 0 }}
1481
+ transition={{ duration: 0.2 }}
1482
+ className="overflow-hidden"
1483
+ >
1484
+ <div className="px-5 pb-5 pt-2 border-t border-slate-100">
1485
+ {/* Error Message */}
1486
+ {item.errorMessage && (
1487
+ <div className="mb-4 p-4 bg-red-50 border border-red-100 rounded-xl">
1488
+ <div className="flex items-center gap-2 text-red-700">
1489
+ <AlertCircle className="h-4 w-4" />
1490
+ <span className="font-medium">Error Details</span>
1491
+ </div>
1492
+ <p className="text-sm text-red-600 mt-1">
1493
+ {item.errorMessage}
1494
+ </p>
1495
+ </div>
1496
+ )}
1497
+
1498
+ {/* Performance Report Header */}
1499
+ <div className="flex items-center justify-between mb-4">
1500
+ <h4 className="font-semibold text-slate-800">
1501
+ Performance Report
1502
+ </h4>
1503
+ <div className="flex items-center gap-2">
1504
+ <Button
1505
+ variant="ghost"
1506
+ size="sm"
1507
+ className="h-8 text-xs"
1508
+ onClick={(e) => {
1509
+ e.stopPropagation();
1510
+ navigate(`/?extractionId=${item.id}`);
1511
+ }}
1512
+ >
1513
+ <Eye className="h-3 w-3 mr-1" />
1514
+ View Output
1515
+ </Button>
1516
+ <DropdownMenu>
1517
+ <DropdownMenuTrigger asChild>
1518
+ <Button
1519
+ variant="outline"
1520
+ size="sm"
1521
+ className="h-8 text-xs"
1522
+ >
1523
+ <Download className="h-3 w-3 mr-1" />
1524
+ Export Report
1525
+ </Button>
1526
+ </DropdownMenuTrigger>
1527
+ <DropdownMenuContent
1528
+ align="end"
1529
+ className="w-44 rounded-xl p-2"
1530
+ >
1531
+ <DropdownMenuItem
1532
+ className="rounded-lg cursor-pointer text-xs"
1533
+ onClick={(e) => {
1534
+ e.stopPropagation();
1535
+ handleExportSingleReport(item, "csv");
1536
+ }}
1537
+ >
1538
+ <Table2 className="h-3 w-3 mr-2 text-emerald-600" />
1539
+ Download CSV
1540
+ </DropdownMenuItem>
1541
+ <DropdownMenuItem
1542
+ className="rounded-lg cursor-pointer text-xs"
1543
+ onClick={(e) => {
1544
+ e.stopPropagation();
1545
+ handleExportSingleReport(item, "excel");
1546
+ }}
1547
+ >
1548
+ <FileSpreadsheet className="h-3 w-3 mr-2 text-green-600" />
1549
+ Download Excel
1550
+ </DropdownMenuItem>
1551
+ </DropdownMenuContent>
1552
+ </DropdownMenu>
1553
+ </div>
1554
+ </div>
1555
+
1556
+ {/* Stage Timing Cards */}
1557
+ <div className="grid grid-cols-1 md:grid-cols-4 gap-4">
1558
+ {Object.entries(item.stages).map(
1559
+ ([stageKey, stageData]) => {
1560
+ const config = stageConfig[stageKey];
1561
+ const variationInfo =
1562
+ variationConfig[stageData.variation];
1563
+ const Icon = config.icon;
1564
+ const VariationIcon = variationInfo.icon;
1565
+
1566
+ return (
1567
+ <div
1568
+ key={stageKey}
1569
+ className={cn(
1570
+ "relative p-4 rounded-xl border",
1571
+ stageData.status === "completed"
1572
+ ? "bg-slate-50 border-slate-200"
1573
+ : stageData.status === "failed"
1574
+ ? "bg-red-50 border-red-200"
1575
+ : "bg-slate-50/50 border-slate-100"
1576
+ )}
1577
+ >
1578
+ <div className="flex items-center gap-2 mb-3">
1579
+ <div
1580
+ className={cn(
1581
+ "h-8 w-8 rounded-lg flex items-center justify-center",
1582
+ `bg-${config.color}-100`
1583
+ )}
1584
+ >
1585
+ <Icon
1586
+ className={cn(
1587
+ "h-4 w-4",
1588
+ `text-${config.color}-600`
1589
+ )}
1590
+ />
1591
+ </div>
1592
+ <span className="text-sm font-medium text-slate-700">
1593
+ {config.label}
1594
+ </span>
1595
+ </div>
1596
+
1597
+ <div className="flex items-end justify-between">
1598
+ <div>
1599
+ <p
1600
+ className={cn(
1601
+ "text-2xl font-bold",
1602
+ stageData.status === "skipped"
1603
+ ? "text-slate-300"
1604
+ : stageData.status === "failed"
1605
+ ? "text-red-600"
1606
+ : "text-slate-900"
1607
+ )}
1608
+ >
1609
+ {stageData.status === "skipped"
1610
+ ? "-"
1611
+ : formatTime(stageData.time)}
1612
+ </p>
1613
+ {stageData.status !== "skipped" && (
1614
+ <div className="flex items-center gap-1 mt-1">
1615
+ <VariationIcon
1616
+ className={cn(
1617
+ "h-3 w-3",
1618
+ variationInfo.color
1619
+ )}
1620
+ />
1621
+ <span
1622
+ className={cn(
1623
+ "text-xs",
1624
+ variationInfo.color
1625
+ )}
1626
+ >
1627
+ {variationInfo.label}
1628
+ </span>
1629
+ </div>
1630
+ )}
1631
+ </div>
1632
+
1633
+ {stageData.status === "completed" && (
1634
+ <CheckCircle2 className="h-5 w-5 text-emerald-500" />
1635
+ )}
1636
+ {stageData.status === "failed" && (
1637
+ <X className="h-5 w-5 text-red-500" />
1638
+ )}
1639
+ </div>
1640
+
1641
+ {/* Progress bar */}
1642
+ <div className="mt-3 h-1.5 bg-slate-200 rounded-full overflow-hidden">
1643
+ <motion.div
1644
+ initial={{ width: 0 }}
1645
+ animate={{
1646
+ width:
1647
+ stageData.status === "completed"
1648
+ ? "100%"
1649
+ : stageData.status === "failed"
1650
+ ? "60%"
1651
+ : "0%",
1652
+ }}
1653
+ transition={{ duration: 0.5, delay: 0.2 }}
1654
+ className={cn(
1655
+ "h-full rounded-full",
1656
+ stageData.status === "failed"
1657
+ ? "bg-red-500"
1658
+ : `bg-${config.color}-500`
1659
+ )}
1660
+ />
1661
+ </div>
1662
+ </div>
1663
+ );
1664
+ }
1665
+ )}
1666
+ </div>
1667
+
1668
+ {/* Total Time Summary */}
1669
+ <div className="mt-4 flex items-center justify-between p-4 bg-gradient-to-r from-indigo-50 to-violet-50 rounded-xl border border-indigo-100">
1670
+ <div className="flex items-center gap-3">
1671
+ <Clock className="h-5 w-5 text-indigo-600" />
1672
+ <div>
1673
+ <p className="text-sm font-medium text-slate-700">
1674
+ Total Processing Time
1675
+ </p>
1676
+ <p className="text-xs text-slate-500">
1677
+ From upload to output ready
1678
+ </p>
1679
+ </div>
1680
+ </div>
1681
+ <div className="text-right">
1682
+ <p className="text-2xl font-bold text-indigo-600">
1683
+ {formatTime(item.totalTime)}
1684
+ </p>
1685
+ <p className="text-xs text-slate-500">
1686
+ {item.status === "completed"
1687
+ ? "Completed successfully"
1688
+ : "Process failed"}
1689
+ </p>
1690
+ </div>
1691
+ </div>
1692
+ </div>
1693
+ </motion.div>
1694
+ )}
1695
+ </AnimatePresence>
1696
+ </motion.div>
1697
+ ))}
1698
+ {filteredHistory.length === 0 && !error && (
1699
+ <div className="text-center py-16">
1700
+ <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
1701
+ <FileText className="h-10 w-10 text-slate-300" />
1702
+ </div>
1703
+ <p className="text-slate-500 mb-2">
1704
+ {history.length === 0
1705
+ ? "No extraction history yet"
1706
+ : "No extractions match your filters"}
1707
+ </p>
1708
+ {history.length === 0 && (
1709
+ <p className="text-sm text-slate-400">
1710
+ Upload a document to get started
1711
+ </p>
1712
+ )}
1713
+ </div>
1714
+ )}
1715
+ </div>
1716
+ )}
1717
+ </div>
1718
+ </div>
1719
+ );
1720
+ }
1721
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/pages/ShareHandler.jsx CHANGED
@@ -1,3 +1,4 @@
 
1
  import React, { useEffect, useState } from "react";
2
  import { useParams, useNavigate } from "react-router-dom";
3
  import { useAuth } from "@/contexts/AuthContext";
@@ -93,3 +94,100 @@ export default function ShareHandler() {
93
  return null;
94
  }
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  import React, { useEffect, useState } from "react";
3
  import { useParams, useNavigate } from "react-router-dom";
4
  import { useAuth } from "@/contexts/AuthContext";
 
94
  return null;
95
  }
96
 
97
+ =======
98
+ import React, { useEffect, useState } from "react";
99
+ import { useParams, useNavigate } from "react-router-dom";
100
+ import { useAuth } from "@/contexts/AuthContext";
101
+ import { accessSharedExtraction } from "@/services/api";
102
+ import LoginForm from "@/components/auth/LoginForm";
103
+
104
+ export default function ShareHandler() {
105
+ const { token } = useParams();
106
+ const navigate = useNavigate();
107
+ const { isAuthenticated, loading } = useAuth();
108
+ const [isProcessing, setIsProcessing] = useState(false);
109
+ const [error, setError] = useState(null);
110
+
111
+ useEffect(() => {
112
+ const processShare = async () => {
113
+ if (loading) return; // Wait for auth to load
114
+
115
+ if (!isAuthenticated) {
116
+ // User not logged in - they'll be shown login form
117
+ // After login, AuthContext will trigger a re-render and this will run again
118
+ return;
119
+ }
120
+
121
+ // User is authenticated, process the share
122
+ if (isProcessing) return; // Prevent duplicate calls
123
+ setIsProcessing(true);
124
+ setError(null);
125
+
126
+ try {
127
+ const result = await accessSharedExtraction(token);
128
+ if (result.success && result.extraction_id) {
129
+ // Redirect to history page with the extraction ID
130
+ navigate(`/history?extractionId=${result.extraction_id}`);
131
+ } else {
132
+ setError("Failed to access shared extraction");
133
+ }
134
+ } catch (err) {
135
+ console.error("Share access error:", err);
136
+ setError(err.message || "Failed to access shared extraction");
137
+ // Still redirect to history after 3 seconds
138
+ setTimeout(() => {
139
+ navigate("/history");
140
+ }, 3000);
141
+ } finally {
142
+ setIsProcessing(false);
143
+ }
144
+ };
145
+
146
+ processShare();
147
+ // eslint-disable-next-line react-hooks/exhaustive-deps
148
+ }, [token, isAuthenticated, loading]);
149
+
150
+ // Show login form if not authenticated
151
+ if (!isAuthenticated && !loading) {
152
+ return <LoginForm />;
153
+ }
154
+
155
+ // Show loading state while processing
156
+ if (isProcessing || loading) {
157
+ return (
158
+ <div className="min-h-screen flex items-center justify-center bg-[#FAFAFA]">
159
+ <div className="text-center">
160
+ <div className="h-16 w-16 mx-auto rounded-2xl bg-indigo-100 flex items-center justify-center mb-4 animate-pulse">
161
+ <div className="h-8 w-8 rounded-lg bg-indigo-600"></div>
162
+ </div>
163
+ <p className="text-slate-600">Loading shared extraction...</p>
164
+ </div>
165
+ </div>
166
+ );
167
+ }
168
+
169
+ // Show error state
170
+ if (error) {
171
+ return (
172
+ <div className="min-h-screen flex items-center justify-center bg-[#FAFAFA]">
173
+ <div className="text-center max-w-md mx-4">
174
+ <div className="h-16 w-16 mx-auto rounded-2xl bg-red-100 flex items-center justify-center mb-4">
175
+ <div className="h-8 w-8 rounded-lg bg-red-600"></div>
176
+ </div>
177
+ <h2 className="text-xl font-semibold text-slate-900 mb-2">Error</h2>
178
+ <p className="text-slate-600 mb-4">{error}</p>
179
+ <button
180
+ onClick={() => navigate("/history")}
181
+ className="px-4 py-2 bg-indigo-600 text-white rounded-lg hover:bg-indigo-700"
182
+ >
183
+ Go to History
184
+ </button>
185
+ </div>
186
+ </div>
187
+ );
188
+ }
189
+
190
+ return null;
191
+ }
192
+
193
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/services/api.js CHANGED
@@ -1,3 +1,4 @@
 
1
  /**
2
  * API service for communicating with the FastAPI backend
3
  */
@@ -171,3 +172,178 @@ export async function ping() {
171
  return await response.json();
172
  }
173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  /**
3
  * API service for communicating with the FastAPI backend
4
  */
 
172
  return await response.json();
173
  }
174
 
175
+ =======
176
+ /**
177
+ * API service for communicating with the FastAPI backend
178
+ */
179
+
180
+ const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || "";
181
+
182
/**
 * Build the Authorization header for the stored session token.
 *
 * @returns {Object} `{ Authorization: "Bearer <token>" }` when localStorage
 *   holds a non-empty "auth_token" entry, otherwise an empty object.
 */
function getAuthHeaders() {
  const storedToken = localStorage.getItem("auth_token");
  if (!storedToken) {
    return {};
  }
  return { Authorization: `Bearer ${storedToken}` };
}
189
+
190
/**
 * Upload a document to the extraction endpoint (POST /api/extract).
 *
 * The file is sent as multipart form data under "file"; a non-blank
 * `keyFields` string is trimmed and sent under "key_fields".
 *
 * @param {File} file - The file to extract data from
 * @param {string} keyFields - Optional comma-separated list of fields to extract
 * @returns {Promise<Object>} Parsed JSON body of a successful response
 * @throws {Error} With the server's `error`/`detail` text, an
 *   "HTTP <status>: <statusText>" fallback when the body is not JSON, or
 *   "Extraction failed".
 */
export async function extractDocument(file, keyFields = "") {
  const payload = new FormData();
  payload.append("file", file);

  const requestedFields = keyFields ? keyFields.trim() : "";
  if (requestedFields) {
    payload.append("key_fields", requestedFields);
  }

  const response = await fetch(`${API_BASE_URL}/api/extract`, {
    method: "POST",
    headers: getAuthHeaders(),
    body: payload,
  });

  if (response.ok) {
    return response.json();
  }

  // Error bodies are not guaranteed to be JSON; fall back to the status line.
  const describeStatus = () => ({
    error: `HTTP ${response.status}: ${response.statusText}`,
  });
  const errorData = await response.json().catch(describeStatus);
  throw new Error(errorData.error || errorData.detail || "Extraction failed");
}
218
+
219
/**
 * Fetch the extraction history (GET /api/history) with auth headers.
 *
 * @returns {Promise<Array>} Parsed JSON body of a successful response
 * @throws {Error} With the server's `error`/`detail` text, an
 *   "HTTP <status>: <statusText>" fallback when the body is not JSON, or
 *   "Failed to fetch history".
 */
export async function getHistory() {
  const requestOptions = { headers: getAuthHeaders() };
  const response = await fetch(`${API_BASE_URL}/api/history`, requestOptions);

  if (response.ok) {
    return response.json();
  }

  // Error bodies are not guaranteed to be JSON; fall back to the status line.
  const describeStatus = () => ({
    error: `HTTP ${response.status}: ${response.statusText}`,
  });
  const errorData = await response.json().catch(describeStatus);
  throw new Error(errorData.error || errorData.detail || "Failed to fetch history");
}
237
+
238
/**
 * Fetch one extraction by ID (GET /api/extraction/<id>) with auth headers.
 *
 * The ID is percent-encoded before being placed in the URL path, so a value
 * containing `/`, `?` or `#` cannot redirect the request to another endpoint.
 * Plain numeric IDs are unaffected by the encoding.
 *
 * @param {number} extractionId - The extraction ID
 * @returns {Promise<Object>} Parsed JSON body of a successful response
 * @throws {Error} With the server's `error`/`detail` text, an
 *   "HTTP <status>: <statusText>" fallback when the body is not JSON, or
 *   "Failed to fetch extraction".
 */
export async function getExtractionById(extractionId) {
  const idSegment = encodeURIComponent(extractionId);
  const response = await fetch(`${API_BASE_URL}/api/extraction/${idSegment}`, {
    headers: getAuthHeaders(),
  });

  if (!response.ok) {
    // Error bodies are not guaranteed to be JSON; fall back to the status line.
    const errorData = await response.json().catch(() => ({
      error: `HTTP ${response.status}: ${response.statusText}`,
    }));
    throw new Error(errorData.error || errorData.detail || "Failed to fetch extraction");
  }

  return await response.json();
}
257
+
258
/**
 * Request a shareable link for an extraction (POST /api/share/link).
 *
 * Sends `{ extraction_id }` as JSON together with the auth headers.
 *
 * @param {number} extractionId - The extraction ID to share
 * @returns {Promise<Object>} Parsed JSON body of a successful response
 * @throws {Error} With the server's `error`/`detail` text, an
 *   "HTTP <status>: <statusText>" fallback when the body is not JSON, or
 *   "Failed to create share link".
 */
export async function createShareLink(extractionId) {
  const headers = Object.assign(
    { "Content-Type": "application/json" },
    getAuthHeaders()
  );
  const body = JSON.stringify({ extraction_id: extractionId });

  const response = await fetch(`${API_BASE_URL}/api/share/link`, {
    method: "POST",
    headers,
    body,
  });

  if (response.ok) {
    return response.json();
  }

  // Error bodies are not guaranteed to be JSON; fall back to the status line.
  const describeStatus = () => ({
    error: `HTTP ${response.status}: ${response.statusText}`,
  });
  const errorData = await response.json().catch(describeStatus);
  throw new Error(errorData.error || errorData.detail || "Failed to create share link");
}
284
+
285
/**
 * Share an extraction with one or more recipients (POST /api/share).
 *
 * Sends `{ extraction_id, recipient_emails }` as JSON with the auth headers.
 * A single email string is wrapped into a one-element array.
 *
 * Error reporting: the server's `error`/`detail` value is used as the message.
 * When that value is an array of objects (the shape FastAPI uses for request
 * validation errors), each entry's `msg` is joined with "; " instead of being
 * stringified to "[object Object]". Other non-string values are JSON-encoded.
 *
 * @param {number} extractionId - The extraction ID to share
 * @param {string|string[]} recipientEmails - Recipient email address(es) - can be a single email or array of emails
 * @returns {Promise<Object>} Parsed JSON body of a successful response
 * @throws {Error} With the server-provided reason, an
 *   "HTTP <status>: <statusText>" fallback when the body is not JSON, or
 *   "Failed to share extraction".
 */
export async function shareExtraction(extractionId, recipientEmails) {
  // Ensure recipient_emails is always an array
  const emailsArray = Array.isArray(recipientEmails) ? recipientEmails : [recipientEmails];

  const response = await fetch(`${API_BASE_URL}/api/share`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      ...getAuthHeaders(),
    },
    body: JSON.stringify({
      extraction_id: extractionId,
      recipient_emails: emailsArray,
    }),
  });

  if (!response.ok) {
    // Error bodies are not guaranteed to be JSON; fall back to the status line.
    const errorData = await response.json().catch(() => ({
      error: `HTTP ${response.status}: ${response.statusText}`,
    }));
    const reason = errorData.error || errorData.detail;

    let message = "Failed to share extraction";
    if (typeof reason === "string") {
      message = reason;
    } else if (Array.isArray(reason) && reason.length > 0) {
      // Validation errors arrive as a list of { loc, msg, type } entries.
      message = reason
        .map((entry) =>
          entry && typeof entry.msg === "string" ? entry.msg : JSON.stringify(entry)
        )
        .join("; ");
    } else if (reason && !Array.isArray(reason)) {
      message = JSON.stringify(reason);
    }
    throw new Error(message);
  }

  return await response.json();
}
316
+
317
/**
 * Redeem a share token (GET /api/share/<token>) with auth headers.
 *
 * The token is percent-encoded before being placed in the URL path, so a
 * token containing `/`, `?` or `#` cannot change which endpoint is requested.
 * Tokens made only of URL-safe characters are sent unchanged.
 *
 * @param {string} token - Share token
 * @returns {Promise<Object>} Parsed JSON body of a successful response
 * @throws {Error} With the server's `error`/`detail` text, an
 *   "HTTP <status>: <statusText>" fallback when the body is not JSON, or
 *   "Failed to access shared extraction".
 */
export async function accessSharedExtraction(token) {
  const tokenSegment = encodeURIComponent(token);
  const response = await fetch(`${API_BASE_URL}/api/share/${tokenSegment}`, {
    headers: getAuthHeaders(),
  });

  if (!response.ok) {
    // Error bodies are not guaranteed to be JSON; fall back to the status line.
    const errorData = await response.json().catch(() => ({
      error: `HTTP ${response.status}: ${response.statusText}`,
    }));
    throw new Error(errorData.error || errorData.detail || "Failed to access shared extraction");
  }

  return await response.json();
}
336
+
337
/**
 * Health check: GET /ping on the backend (no auth headers are sent).
 *
 * @returns {Promise<Object>} Parsed JSON body of a successful response
 * @throws {Error} "Backend is not available" on any non-OK response
 */
export async function ping() {
  const response = await fetch(`${API_BASE_URL}/ping`);
  if (response.ok) {
    return response.json();
  }
  throw new Error("Backend is not available");
}
348
+
349
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
frontend/src/services/auth.js CHANGED
@@ -1,3 +1,4 @@
 
1
  /**
2
  * Authentication service for Firebase and OTP authentication
3
  */
@@ -109,3 +110,116 @@ export async function logout() {
109
  return Promise.resolve();
110
  }
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  /**
3
  * Authentication service for Firebase and OTP authentication
4
  */
 
110
  return Promise.resolve();
111
  }
112
 
113
+ =======
114
+ /**
115
+ * Authentication service for Firebase and OTP authentication
116
+ */
117
+
118
+ const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || "";
119
+
120
/**
 * Fetch the currently authenticated user (GET /api/auth/me).
 *
 * Uses the "auth_token" entry from localStorage as a Bearer token. When the
 * server answers 401 the stored token is removed before the error is thrown.
 *
 * @returns {Promise<Object>} Parsed JSON body of a successful response
 * @throws {Error} "No token found" when no token is stored; otherwise the
 *   server's `detail` text or "Failed to get user".
 */
export async function getCurrentUser() {
  const storedToken = localStorage.getItem("auth_token");
  if (!storedToken) {
    throw new Error("No token found");
  }

  const requestOptions = {
    method: "GET",
    headers: { Authorization: `Bearer ${storedToken}` },
  };
  const response = await fetch(`${API_BASE_URL}/api/auth/me`, requestOptions);

  if (response.ok) {
    return response.json();
  }

  // A 401 means the stored token was rejected, so drop it.
  if (response.status === 401) {
    localStorage.removeItem("auth_token");
  }
  const errorData = await response.json().catch(() => ({}));
  throw new Error(errorData.detail || "Failed to get user");
}
147
+
148
/**
 * Exchange a Firebase ID token for a backend session
 * (POST /api/auth/firebase/login with `{ id_token }` as JSON).
 *
 * @param {string} idToken - Firebase ID token
 * @returns {Promise<Object>} Parsed JSON body of a successful response
 * @throws {Error} The server's `detail` text or "Firebase login failed"
 */
export async function firebaseLogin(idToken) {
  const requestOptions = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ id_token: idToken }),
  };
  const response = await fetch(`${API_BASE_URL}/api/auth/firebase/login`, requestOptions);

  if (response.ok) {
    return response.json();
  }

  // Non-JSON error bodies degrade to the generic message below.
  const errorData = await response.json().catch(() => ({}));
  throw new Error(errorData.detail || "Firebase login failed");
}
169
+
170
/**
 * Ask the backend to send a one-time password to an email address
 * (POST /api/auth/otp/request with `{ email }` as JSON).
 *
 * @param {string} email - Email address
 * @returns {Promise<Object>} Parsed JSON body of a successful response
 * @throws {Error} The server's `detail` text or "Failed to send OTP"
 */
export async function requestOTP(email) {
  const requestOptions = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ email }),
  };
  const response = await fetch(`${API_BASE_URL}/api/auth/otp/request`, requestOptions);

  if (response.ok) {
    return response.json();
  }

  // Non-JSON error bodies degrade to the generic message below.
  const errorData = await response.json().catch(() => ({}));
  throw new Error(errorData.detail || "Failed to send OTP");
}
191
+
192
/**
 * Verify a one-time password and log in
 * (POST /api/auth/otp/verify with `{ email, otp }` as JSON).
 *
 * @param {string} email - Email address
 * @param {string} otp - OTP code
 * @returns {Promise<Object>} Parsed JSON body of a successful response
 * @throws {Error} The server's `detail` text or "OTP verification failed"
 */
export async function verifyOTP(email, otp) {
  const requestOptions = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ email, otp }),
  };
  const response = await fetch(`${API_BASE_URL}/api/auth/otp/verify`, requestOptions);

  if (response.ok) {
    return response.json();
  }

  // Non-JSON error bodies degrade to the generic message below.
  const errorData = await response.json().catch(() => ({}));
  throw new Error(errorData.detail || "OTP verification failed");
}
214
+
215
/**
 * Logout the current user.
 *
 * Makes no server call: with JWT tokens, logging out means discarding the
 * token on the client. This function does not itself remove the stored token.
 * NOTE(review): presumably the caller clears "auth_token" - confirm.
 *
 * @returns {Promise<void>} Resolves immediately with no value
 */
export async function logout() {
  // Intentionally empty: there is no server-side session to end.
}
224
+
225
+ >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d