EZOFISOCR

Running

App Files Community

Seth committed on Jan 4

Commit

ced5eff

1 Parent(s): cb28f8c

update

Browse files

Files changed (10) hide show

backend/app/apollo_service.py +0 -448
backend/app/auth.py +0 -96
backend/app/auth_routes.py +1 -240
backend/app/email_validator.py +0 -65
backend/app/firebase_auth.py +0 -96
backend/app/models.py +1 -107
backend/app/monday_service.py +0 -395
backend/app/openrouter_client.py +0 -865
backend/app/otp_service.py +0 -201
backend/app/schemas.py +0 -29

backend/app/apollo_service.py CHANGED Viewed

@@ -1,449 +1,3 @@
-<<<<<<< HEAD
-"""
-Apollo.io API service for creating contacts, enriching contact data, and adding them to sequences.
-Reference:
-- Create contact: https://docs.apollo.io/reference/create-a-contact
-- Add to sequence: https://docs.apollo.io/reference/add-contacts-to-sequence
-- Enrich person: https://docs.apollo.io/reference/enrich-people-data
-"""
-import os
-import httpx
-from typing import Optional, Dict, Any
-APOLLO_API_KEY = os.environ.get("APOLLO_API_KEY", "")
-APOLLO_API_URL = "https://api.apollo.io/api/v1"
-APOLLO_TRIAL_LIST_NAME = "VPR TRIAL LEADS"
-# Allow list ID to be set directly via environment variable (more reliable than lookup)
-APOLLO_TRIAL_LIST_ID = os.environ.get("APOLLO_TRIAL_LIST_ID", None)
-# Sequence ID for adding contacts to email sequences (preferred over lists)
-APOLLO_TRIAL_SEQUENCE_ID = os.environ.get("APOLLO_TRIAL_SEQUENCE_ID", None)
-async def get_list_id(list_name: Optional[str] = None) -> Optional[str]:
-    """
-    Get Apollo list ID. First tries environment variable, then attempts API lookup.
-    Args:
-        list_name: Name of the list (for lookup if env var not set)
-    Returns:
-        List ID as string if found, None otherwise
-    """
-    # First, try to use the list ID from environment variable (most reliable)
-    if APOLLO_TRIAL_LIST_ID:
-        # Apollo list IDs are typically hexadecimal strings (MongoDB ObjectIds)
-        # Accept them as strings, just strip whitespace
-        list_id = str(APOLLO_TRIAL_LIST_ID).strip()
-        if list_id:
-            print(f"[INFO] Using Apollo list ID from environment variable: {list_id}")
-            return list_id
-        else:
-            print(f"[WARNING] APOLLO_TRIAL_LIST_ID is empty")
-    # If no env var, try to look up by name (this may not work if API endpoint is different)
-    if not list_name or not APOLLO_API_KEY:
-        return None
-    # Note: The /lists endpoint may not be available in all Apollo API versions
-    # Try alternative: search for lists using a different endpoint
-    try:
-        async with httpx.AsyncClient() as client:
-            # Try the lists endpoint (may return 404 in some API versions)
-            response = await client.get(
-                f"{APOLLO_API_URL}/lists",
-                headers={
-                    "Content-Type": "application/json",
-                    "Cache-Control": "no-cache",
-                    "X-Api-Key": APOLLO_API_KEY
-                },
-                timeout=10.0
-            )
-            if response.status_code == 200:
-                data = response.json()
-                lists = data.get("lists", [])
-                for list_item in lists:
-                    if list_item.get("name") == list_name:
-                        list_id = list_item.get("id")
-                        print(f"[INFO] Found Apollo list '{list_name}' with ID: {list_id}")
-                        # Return as string (Apollo IDs are typically hex strings)
-                        return str(list_id) if list_id else None
-                print(f"[WARNING] Apollo list '{list_name}' not found in available lists")
-            else:
-                print(f"[WARNING] Apollo lists endpoint returned {response.status_code}, cannot lookup list by name")
-    except Exception as e:
-        print(f"[WARNING] Failed to fetch Apollo list ID: {str(e)}")
-    return None
-async def add_contact_to_sequence(contact_id: str, sequence_id: str) -> bool:
-    """
-    Add a contact to an Apollo.io email sequence.
-    Args:
-        contact_id: The Apollo contact ID
-        sequence_id: The Apollo sequence ID
-    Returns:
-        True if contact was successfully added to sequence, False otherwise
-    """
-    if not APOLLO_API_KEY:
-        print("[WARNING] APOLLO_API_KEY not set, skipping sequence enrollment")
-        return False
-    try:
-        async with httpx.AsyncClient() as client:
-            response = await client.post(
-                f"{APOLLO_API_URL}/sequence_contacts",
-                headers={
-                    "Content-Type": "application/json",
-                    "Cache-Control": "no-cache",
-                    "X-Api-Key": APOLLO_API_KEY
-                },
-                json={
-                    "sequence_id": sequence_id,
-                    "contact_id": contact_id
-                },
-                timeout=10.0
-            )
-            if response.status_code in [200, 201]:
-                print(f"[INFO] Successfully added contact {contact_id} to sequence {sequence_id}")
-                return True
-            else:
-                error_data = response.text
-                print(f"[ERROR] Failed to add contact to sequence: {response.status_code} - {error_data}")
-                return False
-    except httpx.HTTPStatusError as e:
-        print(f"[ERROR] Apollo API HTTP error adding to sequence: {e.response.status_code} - {e.response.text}")
-        return False
-    except Exception as e:
-        print(f"[ERROR] Failed to add contact to sequence: {str(e)}")
-        return False
-async def create_apollo_contact(
-    email: str,
-    first_name: Optional[str] = None,
-    last_name: Optional[str] = None,
-    organization_name: Optional[str] = None,
-    title: Optional[str] = None,
-    list_name: Optional[str] = None,
-    sequence_id: Optional[str] = None
-) -> bool:
-    """
-    Create a contact in Apollo.io and optionally add to a sequence or list.
-    Args:
-        email: Contact email address (required)
-        first_name: Contact first name
-        last_name: Contact last name
-        organization_name: Organization name
-        title: Job title
-        list_name: Name of the list to add contact to (defaults to APOLLO_TRIAL_LIST_NAME)
-        sequence_id: ID of the sequence to add contact to (preferred over list)
-    Returns:
-        True if contact created successfully, False otherwise
-    Raises:
-        ValueError: If APOLLO_API_KEY is not set
-    """
-    if not APOLLO_API_KEY:
-        print("[WARNING] APOLLO_API_KEY not set, skipping Apollo contact creation")
-        return False
-    # Use default list name if not provided
-    if list_name is None:
-        list_name = APOLLO_TRIAL_LIST_NAME
-    # Parse name if full name is provided but first/last are not
-    if not first_name and not last_name:
-        # Try to extract from email or use email prefix
-        email_prefix = email.split('@')[0]
-        if '.' in email_prefix:
-            parts = email_prefix.split('.')
-            first_name = parts[0].capitalize() if parts else None
-            last_name = parts[1].capitalize() if len(parts) > 1 else None
-        else:
-            first_name = email_prefix.capitalize()
-    # Extract organization domain from email
-    organization_domain = None
-    if '@' in email:
-        organization_domain = email.split('@')[1]
-    # Prepare contact data
-    contact_data: Dict[str, Any] = {
-        "email": email.lower(),
-        "run_dedupe": True  # Prevent duplicate contacts
-    }
-    if first_name:
-        contact_data["first_name"] = first_name
-    if last_name:
-        contact_data["last_name"] = last_name
-    if organization_name:
-        contact_data["organization_name"] = organization_name
-    if organization_domain:
-        contact_data["organization_domain"] = organization_domain
-    if title:
-        contact_data["title"] = title
-    try:
-        async with httpx.AsyncClient() as client:
-            # Get the list ID if list_name is provided
-            list_ids = []
-            target_list_id = None  # Store for later use
-            if list_name:
-                list_id = await get_list_id(list_name)
-                if list_id:
-                    target_list_id = list_id  # Store for verification later
-                    # Apollo API accepts list_ids as an array of strings (hex IDs)
-                    list_ids = [str(list_id)]
-                    contact_data["list_ids"] = list_ids
-                    print(f"[INFO] Adding contact to list ID: {list_id}")
-                else:
-                    print(f"[WARNING] Could not find list '{list_name}'. Set APOLLO_TRIAL_LIST_ID environment variable with the list ID, or create contact without list assignment")
-            # Log the payload being sent (for debugging)
-            print(f"[DEBUG] Creating Apollo contact with payload: {contact_data}")
-            # Create the contact
-            response = await client.post(
-                f"{APOLLO_API_URL}/contacts",
-                headers={
-                    "Content-Type": "application/json",
-                    "Cache-Control": "no-cache",
-                    "X-Api-Key": APOLLO_API_KEY
-                },
-                json=contact_data,
-                timeout=10.0
-            )
-            # Log the full response for debugging
-            print(f"[DEBUG] Apollo API response status: {response.status_code}")
-            try:
-                response_json = response.json()
-                print(f"[DEBUG] Apollo API response (full): {response_json}")
-            except:
-                print(f"[DEBUG] Apollo API response body (text): {response.text[:1000]}")  # First 1000 chars
-            if response.status_code == 200 or response.status_code == 201:
-                result = response.json()
-                contact = result.get("contact", {})
-                contact_id = contact.get("id")
-                print(f"[INFO] Successfully created Apollo contact: {email} (ID: {contact_id})")
-                # Priority: Add to sequence if sequence_id is provided (this is supported by API)
-                target_sequence_id = sequence_id or APOLLO_TRIAL_SEQUENCE_ID
-                if contact_id and target_sequence_id:
-                    print(f"[INFO] Adding contact to sequence: {target_sequence_id}")
-                    sequence_success = await add_contact_to_sequence(contact_id, target_sequence_id)
-                    if sequence_success:
-                        print(f"[INFO] ✓ Contact successfully enrolled in sequence")
-                    else:
-                        print(f"[WARNING] Failed to add contact to sequence, but contact was created")
-                # Fallback: Try to add to list (API limitation - may not work)
-                if list_ids and contact_id and target_list_id and not target_sequence_id:
-                    print(f"[INFO] Contact created with list_ids parameter: {list_ids}")
-                    print(f"[INFO] ⚠️  Apollo.io API Limitation: The API does not return list_ids in responses,")
-                    print(f"[INFO]    so we cannot verify if the contact was added to the list via API.")
-                    print(f"[INFO]    Please verify manually in Apollo.io that contact '{email}' is in list '{list_name or target_list_id}'")
-                    print(f"[INFO]    Consider using sequences instead (APOLLO_TRIAL_SEQUENCE_ID) for better API support.")
-                return True
-            else:
-                error_data = response.text
-                print(f"[ERROR] Failed to create Apollo contact: {response.status_code} - {error_data}")
-                return False
-    except httpx.HTTPStatusError as e:
-        print(f"[ERROR] Apollo API HTTP error: {e.response.status_code} - {e.response.text}")
-        return False
-    except Exception as e:
-        print(f"[ERROR] Failed to create Apollo contact: {str(e)}")
-        return False
-async def enrich_contact_by_email(email: str) -> Optional[Dict[str, Any]]:
-    """
-    Enrich contact data from Apollo.io using email address.
-    Args:
-        email: Contact email address
-    Returns:
-        Dictionary with enriched contact data, or None if not found
-    """
-    if not APOLLO_API_KEY:
-        print("[WARNING] APOLLO_API_KEY not set, skipping Apollo enrichment")
-        return None
-    try:
-        async with httpx.AsyncClient() as client:
-            # Try people/match endpoint first (for exact email match)
-            print(f"[DEBUG] Attempting Apollo.io enrichment for {email} via /people/match endpoint")
-            response = await client.post(
-                f"{APOLLO_API_URL}/people/match",
-                headers={
-                    "Content-Type": "application/json",
-                    "Cache-Control": "no-cache",
-                    "X-Api-Key": APOLLO_API_KEY
-                },
-                json={
-                    "email": email.lower()
-                    # Note: reveal_phone_number requires webhook_url, so we skip it for now
-                },
-                timeout=10.0
-            )
-            print(f"[DEBUG] Apollo.io /people/match response status: {response.status_code}")
-            if response.status_code == 200:
-                data = response.json()
-                print(f"[DEBUG] Apollo.io /people/match response data keys: {list(data.keys())}")
-                person = data.get("person", {})
-                if person:
-                    print(f"[DEBUG] Found person data in Apollo.io response")
-                    # Extract enriched data
-                    enriched_data = {
-                        "first_name": person.get("first_name"),
-                        "last_name": person.get("last_name"),
-                        "title": person.get("title"),
-                        "phone_number": person.get("phone_numbers", [{}])[0].get("raw_number") if person.get("phone_numbers") else None,
-                        "linkedin_url": person.get("linkedin_url"),
-                        "headline": person.get("headline"),
-                        "organization_name": person.get("organization", {}).get("name") if person.get("organization") else None,
-                        "organization_website": person.get("organization", {}).get("website_url") if person.get("organization") else None,
-                        "organization_address": None,  # May need to parse from organization data
-                    }
-                    # Try to get organization address
-                    if person.get("organization"):
-                        org = person.get("organization", {})
-                        address_parts = []
-                        if org.get("street_address"):
-                            address_parts.append(org.get("street_address"))
-                        if org.get("city"):
-                            address_parts.append(org.get("city"))
-                        if org.get("state"):
-                            address_parts.append(org.get("state"))
-                        if org.get("postal_code"):
-                            address_parts.append(org.get("postal_code"))
-                        if org.get("country"):
-                            address_parts.append(org.get("country"))
-                        if address_parts:
-                            enriched_data["organization_address"] = ", ".join(address_parts)
-                    print(f"[INFO] Successfully enriched contact data for {email} from Apollo.io")
-                    return enriched_data
-                else:
-                    print(f"[DEBUG] Apollo.io /people/match returned 200 but no person data found")
-            elif response.status_code == 404:
-                print(f"[DEBUG] Apollo.io /people/match returned 404 - contact not found in database")
-            elif response.status_code == 401:
-                print(f"[ERROR] Apollo.io API authentication failed - check your API key")
-                try:
-                    error_data = response.json()
-                    print(f"[ERROR] Apollo.io error details: {error_data}")
-                except:
-                    print(f"[ERROR] Apollo.io error response: {response.text}")
-            else:
-                print(f"[DEBUG] Apollo.io /people/match returned status {response.status_code}")
-                try:
-                    error_data = response.json()
-                    print(f"[DEBUG] Apollo.io response: {error_data}")
-                except:
-                    print(f"[DEBUG] Apollo.io response text: {response.text[:500]}")
-            # If match fails, try the new search endpoint (api_search)
-            print(f"[DEBUG] Attempting Apollo.io enrichment for {email} via /mixed_people/api_search endpoint")
-            search_response = await client.post(
-                f"{APOLLO_API_URL}/mixed_people/api_search",
-                headers={
-                    "Content-Type": "application/json",
-                    "Cache-Control": "no-cache",
-                    "X-Api-Key": APOLLO_API_KEY
-                },
-                json={
-                    "email": email.lower(),
-                    "per_page": 1
-                },
-                timeout=10.0
-            )
-            print(f"[DEBUG] Apollo.io /mixed_people/api_search response status: {search_response.status_code}")
-            if search_response.status_code == 200:
-                search_data = search_response.json()
-                print(f"[DEBUG] Apollo.io /mixed_people/api_search response data keys: {list(search_data.keys())}")
-                people = search_data.get("people", [])
-                print(f"[DEBUG] Found {len(people)} people in search results")
-                if people:
-                    person = people[0]
-                    # Extract enriched data (same structure as above)
-                    enriched_data = {
-                        "first_name": person.get("first_name"),
-                        "last_name": person.get("last_name"),
-                        "title": person.get("title"),
-                        "phone_number": person.get("phone_numbers", [{}])[0].get("raw_number") if person.get("phone_numbers") else None,
-                        "linkedin_url": person.get("linkedin_url"),
-                        "headline": person.get("headline"),
-                        "organization_name": person.get("organization", {}).get("name") if person.get("organization") else None,
-                        "organization_website": person.get("organization", {}).get("website_url") if person.get("organization") else None,
-                        "organization_address": None,
-                    }
-                    if person.get("organization"):
-                        org = person.get("organization", {})
-                        address_parts = []
-                        if org.get("street_address"):
-                            address_parts.append(org.get("street_address"))
-                        if org.get("city"):
-                            address_parts.append(org.get("city"))
-                        if org.get("state"):
-                            address_parts.append(org.get("state"))
-                        if org.get("postal_code"):
-                            address_parts.append(org.get("postal_code"))
-                        if org.get("country"):
-                            address_parts.append(org.get("country"))
-                        if address_parts:
-                            enriched_data["organization_address"] = ", ".join(address_parts)
-                    print(f"[INFO] Successfully enriched contact data for {email} from Apollo.io (via search)")
-                    return enriched_data
-                else:
-                    print(f"[DEBUG] Apollo.io /mixed_people/api_search returned 200 but no people in results")
-            elif search_response.status_code == 404:
-                print(f"[DEBUG] Apollo.io /mixed_people/api_search returned 404 - contact not found")
-            elif search_response.status_code == 401:
-                print(f"[ERROR] Apollo.io API authentication failed on search - check your API key")
-                try:
-                    error_data = search_response.json()
-                    print(f"[ERROR] Apollo.io search error details: {error_data}")
-                except:
-                    print(f"[ERROR] Apollo.io search error response: {search_response.text}")
-            else:
-                print(f"[DEBUG] Apollo.io /mixed_people/api_search returned status {search_response.status_code}")
-                try:
-                    error_data = search_response.json()
-                    print(f"[DEBUG] Apollo.io search response: {error_data}")
-                except:
-                    print(f"[DEBUG] Apollo.io search response text: {search_response.text[:500]}")
-            print(f"[INFO] No contact data found in Apollo.io for {email} - contact may not exist in Apollo's database")
-            return None
-    except httpx.HTTPStatusError as e:
-        print(f"[ERROR] Apollo API HTTP error during enrichment: {e.response.status_code} - {e.response.text}")
-        return None
-    except Exception as e:
-        print(f"[ERROR] Failed to enrich contact from Apollo.io: {str(e)}")
-        return None
-=======
 """
 Apollo.io API service for creating contacts, enriching contact data, and adding them to sequences.
 Reference:
@@ -887,5 +441,3 @@ async def enrich_contact_by_email(email: str) -> Optional[Dict[str, Any]]:
     except Exception as e:
         print(f"[ERROR] Failed to enrich contact from Apollo.io: {str(e)}")
         return None
->>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

 """
 Apollo.io API service for creating contacts, enriching contact data, and adding them to sequences.
 Reference:
     except Exception as e:
         print(f"[ERROR] Failed to enrich contact from Apollo.io: {str(e)}")
         return None

backend/app/auth.py CHANGED Viewed

@@ -1,97 +1,3 @@
-<<<<<<< HEAD
-import os
-import jwt
-from datetime import datetime, timedelta
-from typing import Optional
-from fastapi import Depends, HTTPException, status
-from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
-from sqlalchemy.orm import Session
-from .db import SessionLocal
-from .models import User
-# JWT Configuration
-SECRET_KEY = os.environ.get("JWT_SECRET_KEY", "your-secret-key-change-in-production")
-ALGORITHM = "HS256"
-ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7  # 7 days
-security = HTTPBearer()
-def get_db():
-    """Database dependency."""
-    db = SessionLocal()
-    try:
-        yield db
-    finally:
-        db.close()
-def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
-    """Create a JWT access token."""
-    to_encode = data.copy()
-    # Ensure 'sub' (subject) is a string, not an integer
-    if "sub" in to_encode:
-        to_encode["sub"] = str(to_encode["sub"])
-    if expires_delta:
-        expire = datetime.utcnow() + expires_delta
-    else:
-        expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
-    to_encode.update({"exp": expire})
-    encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
-    return encoded_jwt
-def verify_token(token: str) -> dict:
-    """Verify and decode a JWT token."""
-    try:
-        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
-        return payload
-    except jwt.ExpiredSignatureError:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Token has expired",
-        )
-    except jwt.InvalidTokenError:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Could not validate credentials",
-        )
-def get_current_user(
-    credentials: HTTPAuthorizationCredentials = Depends(security),
-    db: Session = Depends(get_db)
-) -> User:
-    """Get the current authenticated user from JWT token."""
-    token = credentials.credentials
-    payload = verify_token(token)
-    user_id: int = payload.get("sub")
-    if user_id is None:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Could not validate credentials",
-        )
-    # Convert user_id back to integer for database query
-    try:
-        user_id_int = int(user_id)
-    except (ValueError, TypeError):
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Invalid user ID in token",
-        )
-    user = db.query(User).filter(User.id == user_id_int).first()
-    if user is None:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="User not found",
-        )
-    return user
-=======
 import os
 import jwt
 from datetime import datetime, timedelta
@@ -183,5 +89,3 @@ def get_current_user(
         )
     return user
->>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

 import os
 import jwt
 from datetime import datetime, timedelta
         )
     return user

backend/app/auth_routes.py CHANGED Viewed

@@ -1,4 +1,3 @@
-<<<<<<< HEAD
 import os
 from fastapi import APIRouter, Depends, HTTPException, Body
 from pydantic import BaseModel, EmailStr
@@ -241,6 +240,7 @@ async def get_current_user_info(current_user: User = Depends(get_current_user)):
     }
 @router.post("/api/auth/api-key/create")
 async def create_api_key(
     request: CreateAPIKeyRequest,
@@ -345,242 +345,3 @@ async def delete_api_key(
         "success": True,
         "message": "API key deactivated successfully"
     }
-=======
-import os
-from fastapi import APIRouter, Depends, HTTPException, Body
-from pydantic import BaseModel, EmailStr
-from sqlalchemy.orm import Session
-from .models import User
-from .auth import create_access_token, get_current_user
-from .firebase_auth import verify_firebase_token
-from .otp_service import request_otp, verify_otp
-from .email_validator import validate_business_email, is_business_email
-from .db import SessionLocal
-def get_db():
-    """Database dependency."""
-    db = SessionLocal()
-    try:
-        yield db
-    finally:
-        db.close()
-router = APIRouter()
-class FirebaseLoginRequest(BaseModel):
-    id_token: str
-class OTPRequestRequest(BaseModel):
-    email: EmailStr
-class OTPVerifyRequest(BaseModel):
-    email: EmailStr
-    otp: str
-@router.post("/api/auth/firebase/login")
-async def firebase_login(
-    request: FirebaseLoginRequest,
-    db: Session = Depends(get_db)
-):
-    """
-    Login with Firebase ID token.
-    Validates business email and creates/updates user.
-    """
-    try:
-        # Verify Firebase token
-        user_info = await verify_firebase_token(request.id_token)
-        email = user_info.get('email')
-        if not email:
-            raise HTTPException(status_code=400, detail="Email not found in Firebase token")
-        # Validate business email
-        if not is_business_email(email):
-            raise HTTPException(
-                status_code=400,
-                detail="Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."
-            )
-        # Get or create user
-        user = db.query(User).filter(
-            (User.email == email.lower()) | (User.firebase_uid == user_info['uid'])
-        ).first()
-        if not user:
-            user = User(
-                email=email.lower(),
-                name=user_info.get('name'),
-                picture=user_info.get('picture'),
-                firebase_uid=user_info['uid'],
-                auth_method='firebase',
-                email_verified=True
-            )
-            db.add(user)
-            db.commit()
-            db.refresh(user)
-            print(f"[INFO] New user created via Firebase: {email}")
-            # Enrich contact data from Apollo.io and update Brevo + Monday.com
-            try:
-                from .apollo_service import enrich_contact_by_email
-                from .brevo_service import create_brevo_contact, BREVO_TRIAL_LIST_ID
-                from .monday_service import create_monday_lead
-                # Enrich contact data from Apollo.io
-                enriched_data = await enrich_contact_by_email(email)
-                # Use enriched data if available, otherwise use basic data
-                first_name = enriched_data.get("first_name") if enriched_data else None
-                last_name = enriched_data.get("last_name") if enriched_data else None
-                org_name = enriched_data.get("organization_name") if enriched_data else None
-                # Fallback to Firebase data if Apollo didn't provide it
-                if not first_name or not last_name:
-                    full_name = user_info.get('name', '')
-                    if full_name:
-                        name_parts = full_name.strip().split(' ', 1)
-                        first_name = first_name or (name_parts[0] if name_parts else None)
-                        last_name = last_name or (name_parts[1] if len(name_parts) > 1 else None)
-                if not org_name:
-                    org_domain = email.split('@')[1] if '@' in email else None
-                    org_name = org_domain.split('.')[0].capitalize() if org_domain else None
-                # Update Brevo contact with enriched data
-                await create_brevo_contact(
-                    email=email,
-                    first_name=first_name,
-                    last_name=last_name,
-                    organization_name=org_name or (enriched_data.get("organization_name") if enriched_data else None),
-                    phone_number=enriched_data.get("phone_number") if enriched_data else None,
-                    linkedin_url=enriched_data.get("linkedin_url") if enriched_data else None,
-                    title=enriched_data.get("title") if enriched_data else None,
-                    headline=enriched_data.get("headline") if enriched_data else None,
-                    organization_website=enriched_data.get("organization_website") if enriched_data else None,
-                    organization_address=enriched_data.get("organization_address") if enriched_data else None,
-                    list_id=BREVO_TRIAL_LIST_ID
-                )
-                # Create lead in Monday.com
-                await create_monday_lead(
-                    email=email,
-                    first_name=first_name,
-                    last_name=last_name,
-                    phone_number=enriched_data.get("phone_number") if enriched_data else None,
-                    linkedin_url=enriched_data.get("linkedin_url") if enriched_data else None,
-                    title=enriched_data.get("title") if enriched_data else None,
-                    headline=enriched_data.get("headline") if enriched_data else None,
-                    organization_name=org_name or (enriched_data.get("organization_name") if enriched_data else None),
-                    organization_website=enriched_data.get("organization_website") if enriched_data else None,
-                    organization_address=enriched_data.get("organization_address") if enriched_data else None,
-                )
-            except Exception as e:
-                # Don't fail user creation if integrations fail
-                print(f"[WARNING] Failed to enrich/update contact for {email}: {str(e)}")
-        else:
-            # Update user info
-            user.firebase_uid = user_info['uid']
-            user.email_verified = True
-            user.name = user_info.get('name', user.name)
-            user.picture = user_info.get('picture', user.picture)
-            if user.auth_method != 'firebase':
-                user.auth_method = 'firebase'
-            db.commit()
-            print(f"[INFO] User logged in via Firebase: {email}")
-        # Generate JWT token
-        token = create_access_token(data={"sub": user.id})
-        return {
-            "token": token,
-            "user": {
-                "id": user.id,
-                "email": user.email,
-                "name": user.name,
-                "picture": user.picture,
-                "auth_method": user.auth_method
-            }
-        }
-    except HTTPException:
-        raise
-    except Exception as e:
-        print(f"[ERROR] Firebase login failed: {str(e)}")
-        raise HTTPException(status_code=400, detail=f"Authentication failed: {str(e)}")
-@router.post("/api/auth/otp/request")
-async def request_otp_endpoint(
-    request: OTPRequestRequest,
-    db: Session = Depends(get_db)
-):
-    """
-    Request OTP for email login.
-    Validates business email before sending OTP.
-    """
-    try:
-        # Validate business email
-        validate_business_email(request.email)
-        # Request OTP
-        result = await request_otp(request.email, db)
-        return result
-    except HTTPException:
-        raise
-    except Exception as e:
-        print(f"[ERROR] OTP request failed: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Failed to send OTP: {str(e)}")
-@router.post("/api/auth/otp/verify")
-async def verify_otp_endpoint(
-    request: OTPVerifyRequest,
-    db: Session = Depends(get_db)
-):
-    """
-    Verify OTP and login.
-    Validates business email and OTP code.
-    """
-    try:
-        # Validate business email
-        validate_business_email(request.email)
-        # Verify OTP
-        user = await verify_otp(request.email, request.otp, db)
-        # Generate JWT token
-        token = create_access_token(data={"sub": user.id})
-        return {
-            "token": token,
-            "user": {
-                "id": user.id,
-                "email": user.email,
-                "name": user.name,
-                "picture": user.picture,
-                "auth_method": user.auth_method
-            }
-        }
-    except HTTPException:
-        raise
-    except Exception as e:
-        print(f"[ERROR] OTP verification failed: {str(e)}")
-        raise HTTPException(status_code=400, detail=f"OTP verification failed: {str(e)}")
-@router.get("/api/auth/me")
-async def get_current_user_info(current_user: User = Depends(get_current_user)):
-    """Get current user information."""
-    return {
-        "id": current_user.id,
-        "email": current_user.email,
-        "name": current_user.name,
-        "picture": current_user.picture,
-        "auth_method": current_user.auth_method,
-    }
->>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

 import os
 from fastapi import APIRouter, Depends, HTTPException, Body
 from pydantic import BaseModel, EmailStr
     }
+# API Key Management Endpoints (newly added for external API access)
 @router.post("/api/auth/api-key/create")
 async def create_api_key(
     request: CreateAPIKeyRequest,
         "success": True,
         "message": "API key deactivated successfully"
     }

backend/app/email_validator.py CHANGED Viewed

@@ -1,66 +1,3 @@
-<<<<<<< HEAD
-"""
-Email validation utilities to ensure only business emails are allowed.
-"""
-from fastapi import HTTPException
-# List of personal email domains to block
-PERSONAL_EMAIL_DOMAINS = {
-    'gmail.com', 'yahoo.com', 'hotmail.com', 'outlook.com',
-    'aol.com', 'icloud.com', 'mail.com', 'protonmail.com',
-    'yandex.com', 'zoho.com', 'gmx.com', 'live.com', 'msn.com',
-    'me.com', 'mac.com', 'yahoo.co.uk', 'yahoo.co.jp', 'yahoo.fr',
-    'yahoo.de', 'yahoo.it', 'yahoo.es', 'yahoo.in', 'yahoo.com.au',
-    'gmail.co.uk', 'gmail.fr', 'gmail.de', 'gmail.it', 'gmail.es',
-    'gmail.in', 'gmail.com.au', 'hotmail.co.uk', 'hotmail.fr',
-    'hotmail.de', 'hotmail.it', 'hotmail.es', 'outlook.co.uk',
-    'outlook.fr', 'outlook.de', 'outlook.it', 'outlook.es',
-    'rediffmail.com', 'sina.com', 'qq.com', '163.com', '126.com',
-    'mail.ru', 'inbox.com', 'fastmail.com', 'tutanota.com',
-    'hey.com', 'pm.me'
-}
-def is_business_email(email: str) -> bool:
-    """
-    Check if email is a business email (not personal).
-    Args:
-        email: Email address to validate
-    Returns:
-        True if business email, False if personal email
-    """
-    if not email or '@' not in email:
-        return False
-    domain = email.split('@')[1].lower().strip()
-    return domain not in PERSONAL_EMAIL_DOMAINS
-def validate_business_email(email: str) -> None:
-    """
-    Raise exception if email is not a business email.
-    Args:
-        email: Email address to validate
-    Raises:
-        HTTPException: If email is a personal email domain
-    """
-    if not email:
-        raise HTTPException(
-            status_code=400,
-            detail="Email address is required"
-        )
-    if not is_business_email(email):
-        raise HTTPException(
-            status_code=400,
-            detail="Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."
-        )
-=======
 """
 Email validation utilities to ensure only business emails are allowed.
 """
@@ -121,5 +58,3 @@ def validate_business_email(email: str) -> None:
             status_code=400,
             detail="Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."
         )
->>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

 """
 Email validation utilities to ensure only business emails are allowed.
 """
             status_code=400,
             detail="Only business email addresses are allowed. Personal email accounts (Gmail, Yahoo, Outlook, etc.) are not permitted. Please use your work email address."
         )

backend/app/firebase_auth.py CHANGED Viewed

@@ -1,97 +1,3 @@
-<<<<<<< HEAD
-"""
-Firebase Authentication utilities.
-"""
-import os
-import json
-import firebase_admin
-from firebase_admin import auth, credentials
-from fastapi import HTTPException
-# Initialize Firebase Admin SDK
-_firebase_initialized = False
-def initialize_firebase():
-    """Initialize Firebase Admin SDK."""
-    global _firebase_initialized
-    if _firebase_initialized:
-        return
-    if not firebase_admin._apps:
-        # Try to get service account from environment variable (JSON string)
-        service_account_json = os.environ.get("FIREBASE_SERVICE_ACCOUNT_JSON")
-        if service_account_json:
-            try:
-                service_account_info = json.loads(service_account_json)
-                cred = credentials.Certificate(service_account_info)
-                firebase_admin.initialize_app(cred)
-                _firebase_initialized = True
-                print("[INFO] Firebase Admin SDK initialized from environment variable")
-                return
-            except json.JSONDecodeError:
-                print("[WARNING] Failed to parse FIREBASE_SERVICE_ACCOUNT_JSON")
-        # Try to get service account from file path
-        service_account_path = os.environ.get("FIREBASE_SERVICE_ACCOUNT_KEY")
-        if service_account_path and os.path.exists(service_account_path):
-            cred = credentials.Certificate(service_account_path)
-            firebase_admin.initialize_app(cred)
-            _firebase_initialized = True
-            print(f"[INFO] Firebase Admin SDK initialized from file: {service_account_path}")
-            return
-        # Try to use default credentials (for Google Cloud environments)
-        try:
-            firebase_admin.initialize_app()
-            _firebase_initialized = True
-            print("[INFO] Firebase Admin SDK initialized with default credentials")
-            return
-        except Exception as e:
-            print(f"[WARNING] Firebase initialization failed: {e}")
-            raise HTTPException(
-                status_code=500,
-                detail="Firebase not configured. Please set FIREBASE_SERVICE_ACCOUNT_JSON or FIREBASE_SERVICE_ACCOUNT_KEY environment variable."
-            )
-async def verify_firebase_token(id_token: str) -> dict:
-    """
-    Verify Firebase ID token and return user info.
-    Args:
-        id_token: Firebase ID token from client
-    Returns:
-        Dictionary with user information (uid, email, name, picture)
-    Raises:
-        HTTPException: If token is invalid
-    """
-    initialize_firebase()
-    try:
-        decoded_token = auth.verify_id_token(id_token)
-        return {
-            'uid': decoded_token['uid'],
-            'email': decoded_token.get('email'),
-            'name': decoded_token.get('name'),
-            'picture': decoded_token.get('picture'),
-        }
-    except ValueError as e:
-        raise HTTPException(
-            status_code=401,
-            detail=f"Invalid Firebase token: {str(e)}"
-        )
-    except Exception as e:
-        raise HTTPException(
-            status_code=401,
-            detail=f"Firebase authentication failed: {str(e)}"
-        )
-=======
 """
 Firebase Authentication utilities.
 """
@@ -183,5 +89,3 @@ async def verify_firebase_token(id_token: str) -> dict:
             status_code=401,
             detail=f"Firebase authentication failed: {str(e)}"
         )
->>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

 """
 Firebase Authentication utilities.
 """
             status_code=401,
             detail=f"Firebase authentication failed: {str(e)}"
         )

backend/app/models.py CHANGED Viewed

@@ -1,4 +1,3 @@
-<<<<<<< HEAD
 from sqlalchemy import Column, Integer, String, Float, DateTime, Text, ForeignKey, Boolean
 from sqlalchemy.orm import relationship
 from sqlalchemy.sql import func
@@ -39,7 +38,7 @@ class User(Base):
         primaryjoin="User.id == ExtractionRecord.user_id"
     )
-    # Relationship to API keys
     api_keys = relationship(
         "APIKey",
         back_populates="user",
@@ -135,108 +134,3 @@ class APIKey(Base):
         "User",
         back_populates="api_keys"
     )
-=======
-from sqlalchemy import Column, Integer, String, Float, DateTime, Text, ForeignKey, Boolean
-from sqlalchemy.orm import relationship
-from sqlalchemy.sql import func
-from .db import Base
-class User(Base):
-    """
-    Stores user information from Firebase or OTP authentication.
-    """
-    __tablename__ = "users"
-    id = Column(Integer, primary_key=True, index=True)
-    email = Column(String, unique=True, index=True, nullable=False)
-    name = Column(String, nullable=True)
-    picture = Column(String, nullable=True)
-    # Auth method: 'firebase' or 'otp'
-    auth_method = Column(String, default='firebase')
-    # Firebase-specific
-    firebase_uid = Column(String, unique=True, index=True, nullable=True)
-    # OTP-specific
-    email_verified = Column(Boolean, default=False)
-    created_at = Column(
-        DateTime(timezone=True),
-        server_default=func.now(),
-    )
-    # Relationship to extraction records (explicitly specify user_id as the foreign key)
-    # Note: primaryjoin must be specified because ExtractionRecord has multiple foreign keys to User
-    extractions = relationship(
-        "ExtractionRecord",
-        back_populates="user",
-        primaryjoin="User.id == ExtractionRecord.user_id"
-    )
-class ExtractionRecord(Base):
-    """
-    Stores one extraction run so the History page can show past jobs.
-    We'll fill it from the /api/extract endpoint later.
-    """
-    __tablename__ = "extractions"
-    id = Column(Integer, primary_key=True, index=True)
-    user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
-    file_name = Column(String, index=True)
-    file_type = Column(String)
-    file_size = Column(String)
-    status = Column(String)              # "completed" | "failed"
-    confidence = Column(Float)           # overall confidence (0–100)
-    fields_extracted = Column(Integer)   # number of fields extracted
-    total_time_ms = Column(Integer)      # total processing time in ms
-    raw_output = Column(Text)            # JSON string from the model
-    file_base64 = Column(Text, nullable=True)  # Base64 encoded original file for preview
-    error_message = Column(Text, nullable=True)
-    created_at = Column(
-        DateTime(timezone=True),
-        server_default=func.now(),
-    )
-    # Relationship to user (explicitly specify user_id as the foreign key)
-    # Note: primaryjoin must be specified because ExtractionRecord has multiple foreign keys to User
-    user = relationship(
-        "User",
-        back_populates="extractions",
-        primaryjoin="ExtractionRecord.user_id == User.id"
-    )
-    # Track if this extraction was shared (original extraction ID)
-    shared_from_extraction_id = Column(Integer, ForeignKey("extractions.id"), nullable=True, index=True)
-    shared_by_user_id = Column(Integer, ForeignKey("users.id"), nullable=True, index=True)
-class ShareToken(Base):
-    """
-    Stores share tokens for sharing extractions with other users.
-    """
-    __tablename__ = "share_tokens"
-    id = Column(Integer, primary_key=True, index=True)
-    token = Column(String, unique=True, index=True, nullable=False)  # Unique share token
-    extraction_id = Column(Integer, ForeignKey("extractions.id"), nullable=False, index=True)
-    sender_user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
-    recipient_email = Column(String, nullable=True, index=True)  # Nullable for public share links
-    expires_at = Column(DateTime(timezone=True), nullable=True)  # Optional expiration
-    accessed = Column(Boolean, default=False)  # Track if link was accessed
-    accessed_at = Column(DateTime(timezone=True), nullable=True)
-    accessed_by_user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
-    created_at = Column(
-        DateTime(timezone=True),
-        server_default=func.now(),
-    )
->>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

 from sqlalchemy import Column, Integer, String, Float, DateTime, Text, ForeignKey, Boolean
 from sqlalchemy.orm import relationship
 from sqlalchemy.sql import func
         primaryjoin="User.id == ExtractionRecord.user_id"
     )
+    # Relationship to API keys (newly added for API key authentication)
     api_keys = relationship(
         "APIKey",
         back_populates="user",
         "User",
         back_populates="api_keys"
     )

backend/app/monday_service.py CHANGED Viewed

@@ -1,396 +1,3 @@
-<<<<<<< HEAD
-"""
-Monday.com API service for creating leads with automatic field matching.
-Reference: https://developer.monday.com/api-reference/docs
-"""
-import os
-import httpx
-import json
-from typing import Optional, Dict, Any, List, Tuple
-from difflib import SequenceMatcher
-MONDAY_API_KEY = os.environ.get("MONDAY_API_KEY", "")
-MONDAY_API_URL = "https://api.monday.com/v2"
-MONDAY_BOARD_ID = os.environ.get("MONDAY_BOARD_ID", None)  # Your "New Leads" board ID
-# Cache for board columns to avoid repeated API calls
-_board_columns_cache: Dict[str, List[Dict[str, Any]]] = {}
-def _calculate_similarity(str1: str, str2: str) -> float:
-    """
-    Calculate similarity between two strings using SequenceMatcher.
-    Returns a value between 0.0 and 1.0.
-    """
-    return SequenceMatcher(None, str1.lower(), str2.lower()).ratio()
-def _find_best_column_match(
-    field_name: str,
-    available_columns: List[Dict[str, Any]],
-    min_similarity: float = 0.3
-) -> Optional[Tuple[str, str, float]]:
-    """
-    Find the best matching column for a field name using semantic similarity.
-    Args:
-        field_name: The field name to match (e.g., "first_name", "email")
-        available_columns: List of column dicts with 'id' and 'title' keys
-        min_similarity: Minimum similarity threshold (0.0 to 1.0)
-    Returns:
-        Tuple of (column_id, column_title, similarity_score) or None if no match found
-    """
-    best_match = None
-    best_score = 0.0
-    # Normalize field name for matching
-    normalized_field = field_name.lower().replace("_", " ").replace("-", " ")
-    # Common field name variations
-    field_variations = [
-        normalized_field,
-        field_name.lower(),
-        field_name.replace("_", ""),
-    ]
-    # Add common synonyms
-    synonyms = {
-        "first_name": ["first name", "firstname", "fname", "given name"],
-        "last_name": ["last name", "lastname", "lname", "surname", "family name"],
-        "email": ["email address", "email", "e-mail", "mail"],
-        "phone_number": ["phone", "phone number", "telephone", "mobile", "cell"],
-        "linkedin_url": ["linkedin", "linkedin profile", "linkedin url", "linkedin link"],
-        "title": ["job title", "position", "role", "job"],
-        "headline": ["headline", "tagline", "bio"],
-        "organization_name": ["company", "organization", "org", "company name", "employer"],
-        "organization_website": ["website", "company website", "url", "web"],
-        "organization_address": ["address", "company address", "location"],
-    }
-    if field_name in synonyms:
-        field_variations.extend(synonyms[field_name])
-    for column in available_columns:
-        column_title = column.get("title", "").lower()
-        column_id = column.get("id", "")
-        if not column_title or not column_id:
-            continue
-        # Calculate similarity for each variation
-        for variation in field_variations:
-            score = _calculate_similarity(variation, column_title)
-            if score > best_score:
-                best_score = score
-                best_match = (column_id, column.get("title", ""), score)
-    if best_match and best_score >= min_similarity:
-        return best_match
-    return None
-async def _get_board_columns(board_id: str) -> List[Dict[str, Any]]:
-    """
-    Fetch board columns from Monday.com API.
-    Args:
-        board_id: Monday.com board ID
-    Returns:
-        List of column dictionaries with 'id', 'title', and 'type' keys
-    """
-    # Check cache first
-    if board_id in _board_columns_cache:
-        return _board_columns_cache[board_id]
-    if not MONDAY_API_KEY:
-        print("[WARNING] MONDAY_API_KEY not set, cannot fetch board columns")
-        return []
-    query = """
-    query ($boardId: ID!) {
-        boards(ids: [$boardId]) {
-            columns {
-                id
-                title
-                type
-            }
-        }
-    }
-    """
-    headers = {
-        "Authorization": MONDAY_API_KEY,
-        "Content-Type": "application/json"
-    }
-    try:
-        async with httpx.AsyncClient(timeout=30.0) as client:
-            response = await client.post(
-                MONDAY_API_URL,
-                json={
-                    "query": query,
-                    "variables": {"boardId": board_id}
-                },
-                headers=headers
-            )
-            if response.status_code == 200:
-                result = response.json()
-                if result.get("data") and result["data"].get("boards"):
-                    boards = result["data"]["boards"]
-                    if boards and boards[0].get("columns"):
-                        columns = boards[0]["columns"]
-                        # Cache the result
-                        _board_columns_cache[board_id] = columns
-                        print(f"[INFO] Fetched {len(columns)} columns from Monday.com board {board_id}")
-                        return columns
-                elif result.get("errors"):
-                    print(f"[ERROR] Failed to fetch board columns: {result['errors']}")
-            else:
-                print(f"[ERROR] Failed to fetch board columns: {response.status_code} - {response.text}")
-    except Exception as e:
-        print(f"[ERROR] Exception while fetching board columns: {str(e)}")
-    return []
-def _format_column_value(value: Any, column_type: str, column_id: Optional[str] = None) -> Any:
-    """
-    Format a value according to Monday.com column type.
-    Args:
-        value: The value to format
-        column_type: Monday.com column type (email, phone, link, text, etc.)
-        column_id: Column ID (for special handling)
-    Returns:
-        For email/phone/link: Python dict object
-        For text/other types: Plain string
-    """
-    if value is None:
-        return ""
-    value_str = str(value)
-    if column_type == "email":
-        # Monday.com email format requires dict object (will be JSON encoded later)
-        return {"email": value_str, "text": value_str}
-    elif column_type == "phone":
-        return {"phone": value_str, "countryShortName": "US"}
-    elif column_type == "link":
-        # If it's already a URL, use it; otherwise create a link
-        if value_str.startswith("http://") or value_str.startswith("https://"):
-            return {"url": value_str, "text": value_str}
-        else:
-            return {"url": f"https://{value_str}", "text": value_str}
-    else:
-        # Text, status, and other types - just return the string
-        return value_str
-async def create_monday_lead(
-    email: str,
-    first_name: Optional[str] = None,
-    last_name: Optional[str] = None,
-    phone_number: Optional[str] = None,
-    linkedin_url: Optional[str] = None,
-    title: Optional[str] = None,
-    headline: Optional[str] = None,
-    organization_name: Optional[str] = None,
-    organization_website: Optional[str] = None,
-    organization_address: Optional[str] = None,
-    board_id: Optional[str] = None
-) -> bool:
-    """
-    Create a new lead item in Monday.com board.
-    Args:
-        email: Contact email address (required)
-        first_name: Contact first name
-        last_name: Contact last name
-        phone_number: Phone number
-        linkedin_url: LinkedIn profile URL
-        title: Job title
-        headline: Professional headline
-        organization_name: Company name
-        organization_website: Company website
-        organization_address: Company address
-        board_id: Monday.com board ID as string (defaults to MONDAY_BOARD_ID env var)
-    Returns:
-        True if lead created successfully, False otherwise
-    """
-    if not MONDAY_API_KEY:
-        print("[WARNING] MONDAY_API_KEY not set, skipping Monday.com lead creation")
-        return False
-    target_board_id = board_id or MONDAY_BOARD_ID
-    if not target_board_id:
-        print("[WARNING] MONDAY_BOARD_ID not set, skipping Monday.com lead creation")
-        return False
-    # Prepare item name (use full name or email)
-    item_name = email
-    if first_name and last_name:
-        item_name = f"{first_name} {last_name}"
-    elif first_name:
-        item_name = first_name
-    elif last_name:
-        item_name = last_name
-    # Fetch board columns to automatically match fields
-    print(f"[INFO] Fetching Monday.com board columns for automatic field matching...")
-    board_columns = await _get_board_columns(str(target_board_id))
-    if not board_columns:
-        print("[WARNING] Could not fetch board columns, skipping Monday.com lead creation")
-        return False
-    # Create a mapping of column IDs to column types for formatting
-    column_types = {col["id"]: col.get("type", "text") for col in board_columns}
-    # Prepare data fields to map
-    data_fields = {
-        "email": email,
-        "first_name": first_name,
-        "last_name": last_name,
-        "phone_number": phone_number,
-        "linkedin_url": linkedin_url,
-        "title": title,
-        "headline": headline,
-        "organization_name": organization_name,
-        "organization_website": organization_website,
-        "organization_address": organization_address,
-    }
-    # Automatically match fields to columns using semantic similarity
-    column_values = {}
-    matched_fields = []
-    # Track which columns have been matched to handle duplicates (e.g., first_name and last_name -> Name)
-    column_matches = {}  # column_id -> (field_name, value)
-    for field_name, field_value in data_fields.items():
-        if not field_value:
-            continue
-        match = _find_best_column_match(field_name, board_columns)
-        if match:
-            column_id, column_title, similarity = match
-            column_type = column_types.get(column_id, "text")
-            # Handle special case: if first_name and last_name both match to the same "Name" column
-            if column_id in column_matches:
-                existing_field, existing_value = column_matches[column_id]
-                # If both first_name and last_name match to the same column, combine them
-                if (field_name in ["first_name", "last_name"] and
-                    existing_field in ["first_name", "last_name"] and
-                    field_name != existing_field):
-                    # Combine first and last name
-                    if field_name == "first_name":
-                        combined_value = f"{field_value} {existing_value}"
-                    else:
-                        combined_value = f"{existing_value} {field_value}"
-                    formatted_value = _format_column_value(combined_value, column_type, column_id)
-                    column_values[column_id] = formatted_value
-                    matched_fields.append(f"{existing_field}+{field_name} -> {column_title} (combined)")
-                    print(f"[INFO] Combined '{existing_field}' and '{field_name}' to column '{column_title}' (ID: {column_id})")
-                    continue
-                else:
-                    # Different fields matching to same column - use the one with higher similarity
-                    print(f"[DEBUG] Column '{column_title}' already matched to '{existing_field}', skipping '{field_name}'")
-                    continue
-            formatted_value = _format_column_value(field_value, column_type, column_id)
-            column_values[column_id] = formatted_value
-            column_matches[column_id] = (field_name, field_value)
-            matched_fields.append(f"{field_name} -> {column_title} (similarity: {similarity:.2f})")
-            print(f"[INFO] Matched '{field_name}' to column '{column_title}' (ID: {column_id}, type: {column_type}, value: {formatted_value[:100] if len(str(formatted_value)) > 100 else formatted_value})")
-        else:
-            print(f"[DEBUG] No suitable column match found for '{field_name}' (skipping)")
-    if not column_values:
-        print("[WARNING] No fields could be matched to board columns")
-        return False
-    print(f"[INFO] Successfully matched {len(matched_fields)} fields to Monday.com columns")
-    # Convert column_values to JSON string for GraphQL mutation
-    # Monday.com expects column values as a JSON string where:
-    # - Text columns: plain string values
-    # - Email/Phone/Link columns: dict objects (properly JSON encoded)
-    column_values_json = json.dumps(column_values)
-    print(f"[DEBUG] Monday.com column_values JSON: {column_values_json[:500]}")
-    # GraphQL mutation
-    # Note: Monday.com uses ID! (string) type for board_id, not Int!
-    mutation = """
-    mutation ($boardId: ID!, $itemName: String!, $columnValues: JSON!) {
-        create_item (board_id: $boardId, item_name: $itemName, column_values: $columnValues) {
-            id
-        }
-    }
-    """
-    # Convert board_id to string (Monday.com expects ID! which is a string)
-    board_id_str = str(target_board_id)
-    variables = {
-        "boardId": board_id_str,
-        "itemName": item_name,
-        "columnValues": column_values_json
-    }
-    headers = {
-        "Authorization": MONDAY_API_KEY,
-        "Content-Type": "application/json"
-    }
-    try:
-        async with httpx.AsyncClient(timeout=30.0) as client:
-            response = await client.post(
-                MONDAY_API_URL,
-                json={
-                    "query": mutation,
-                    "variables": variables
-                },
-                headers=headers
-            )
-            if response.status_code == 200:
-                result = response.json()
-                if result.get("data") and result["data"].get("create_item"):
-                    item_id = result["data"]["create_item"].get("id")
-                    print(f"[INFO] Successfully created Monday.com lead: {item_name} (ID: {item_id})")
-                    return True
-                elif result.get("errors"):
-                    errors = result.get("errors", [])
-                    for error in errors:
-                        error_msg = error.get("message", "Unknown error")
-                        error_path = error.get("path", [])
-                        print(f"[ERROR] Monday.com API error: {error_msg}")
-                        if error_path:
-                            print(f"[ERROR] Error path: {error_path}")
-                    # Log full error for debugging
-                    print(f"[DEBUG] Full Monday.com error response: {json.dumps(errors, indent=2)}")
-                    return False
-                else:
-                    print(f"[ERROR] Unexpected Monday.com API response: {result}")
-                    return False
-            else:
-                error_data = response.text
-                print(f"[ERROR] Failed to create Monday.com lead: {response.status_code} - {error_data}")
-                return False
-    except httpx.HTTPStatusError as e:
-        print(f"[ERROR] Monday.com API HTTP error: {e.response.status_code} - {e.response.text}")
-        return False
-    except Exception as e:
-        print(f"[ERROR] Failed to create Monday.com lead: {str(e)}")
-        return False
-=======
 """
 Monday.com API service for creating leads with automatic field matching.
 Reference: https://developer.monday.com/api-reference/docs
@@ -781,5 +388,3 @@ async def create_monday_lead(
     except Exception as e:
         print(f"[ERROR] Failed to create Monday.com lead: {str(e)}")
         return False
->>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

 """
 Monday.com API service for creating leads with automatic field matching.
 Reference: https://developer.monday.com/api-reference/docs
     except Exception as e:
         print(f"[ERROR] Failed to create Monday.com lead: {str(e)}")
         return False

backend/app/openrouter_client.py CHANGED Viewed

@@ -1,867 +1,3 @@
-<<<<<<< HEAD
-import os
-import base64
-import json
-import re
-import time
-import asyncio
-from io import BytesIO
-from typing import Any, Dict, List, Optional, Tuple
-import httpx
-try:
-    import fitz  # PyMuPDF
-    from PIL import Image
-    PDF_SUPPORT = True
-except ImportError as e:
-    PDF_SUPPORT = False
-    print(f"[WARNING] PDF support libraries not available: {e}. PDF conversion will not work.")
-# RunPod Serverless OCR configuration.
-# The endpoint URL has the form https://api.runpod.ai/v2/{endpoint_id}/run
-RUNPOD_ENDPOINT = os.environ.get("RUNPOD_ENDPOINT", "https://api.runpod.ai/v2/j2jvf8t6n0rk5c/run")
-# SECURITY: the API key is read from the environment only. A real key must never be
-# committed as a fallback default; any key that was previously hard-coded here should be rotated.
-RUNPOD_API_KEY = os.environ.get("RUNPOD_API_KEY", "")
-if not RUNPOD_API_KEY:
-    print("[WARNING] RUNPOD_API_KEY is not set. RunPod requests will be sent without a valid bearer token.")
-# Derive the endpoint ID from the URL so the matching /status URL can be built for job polling.
-# An empty ID (URL ending in "/v2/") is treated the same as a missing one.
-_endpoint_id = (RUNPOD_ENDPOINT.split("/v2/")[1].split("/")[0] or None) if "/v2/" in RUNPOD_ENDPOINT else None
-RUNPOD_STATUS_ENDPOINT = f"https://api.runpod.ai/v2/{_endpoint_id}/status" if _endpoint_id else None
-def _pdf_to_images(pdf_bytes: bytes) -> List[bytes]:
-    """
-    Render every page of a PDF to a JPEG image.
-    Each page is rasterised at 2x zoom, downscaled so that its longest side is at
-    most 1920 px, and encoded as JPEG (quality 95).
-    Args:
-        pdf_bytes: Raw bytes of the PDF document.
-    Returns:
-        A list of JPEG image bytes, one entry per page, in page order.
-    Raises:
-        RuntimeError: If the PDF support libraries could not be imported.
-    """
-    if not PDF_SUPPORT:
-        raise RuntimeError("PyMuPDF not installed. Cannot convert PDF to images.")
-    max_size = 1920  # Longest side in pixels; larger pages are downscaled to avoid GPU memory issues
-    zoom = fitz.Matrix(2.0, 2.0)  # 2x zoom for better quality
-    images = []
-    pdf_doc = fitz.open(stream=pdf_bytes, filetype="pdf")
-    try:
-        print(f"[INFO] PDF has {len(pdf_doc)} page(s)")
-        for page_no, page in enumerate(pdf_doc, start=1):
-            pix = page.get_pixmap(matrix=zoom)
-            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
-            w, h = img.size
-            if w > max_size or h > max_size:
-                # Scale the longest side down to max_size, preserving aspect ratio
-                if w > h:
-                    new_w, new_h = max_size, int(h * (max_size / w))
-                else:
-                    new_w, new_h = int(w * (max_size / h)), max_size
-                img = img.resize((new_w, new_h), Image.LANCZOS)
-                print(f"[INFO] Resized page {page_no} from {w}x{h} to {new_w}x{new_h}")
-            else:
-                print(f"[INFO] Converted page {page_no} to image ({w}x{h})")
-            buffer = BytesIO()
-            img.save(buffer, format="JPEG", quality=95)
-            images.append(buffer.getvalue())
-    finally:
-        # Always release the document, even when rendering a page fails
-        pdf_doc.close()
-    return images
-def _image_bytes_to_base64(image_bytes: bytes) -> str:
-    """Encode *image_bytes* as a ``data:image/jpeg;base64,...`` URL and log the sizes."""
-    encoded = base64.b64encode(image_bytes).decode("utf-8")
-    url = "data:image/jpeg;base64," + encoded
-    print(f"[DEBUG] Base64 encoded image: {len(image_bytes)} bytes -> {len(url)} chars")
-    return url
-def _parse_markdown_table(text: str) -> Optional[Tuple[List[str], List[List[str]]]]:
-    """
-    Locate the first markdown-style table in *text* and split it into cells.
-    Malformed tables are tolerated: a data row may be one cell shorter or longer
-    than the header and is padded or trimmed to the header width.
-    Returns:
-        (headers, rows) when a header with at least three columns and at least one
-        non-empty data row are found, otherwise None.
-    """
-    separator = re.compile(r'^[\s\|\-:]+$')  # Lines made only of |, -, : and whitespace
-    def split_cells(row: str) -> List[str]:
-        # Split on | and drop the empty cell produced by a leading / trailing pipe
-        parts = [part.strip() for part in row.split('|')]
-        if parts and not parts[0]:
-            parts = parts[1:]
-        if parts and not parts[-1]:
-            parts = parts[:-1]
-        return parts
-    lines = [raw.strip() for raw in text.split('\n')]
-    # The table starts at the first non-separator line with two or more pipes
-    # and at least two non-empty cells
-    table_start = next(
-        (
-            idx for idx, line in enumerate(lines)
-            if line.count('|') >= 2
-            and not separator.match(line)
-            and sum(1 for cell in split_cells(line) if cell) >= 2
-        ),
-        None,
-    )
-    if table_start is None:
-        return None
-    # The table ends at the first non-empty line without a pipe (blank lines are allowed inside)
-    table_end = next(
-        (idx for idx in range(table_start + 1, len(lines)) if lines[idx] and '|' not in lines[idx]),
-        len(lines),
-    )
-    table_lines = lines[table_start:table_end]
-    headers = None
-    header_idx = None
-    for idx, line in enumerate(table_lines):
-        if '|' not in line or separator.match(line):
-            continue
-        cells = split_cells(line)
-        # A header needs three or more columns, at least three of them longer than one character
-        if len(cells) >= 3 and sum(1 for cell in cells if len(cell) > 1) >= 3:
-            headers, header_idx = cells, idx
-            break
-    if not headers or header_idx is None:
-        return None
-    width = len(headers)
-    rows = []
-    for line in table_lines[header_idx + 1:]:
-        if not line or separator.match(line):
-            continue
-        if '|' not in line:
-            break
-        cells = split_cells(line)
-        # Accept rows whose cell count is within one of the header width
-        if abs(len(cells) - width) > 1:
-            continue
-        cells = (cells + [''] * (width - len(cells)))[:width]
-        # Rows consisting only of empty cells carry no data
-        if any(cells):
-            rows.append(cells)
-    return (headers, rows) if rows else None
-def _extract_metadata(text: str) -> Dict[str, str]:
-    """
-    Heuristically pull header metadata out of document text.
-    Returns a dict with the keys "title", "office", "notice_no" and "description";
-    a value that cannot be found is left as an empty string.
-    """
-    lines = [stripped for stripped in (raw.strip() for raw in text.split('\n')) if stripped]
-    metadata = {
-        "title": "",
-        # The office is taken to be the first non-empty line
-        "office": lines[0] if lines else "",
-        "notice_no": "",
-        "description": ""
-    }
-    # Notice number: digits following the Hindi "notice no." marker, searched in the first 10 lines
-    notice_re = re.compile(r'(?:पत्रक\s+)?सं[-\s:]*(\d+)')
-    notice_hit = next(filter(None, (notice_re.search(line) for line in lines[:10])), None)
-    if notice_hit:
-        metadata["notice_no"] = notice_hit.group(1)
-    # Title: prefer quoted text near the top, otherwise a keyword phrase in the first 5 lines
-    quoted = re.search(r'["""]([^"""]+)["""]', text[:1000])
-    if quoted:
-        metadata["title"] = quoted.group(1).strip()
-    else:
-        title_keywords = ('सम्पत्ति', 'सूचना', 'विज्ञप्ति', 'नाम परिवर्तन')
-        for line in lines[:5]:
-            if not any(keyword in line for keyword in title_keywords):
-                continue
-            phrase = re.search(r'(सम्पत्ति[^।]*|सूचना[^।]*|विज्ञप्ति[^।]*)', line)
-            if phrase:
-                metadata["title"] = phrase.group(1).strip()
-                break
-    # Description: the first keyword line within the first 15 lines, plus its neighbours for context
-    description_keywords = ('नाम परिवर्तन', 'अधिनियम', 'धारा', 'प्रकाशन', 'आवेदन')
-    for idx, line in enumerate(lines[:15]):
-        if not any(keyword in line for keyword in description_keywords):
-            continue
-        parts = [line]
-        if idx > 0:
-            parts.insert(0, lines[idx - 1])
-        if idx < len(lines) - 1:
-            parts.append(lines[idx + 1])
-        description = ' '.join(parts).strip()
-        # Keep only substantial descriptions, whitespace-normalised and capped at 300 characters
-        if len(description) > 30:
-            metadata["description"] = re.sub(r'\s+', ' ', description)[:300]
-        break
-    return metadata
-def _parse_model_response(response_text: str) -> Tuple[str, Dict[str, Any]]:
-    """
-    Split a raw model response into its text and metadata parts.
-    The metadata may appear as a JSON object after a "METADATA:" label (optionally
-    inside a ``` fence) or as "key: value" lines; the text may be introduced by a
-    "TEXT:" label. Labels are matched case-insensitively.
-    Args:
-        response_text: Raw text returned by the model.
-    Returns:
-        (extracted_text, metadata_dict). Metadata entries with empty values are dropped.
-    """
-    flags = re.DOTALL | re.IGNORECASE
-    metadata = {}
-    remainder = None  # response_text with the METADATA section cut out, once one is found
-    # Matching is case-insensitive, so one pattern covers both "METADATA:" and "metadata:"
-    metadata_patterns = [
-        r'METADATA:\s*\n?\s*({.*?})(?:\n\n|\nTEXT|$)',
-        r'METADATA:\s*\n?\s*```json\s*({.*?})\s*```',
-        r'METADATA:\s*\n?\s*```\s*({.*?})\s*```',
-    ]
-    for pattern in metadata_patterns:
-        match = re.search(pattern, response_text, flags)
-        if not match:
-            continue
-        try:
-            metadata = json.loads(match.group(1).strip())
-        except (json.JSONDecodeError, IndexError):
-            continue
-        remainder = response_text[:match.start()] + response_text[match.end():]
-        break
-    # If no JSON found, try to read "key: value" lines from the METADATA section
-    if not metadata:
-        section = re.search(r'METADATA:\s*\n(.*?)(?:\n\n|\nTEXT|$)', response_text, flags)
-        if section:
-            for line in section.group(1).split('\n'):
-                key, sep, value = line.partition(':')
-                value = value.strip()
-                if sep and value:
-                    metadata[key.strip().lower().replace(' ', '_')] = value
-            if remainder is None:
-                remainder = response_text[:section.start()] + response_text[section.end():]
-    text_match = re.search(r'TEXT:\s*\n(.*?)(?:\n\nMETADATA|$)', response_text, flags)
-    if text_match:
-        text = text_match.group(1)
-    elif remainder is not None:
-        # Keep everything outside the METADATA section. Truncating at "METADATA:" would
-        # discard any document text that follows the metadata block.
-        text = remainder
-    else:
-        # A METADATA label that could not be parsed: drop it and whatever follows it
-        text = re.sub(r'METADATA:.*', '', response_text, flags=flags)
-    text = text.strip()
-    # Clean up metadata - remove empty values
-    metadata = {k: v for k, v in metadata.items() if v and str(v).strip()}
-    return text, metadata
-def _extract_footer_notes(text: str) -> List[str]:
-    """
-    Collect footer notes, i.e. the sentences that follow the first table in *text*.
-    Returns at most five notes, each longer than 20 characters with whitespace
-    collapsed. When no line contains "|" there is no footer and the result is empty.
-    """
-    lines = text.split('\n')
-    first_pipe = next((idx for idx, line in enumerate(lines) if '|' in line), None)
-    if first_pipe is None:
-        footer_start = len(lines)
-    else:
-        # Walk past the contiguous run of table / separator lines
-        footer_start = first_pipe + 1
-        while footer_start < len(lines) and (
-            '|' in lines[footer_start] or re.match(r'^[\s\|\-:]+$', lines[footer_start])
-        ):
-            footer_start += 1
-    footer_text = '\n'.join(lines[footer_start:]).strip()
-    # Split on sentence terminators (danda, period, exclamation mark) followed by whitespace
-    fragments = (fragment.strip() for fragment in re.split(r'[।\.!]\s+', footer_text))
-    notes = [re.sub(r'\s+', ' ', fragment) for fragment in fragments if len(fragment) > 20]
-    return notes[:5]
-def _parse_text_with_tables(text: str, page_metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
-    """
-    Parse page text into a structured dict with metadata, table and footer notes.
-    Args:
-        text: OCR text of one page.
-        page_metadata: Metadata already extracted by the model. When empty or None,
-            basic metadata is extracted from *text* instead.
-    Returns:
-        Dict with the keys "text" (the original text), "metadata", "table" and
-        "footer_notes". "table" is a dict mapping "row_1", "row_2", ... to
-        {column_key: cell} dicts when a table is found, and an empty list otherwise.
-    """
-    result = {
-        "text": text,  # Keep original text
-        "metadata": page_metadata if page_metadata else {},
-        "table": [],
-        "footer_notes": []
-    }
-    # Caller-supplied metadata always wins; fall back to basic extraction otherwise
-    if not result["metadata"]:
-        result["metadata"] = _extract_metadata(text)
-    result["footer_notes"] = _extract_footer_notes(text)
-    table_data = _parse_markdown_table(text)
-    if not table_data:
-        return result
-    headers, rows = table_data
-    print(f"[INFO] Found table with {len(headers)} columns and {len(rows)} rows")
-    # Build one JSON-safe key per column from the original header text.
-    # Keys are sanitised FIRST and de-duplicated afterwards, so headers that differ only
-    # in punctuation cannot collapse to the same key and overwrite each other's cells.
-    column_keys = []
-    used_keys = set()
-    for i, header in enumerate(headers):
-        key = re.sub(r'[^\w\s\u0900-\u097F]', '', header.strip())  # Keep Unicode Hindi chars
-        key = re.sub(r'\s+', '_', key)  # Replace spaces with underscores
-        # If key is empty after cleaning, use column index
-        if not key:
-            key = f"column_{i+1}"
-        # Repeated keys get a numeric suffix: name, name_2, name_3, ...
-        base, occurrence = key, 1
-        while key in used_keys:
-            occurrence += 1
-            key = f"{base}_{occurrence}"
-        used_keys.add(key)
-        column_keys.append(key)
-    # Each row becomes a separate section: row_1, row_2, etc.
-    table_rows = {}
-    for idx, row in enumerate(rows, start=1):
-        row_dict = {key: cell.strip() for key, cell in zip(column_keys, row)}
-        if row_dict:
-            table_rows[f"row_{idx}"] = row_dict
-    result["table"] = table_rows
-    return result
-async def _poll_runpod_job(job_id: str, client: httpx.AsyncClient, max_wait_time: int = 300) -> Dict[str, Any]:
-    """
-    Poll the RunPod status endpoint until the job reaches a terminal state.
-    Args:
-        job_id: ID returned when the job was submitted.
-        client: Open HTTP client used for the status requests.
-        max_wait_time: Maximum number of seconds to keep polling.
-    Returns:
-        The status payload of the COMPLETED job.
-    Raises:
-        RuntimeError: If the status endpoint is not configured, the job ends as
-            FAILED, CANCELLED or TIMED_OUT, or max_wait_time is exceeded.
-        httpx.HTTPStatusError: If a status request returns an error response.
-    """
-    if not RUNPOD_STATUS_ENDPOINT:
-        raise RuntimeError("RunPod status endpoint not configured. Cannot poll async job.")
-    headers = {
-        "Content-Type": "application/json",
-        "Authorization": f"Bearer {RUNPOD_API_KEY}"
-    }
-    status_url = f"{RUNPOD_STATUS_ENDPOINT}/{job_id}"
-    poll_interval = 2  # Poll every 2 seconds
-    # Monotonic clock: the deadline is unaffected by wall-clock adjustments
-    deadline = time.monotonic() + max_wait_time
-    while True:
-        if time.monotonic() > deadline:
-            raise RuntimeError(f"Job {job_id} timed out after {max_wait_time} seconds")
-        response = await client.get(status_url, headers=headers)
-        response.raise_for_status()
-        status_result = response.json()
-        status = str(status_result.get("status") or "").upper()
-        if status == "COMPLETED":
-            print(f"[INFO] Job {job_id} completed successfully")
-            return status_result
-        if status == "FAILED":
-            error_msg = status_result.get("error", "Unknown error")
-            raise RuntimeError(f"Job {job_id} failed: {error_msg}")
-        if status in ("CANCELLED", "TIMED_OUT"):
-            # Terminal states as well: polling further can never succeed
-            error_msg = status_result.get("error", "Unknown error")
-            raise RuntimeError(f"Job {job_id} ended with status {status}: {error_msg}")
-        # IN_QUEUE, IN_PROGRESS or an unrecognised status: wait and retry
-        print(f"[INFO] Job {job_id} status: {status}, waiting...")
-        await asyncio.sleep(poll_interval)
-async def _extract_text_with_ocr(image_bytes: bytes, page_num: int, total_pages: int, custom_prompt: Optional[str] = None) -> Dict[str, Any]:
-    """
-    Run OCR on a single page image through the RunPod serverless endpoint.
-    Args:
-        image_bytes: Encoded image bytes of the page.
-        page_num: Page number, used for logging and error messages.
-        total_pages: Total number of pages, used for logging.
-        custom_prompt: Optional prompt that replaces the default "extract all text" prompt.
-    Returns:
-        Dict with "doc_type", "confidence" (0-100), "full_text" and "fields"
-        (the metadata parsed from the model response).
-    Raises:
-        RuntimeError: On any HTTP, polling or parsing failure. The original
-            exception is chained as the cause.
-    """
-    image_base64 = base64.b64encode(image_bytes).decode("utf-8")
-    print(f"[INFO] OCR: Processing page {page_num}/{total_pages} with RunPod endpoint")
-    try:
-        # Use custom prompt if provided, otherwise the default general-extraction prompt
-        metadata_prompt = custom_prompt if custom_prompt else """Extract all text from this image."""
-        payload = {
-            "input": {
-                "prompt": metadata_prompt,
-                "image_base64": image_base64  # Base64 encoded image
-            }
-        }
-        headers = {
-            "Content-Type": "application/json",
-            "Authorization": f"Bearer {RUNPOD_API_KEY}"
-        }
-        async with httpx.AsyncClient(timeout=300.0) as client:
-            response = await client.post(
-                RUNPOD_ENDPOINT,
-                headers=headers,
-                json=payload
-            )
-            response.raise_for_status()
-            result = response.json()
-            # A job ID plus a pending status means the job runs asynchronously and must be polled
-            job_id = result.get("id")
-            status = str(result.get("status") or "").upper()
-            if job_id and status in ["IN_QUEUE", "IN_PROGRESS"]:
-                print(f"[INFO] Job submitted with ID: {job_id}, status: {status}")
-                if not RUNPOD_STATUS_ENDPOINT:
-                    raise RuntimeError("RunPod status endpoint not configured. Cannot poll async job.")
-                result = await _poll_runpod_job(job_id, client)
-        # The output may be a string, a dict or a list depending on the model
-        if "output" in result:
-            output = result["output"]
-            if isinstance(output, str):
-                extracted_text = output
-            elif isinstance(output, dict):
-                # First non-empty of the common fields, so an empty "text" does not mask "result"
-                extracted_text = next((output[key] for key in ("text", "result", "content") if output.get(key)), "")
-            elif isinstance(output, list) and len(output) > 0:
-                extracted_text = output[0]
-            else:
-                extracted_text = ""
-        elif "result" in result:
-            extracted_text = result["result"]
-        elif "text" in result:
-            extracted_text = result["text"]
-        else:
-            # Fallback: convert entire response to string
-            extracted_text = str(result)
-        # The parser below needs a string; coerce anything else the model returned
-        if not extracted_text:
-            extracted_text = ""
-        elif not isinstance(extracted_text, str):
-            extracted_text = str(extracted_text)
-        print(f"[INFO] OCR: Extracted {len(extracted_text)} characters from page {page_num}")
-        parsed_text, parsed_metadata = _parse_model_response(extracted_text)
-        # The confidence heuristic only inspects the text, so no response object is needed
-        confidence = _calculate_ocr_confidence(None, parsed_text)
-        # Determine document type from metadata if available, else infer it from the title
-        doc_type = parsed_metadata.get("document_type", "other")
-        if doc_type == "other" and parsed_metadata.get("title"):
-            title_lower = str(parsed_metadata.get("title", "")).lower()
-            if any(kw in title_lower for kw in ["tender", "bid", "quotation"]):
-                doc_type = "tender"
-            elif any(kw in title_lower for kw in ["recruitment", "appointment", "vacancy"]):
-                doc_type = "recruitment"
-            elif any(kw in title_lower for kw in ["notice", "notification", "circular"]):
-                doc_type = "notice"
-        return {
-            "doc_type": doc_type,
-            "confidence": confidence,
-            "full_text": parsed_text,
-            "fields": parsed_metadata if parsed_metadata else {}  # Model-extracted metadata
-        }
-    except httpx.HTTPStatusError as e:
-        error_msg = f"HTTP {e.response.status_code}: {e.response.text}"
-        print(f"[ERROR] OCR API HTTP error for page {page_num}: {error_msg}")
-        raise RuntimeError(f"OCR API error for page {page_num}: {error_msg}") from e
-    except Exception as e:
-        error_msg = str(e)
-        print(f"[ERROR] OCR API error for page {page_num}: {error_msg}")
-        raise RuntimeError(f"OCR API error for page {page_num}: {error_msg}") from e
-def _calculate_ocr_confidence(response, extracted_text: str) -> float:
-    """
-    Score the quality of an OCR extraction on a 0-100 scale using text heuristics.
-    *response* is accepted for interface compatibility; the score is derived from
-    *extracted_text* only.
-    """
-    base = 92.0  # Starting point for any successful extraction
-    length = len(extracted_text.strip())
-    if length == 0:
-        return 0.0
-    if length < 10:
-        # Very short text - might be an error or empty page
-        return max(30.0, base - 40.0)
-    if length < 50:
-        # Short text - might be incomplete
-        return max(60.0, base - 20.0)
-    # Longer extractions earn a small bonus
-    if length > 1000:
-        length_bonus = 5.0
-    elif length > 500:
-        length_bonus = 3.0
-    else:
-        length_bonus = 0.0
-    score = min(100.0, base + length_bonus)
-    # More than five pipes suggests a table was captured
-    if extracted_text.count('|') > 5:
-        score = min(100.0, score + 6.0)
-    # Share of non-whitespace characters: dense text is rewarded, sparse text penalised
-    dense_chars = sum(1 for ch in extracted_text if not ch.isspace())
-    density = dense_chars / length
-    if density > 0.85:
-        score = min(100.0, score + 5.0)
-    elif density > 0.75:
-        score = min(100.0, score + 3.0)
-    elif density > 0.6:
-        score = min(100.0, score + 1.0)
-    elif density < 0.3:
-        score = max(60.0, score - 15.0)
-    # A mix of digits, letters and punctuation indicates well-structured text
-    mixed_content = (
-        any(ch.isdigit() for ch in extracted_text)
-        and any(ch.isalpha() for ch in extracted_text)
-        and any(ch in '.,;:!?()[]{}' for ch in extracted_text)
-    )
-    if mixed_content:
-        score = min(100.0, score + 2.0)
-    return round(min(100.0, max(0.0, score)), 1)
-async def extract_fields_from_document(
-    file_bytes: bytes,
-    content_type: str,
-    filename: str,
-    key_fields: Optional[str] = None,
-) -> Dict[str, Any]:
-    """
-    Run OCR over a PDF or image and return the extracted text in structured form.
-    PDFs are rendered to one image per page; any other content type is treated as a
-    single image that is converted to RGB JPEG and downscaled to at most 1920 px.
-    Pages are processed one at a time and a failing page is recorded with an
-    "error" entry instead of aborting the whole document.
-    Args:
-        file_bytes: Raw bytes of the uploaded file.
-        content_type: MIME type of the upload.
-        filename: Original file name (not used by the extraction itself).
-        key_fields: Optional comma-separated field names. When given, a second OCR
-            pass on the first page asks the model for exactly these fields.
-    Returns:
-        Dict with "doc_type", "confidence" (average over successful pages),
-        "full_text", "fields" (per-page structured data keyed "page_N") and "pages"
-        (raw per-page results). "Fields" is added only when key_fields extraction
-        produced a non-empty JSON object.
-    Raises:
-        RuntimeError: If a PDF is supplied but the PDF support libraries are missing.
-    """
-    if content_type == "application/pdf" or content_type.endswith("/pdf"):
-        if not PDF_SUPPORT:
-            raise RuntimeError("PDF support requires PyMuPDF. Please install it.")
-        image_bytes_list = _pdf_to_images(file_bytes)
-    else:
-        # For regular images, convert to JPEG for consistency
-        try:
-            img = Image.open(BytesIO(file_bytes))
-            if img.mode != "RGB":
-                img = img.convert("RGB")
-            # Resize if too large (max 1920px on longest side)
-            max_size = 1920
-            w, h = img.size
-            if w > max_size or h > max_size:
-                if w > h:
-                    new_w = max_size
-                    new_h = int(h * (max_size / w))
-                else:
-                    new_h = max_size
-                    new_w = int(w * (max_size / h))
-                img = img.resize((new_w, new_h), Image.LANCZOS)
-                print(f"[INFO] Resized image from {w}x{h} to {new_w}x{new_h}")
-            img_bytes = BytesIO()
-            img.save(img_bytes, format="JPEG", quality=95)
-            image_bytes_list = [img_bytes.getvalue()]
-        except Exception as e:
-            # Fallback only: the original bytes are used when the image cannot be processed.
-            # This assignment must stay inside the except block, otherwise it discards
-            # the converted / resized JPEG produced above.
-            print(f"[WARNING] Could not process image with PIL: {e}. Using original bytes.")
-            image_bytes_list = [file_bytes]
-    total_pages = len(image_bytes_list)
-    print(f"[INFO] Processing {total_pages} page(s) with OCR model...")
-    # Process each page separately so one bad page does not fail the document
-    page_results = []
-    for page_num, img_bytes in enumerate(image_bytes_list, start=1):
-        print(f"[INFO] Processing page {page_num}/{total_pages}...")
-        try:
-            page_result = await _extract_text_with_ocr(img_bytes, page_num, total_pages, None)
-            page_results.append({
-                "page_number": page_num,
-                "text": page_result.get("full_text", ""),
-                "fields": page_result.get("fields", {}),
-                "confidence": page_result.get("confidence", 0),
-                "doc_type": page_result.get("doc_type", "other"),
-            })
-            print(f"[INFO] Page {page_num} processed successfully")
-        except Exception as e:
-            print(f"[ERROR] Failed to process page {page_num}: {e}")
-            page_results.append({
-                "page_number": page_num,
-                "text": "",
-                "fields": {},
-                "confidence": 0,
-                "error": str(e)
-            })
-    # Combine results from all pages that produced text
-    combined_full_text = "\n\n".join(f"=== PAGE {p['page_number']} ===\n\n{p['text']}" for p in page_results if p.get("text"))
-    # Extract user-specified fields if key_fields provided
-    extracted_fields = {}
-    if key_fields and key_fields.strip():
-        # "Invoice Number, Invoice Date, PO Number" -> ['Invoice Number', 'Invoice Date', 'PO Number']
-        field_list = [f.strip() for f in key_fields.split(',') if f.strip()]
-        if field_list and image_bytes_list:
-            print(f"[INFO] Extracting user-specified fields: {field_list}")
-            fields_json = json.dumps(field_list)
-            custom_prompt = f"Extract the following fields from this image and return as JSON: {fields_json}. Return only a valid JSON object with the field names as keys and their extracted values."
-            # Second OCR pass on the first page (usually has most metadata) with the custom prompt
-            try:
-                print("[INFO] Running second OCR pass for field extraction...")
-                field_result = await _extract_text_with_ocr(image_bytes_list[0], 1, 1, custom_prompt)
-                field_text = field_result.get("full_text", "")
-                try:
-                    # Prefer a JSON object embedded in the response, else parse the whole response
-                    json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', field_text, re.DOTALL)
-                    extracted_fields = json.loads(json_match.group(0) if json_match else field_text)
-                    if isinstance(extracted_fields, dict):
-                        print(f"[INFO] Successfully extracted {len(extracted_fields)} fields from second OCR pass")
-                    else:
-                        # "Fields" is a mapping of field name to value; ignore any other JSON shape
-                        print(f"[WARNING] Could not parse JSON from field extraction response: {field_text[:200]}")
-                        extracted_fields = {}
-                except json.JSONDecodeError:
-                    print(f"[WARNING] Could not parse JSON from field extraction response: {field_text[:200]}")
-                    extracted_fields = {}
-            except Exception as e:
-                print(f"[WARNING] Field extraction failed: {e}")
-                extracted_fields = {}
-    # Parse each page for tables and structure the output (always keyed page_X, even for one page)
-    structured_pages = {}
-    for page_result in page_results:
-        if not page_result.get("text"):
-            continue
-        parsed_data = _parse_text_with_tables(page_result.get("text", ""), {})
-        structured_pages[f"page_{page_result.get('page_number', 1)}"] = {
-            "text": parsed_data["text"],
-            "table": parsed_data["table"],
-            "footer_notes": parsed_data["footer_notes"],
-            "confidence": page_result.get("confidence", 0),
-            "doc_type": page_result.get("doc_type", "other")
-        }
-    # Average confidence over the pages that reported a positive score
-    confidences = [p.get("confidence", 0) for p in page_results if p.get("confidence", 0) > 0]
-    avg_confidence = sum(confidences) / len(confidences) if confidences else 0
-    # Document type comes from the first page that reported something other than "other"
-    doc_type = "other"
-    for page_result in page_results:
-        if page_result.get("doc_type") and page_result["doc_type"] != "other":
-            doc_type = page_result["doc_type"]
-            break
-    return_obj = {
-        "doc_type": doc_type,
-        "confidence": avg_confidence,
-        "full_text": combined_full_text,
-        "fields": structured_pages,  # Structured per-page data with tables
-        "pages": page_results
-    }
-    # Add Fields at root level only if user provided key_fields and extraction succeeded
-    if extracted_fields:
-        return_obj["Fields"] = extracted_fields
-    return return_obj
-=======
 import os
 import base64
 import json
@@ -1724,4 +860,3 @@ async def extract_fields_from_document(
         return_obj["Fields"] = extracted_fields
     return return_obj
->>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

 import os
 import base64
 import json
         return_obj["Fields"] = extracted_fields
     return return_obj

backend/app/otp_service.py CHANGED Viewed

@@ -1,202 +1,3 @@
-<<<<<<< HEAD
-"""
-OTP (One-Time Password) service for email-based authentication.
-"""
-import random
-import string
-from datetime import datetime, timedelta
-from typing import Dict, Optional
-from sqlalchemy.orm import Session
-from fastapi import HTTPException
-from .models import User
-from .brevo_service import send_otp_email
-# Store OTPs in memory (in production, use Redis or database)
-otp_store: Dict[str, dict] = {}
-def generate_otp(length: int = 6) -> str:
-    """
-    Generate a random OTP code.
-    Args:
-        length: Length of OTP (default: 6)
-    Returns:
-        Random OTP string
-    """
-    return ''.join(random.choices(string.digits, k=length))
-async def request_otp(email: str, db: Session) -> dict:
-    """
-    Generate and send OTP to email using Brevo.
-    Args:
-        email: Email address to send OTP to
-        db: Database session
-    Returns:
-        Dictionary with success message
-    """
-    # Generate OTP
-    otp = generate_otp()
-    expires_at = datetime.utcnow() + timedelta(minutes=10)
-    # Store OTP (in production, use Redis or database with TTL)
-    otp_store[email.lower()] = {
-        'otp': otp,
-        'expires_at': expires_at,
-        'attempts': 0,
-        'max_attempts': 5
-    }
-    # Send OTP via Brevo
-    try:
-        await send_otp_email(email, otp)
-        print(f"[INFO] OTP generated and sent to {email}")
-    except Exception as e:
-        # Remove OTP from store if email sending failed
-        if email.lower() in otp_store:
-            del otp_store[email.lower()]
-        raise HTTPException(
-            status_code=500,
-            detail=f"Failed to send OTP email: {str(e)}"
-        )
-    return {
-        "message": "OTP sent to your email address",
-        "expires_in_minutes": 10
-    }
-async def verify_otp(email: str, otp: str, db: Session) -> User:
-    """
-    Verify OTP and return/create user.
-    Args:
-        email: Email address
-        otp: OTP code to verify
-        db: Database session
-    Returns:
-        User object
-    Raises:
-        HTTPException: If OTP is invalid, expired, or max attempts exceeded
-    """
-    email_lower = email.lower()
-    stored = otp_store.get(email_lower)
-    if not stored:
-        raise HTTPException(
-            status_code=400,
-            detail="OTP not found. Please request a new OTP."
-        )
-    # Check if expired
-    if datetime.utcnow() > stored['expires_at']:
-        del otp_store[email_lower]
-        raise HTTPException(
-            status_code=400,
-            detail="OTP has expired. Please request a new OTP."
-        )
-    # Check max attempts
-    if stored['attempts'] >= stored['max_attempts']:
-        del otp_store[email_lower]
-        raise HTTPException(
-            status_code=400,
-            detail="Maximum verification attempts exceeded. Please request a new OTP."
-        )
-    # Verify OTP
-    if stored['otp'] != otp:
-        stored['attempts'] += 1
-        remaining_attempts = stored['max_attempts'] - stored['attempts']
-        raise HTTPException(
-            status_code=400,
-            detail=f"Invalid OTP. {remaining_attempts} attempt(s) remaining."
-        )
-    # OTP verified successfully
-    # Get or create user
-    user = db.query(User).filter(User.email == email_lower).first()
-    if not user:
-        user = User(
-            email=email_lower,
-            auth_method='otp',
-            email_verified=True
-        )
-        db.add(user)
-        db.commit()
-        db.refresh(user)
-        print(f"[INFO] New user created via OTP: {email_lower}")
-        # Enrich contact data from Apollo.io and update Brevo + Monday.com
-        try:
-            from .apollo_service import enrich_contact_by_email
-            from .brevo_service import create_brevo_contact, BREVO_TRIAL_LIST_ID
-            from .monday_service import create_monday_lead
-            # Enrich contact data from Apollo.io
-            enriched_data = await enrich_contact_by_email(email_lower)
-            # Use enriched data if available
-            first_name = enriched_data.get("first_name") if enriched_data else None
-            last_name = enriched_data.get("last_name") if enriched_data else None
-            org_name = enriched_data.get("organization_name") if enriched_data else None
-            # Fallback to email domain if Apollo didn't provide organization
-            if not org_name:
-                org_domain = email_lower.split('@')[1] if '@' in email_lower else None
-                org_name = org_domain.split('.')[0].capitalize() if org_domain else None
-            # Update Brevo contact with enriched data
-            await create_brevo_contact(
-                email=email_lower,
-                first_name=first_name,
-                last_name=last_name,
-                organization_name=org_name or (enriched_data.get("organization_name") if enriched_data else None),
-                phone_number=enriched_data.get("phone_number") if enriched_data else None,
-                linkedin_url=enriched_data.get("linkedin_url") if enriched_data else None,
-                title=enriched_data.get("title") if enriched_data else None,
-                headline=enriched_data.get("headline") if enriched_data else None,
-                organization_website=enriched_data.get("organization_website") if enriched_data else None,
-                organization_address=enriched_data.get("organization_address") if enriched_data else None,
-                list_id=BREVO_TRIAL_LIST_ID
-            )
-            # Create lead in Monday.com
-            await create_monday_lead(
-                email=email_lower,
-                first_name=first_name,
-                last_name=last_name,
-                phone_number=enriched_data.get("phone_number") if enriched_data else None,
-                linkedin_url=enriched_data.get("linkedin_url") if enriched_data else None,
-                title=enriched_data.get("title") if enriched_data else None,
-                headline=enriched_data.get("headline") if enriched_data else None,
-                organization_name=org_name or (enriched_data.get("organization_name") if enriched_data else None),
-                organization_website=enriched_data.get("organization_website") if enriched_data else None,
-                organization_address=enriched_data.get("organization_address") if enriched_data else None,
-            )
-        except Exception as e:
-            # Don't fail user creation if integrations fail
-            print(f"[WARNING] Failed to enrich/update contact for {email_lower}: {str(e)}")
-    else:
-        user.email_verified = True
-        if user.auth_method != 'otp':
-            user.auth_method = 'otp'
-        db.commit()
-        print(f"[INFO] User verified via OTP: {email_lower}")
-    # Remove OTP from store after successful verification
-    del otp_store[email_lower]
-    return user
-=======
 """
 OTP (One-Time Password) service for email-based authentication.
 """
@@ -393,5 +194,3 @@ async def verify_otp(email: str, otp: str, db: Session) -> User:
     del otp_store[email_lower]
     return user
->>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

 """
 OTP (One-Time Password) service for email-based authentication.
 """
     del otp_store[email_lower]
     return user

backend/app/schemas.py CHANGED Viewed

@@ -1,31 +1,3 @@
-<<<<<<< HEAD
-from pydantic import BaseModel
-from typing import Dict, Optional
-from datetime import datetime
-class ExtractionStage(BaseModel):
-    time: int
-    status: str
-    variation: str
-class ExtractionRecordBase(BaseModel):
-    id: int
-    fileName: str
-    fileType: str
-    fileSize: str
-    extractedAt: datetime
-    status: str
-    confidence: float
-    fieldsExtracted: int
-    totalTime: int
-    stages: Dict[str, ExtractionStage]
-    errorMessage: Optional[str] = None
-    class Config:
-        from_attributes = True
-=======
 from pydantic import BaseModel
 from typing import Dict, Optional
 from datetime import datetime
@@ -52,4 +24,3 @@ class ExtractionRecordBase(BaseModel):
     class Config:
         from_attributes = True
->>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d

 from pydantic import BaseModel
 from typing import Dict, Optional
 from datetime import datetime
     class Config:
         from_attributes = True