import fitz  # PyMuPDF
import pytesseract
import camelot
import re
import shutil
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
from pdf2image import convert_from_path
import os
import uuid
import tempfile
from typing import List
from loguru import logger
import logging

# Configure logging: add a loguru file sink that writes records at INFO level
# and above to app.log, rotating the file once it reaches 500 MB.
logger.add("app.log", rotation="500 MB", level="INFO")

# ASGI application object; the endpoint below registers itself on it through
# the @app.post decorator.
app = FastAPI(title="Policy Verification API", version="1.0.0")

# ========== Core Utilities ==========

def extract_text_from_pdf(pdf_path: str) -> str:
    """
    Extracts text from a PDF using PyMuPDF; falls back to OCR if text is insufficient.

    The embedded text layer of every page is concatenated. When fewer than 50
    non-whitespace-trimmed characters come back (typical for scanned
    documents), the file is re-processed with ``ocr_pdf`` instead.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        str: Extracted text (from the text layer, or from OCR on fallback).

    Raises:
        RuntimeError: If PyMuPDF cannot read the file, or if the OCR fallback
            fails (the error raised by ``ocr_pdf`` propagates unchanged).
    """
    logger.info(f"Extracting text from PDF: {pdf_path}")
    try:
        # The context manager closes the document even when a page fails to
        # parse, so the file handle is never leaked.
        with fitz.open(pdf_path) as doc:
            text = "".join(page.get_text() for page in doc)
    except Exception as e:
        logger.error(f"Failed to extract text from PDF: {e}")
        raise RuntimeError(f"Error reading PDF with PyMuPDF: {e}") from e

    # The OCR fallback sits outside the try block on purpose: ocr_pdf already
    # raises a descriptive RuntimeError, and wrapping it again would mislabel
    # an OCR failure as a PyMuPDF read error.
    if len(text.strip()) < 50:
        logger.warning("Insufficient text extracted, falling back to OCR")
        text = ocr_pdf(pdf_path)
    return text

def ocr_pdf(pdf_path: str) -> str:
    """
    Performs OCR on a PDF using Tesseract and pdf2image.

    Every page is rasterised with ``convert_from_path`` and passed through
    ``pytesseract.image_to_string``; the per-page results are concatenated in
    page order.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        str: OCR-extracted text.

    Raises:
        RuntimeError: If rasterisation or recognition fails. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        logger.info(f"Performing OCR on PDF: {pdf_path}")
        images = convert_from_path(pdf_path)
        text = "".join(pytesseract.image_to_string(img) for img in images)
        logger.info("OCR completed successfully")
        return text
    except Exception as e:
        logger.error(f"OCR failed: {e}")
        # Chain the original exception so the real traceback is preserved.
        raise RuntimeError(f"OCR failed: {e}") from e

def find_uins(text: str) -> List[str]:
    """
    Collects the UINs that appear after a "UIN" label in the given text.

    A candidate is the label ``UIN`` (any case), an optional ``:`` or ``-``,
    optional whitespace, and then an ``IRDAN…V<digits>`` code. Whitespace
    inside the code is removed; letter case is kept as found.

    Args:
        text (str): Text to search for UINs.

    Returns:
        List[str]: Distinct whitespace-free UINs in order of first appearance.
    """
    uin_regex = re.compile(r"UIN[:\-]?\s*(IRDAN\d+\s*[A-Z0-9]+V\d{2,})", flags=re.IGNORECASE)

    # A dict keeps insertion order, so it doubles as an ordered set.
    first_seen = {}
    for hit in uin_regex.finditer(text):
        compact = re.sub(r"\s+", "", hit.group(1).strip())
        first_seen.setdefault(compact, None)

    unique_uins = list(first_seen)
    logger.info(f"Found {len(unique_uins)} unique UINs")
    return unique_uins

def extract_uin_list_from_pdf_db(pdf_db_path: str) -> List[str]:
    """
    Extracts UINs from a database-like PDF using Camelot.

    All pages are parsed with Camelot's ``stream`` flavor and every cell of
    every detected table is scanned for ``IRDAN…V<digits>`` codes (any case).
    Whitespace inside a code is removed; letter case is kept as found.

    Args:
        pdf_db_path (str): Path to the database PDF file.

    Returns:
        List[str]: Distinct whitespace-free UINs in order of first appearance.

    Raises:
        RuntimeError: If table extraction or scanning fails. The underlying
            exception is attached as ``__cause__``.
    """
    # Compiled once per call instead of being looked up for every table cell.
    uin_pattern = re.compile(r"IRDAN\d+\s*[A-Z0-9]+V\d{2,}", flags=re.IGNORECASE)
    try:
        logger.info(f"Extracting UINs from DB PDF: {pdf_db_path}")
        tables = camelot.read_pdf(pdf_db_path, pages='all', flavor='stream')
        all_uins = []
        for table in tables:
            for row in table.df.values:
                for cell in row:
                    # Cells may hold non-string values, hence str(cell).
                    all_uins.extend(
                        re.sub(r"\s+", "", match)
                        for match in uin_pattern.findall(str(cell))
                    )
        # dict.fromkeys de-duplicates while keeping first-seen order.
        unique_uins = list(dict.fromkeys(all_uins))
        logger.info(f"Extracted {len(unique_uins)} unique UINs from DB")
        return unique_uins
    except Exception as e:
        logger.error(f"Error extracting UINs from DB PDF: {e}")
        # Chain the original exception so the real traceback is preserved.
        raise RuntimeError(f"Error extracting UINs from DB PDF: {e}") from e

def match_found(policy_uins: List[str], db_uins: List[str]) -> bool:
    """
    Reports whether at least one policy UIN is present among the database UINs.

    Both sides are compared after removing spaces and hyphens and converting
    to lower case, so formatting and case differences do not prevent a match.

    Args:
        policy_uins (List[str]): UINs found in the policy document.
        db_uins (List[str]): UINs found in the database document.

    Returns:
        bool: True as soon as one policy UIN matches, False if none do.
    """
    def _canonical(uin: str) -> str:
        # Strip the separators that vary between documents, then fold case.
        return uin.replace(" ", "").replace("-", "").lower()

    known = set(map(_canonical, db_uins))

    # map() is lazy, so policy UINs are only normalised up to the first hit.
    normalized = next(
        (candidate for candidate in map(_canonical, policy_uins) if candidate in known),
        None,
    )
    if normalized is None:
        logger.info("No UIN matches found")
        return False

    logger.info(f"UIN match found: {normalized}")
    return True

# ========== FastAPI Endpoint ==========

@app.post("/verify-policy")
async def verify_policy(
    policy_file: UploadFile = File(...)
):
    """
    Verifies if a policy PDF's UIN exists in a database PDF.

    The upload is saved to a temporary directory, its text is extracted (with
    OCR fallback), UINs are located in that text, and they are compared with
    the UINs parsed from the server-side database PDF.

    Args:
        policy_file (UploadFile): The uploaded policy document; its filename
            must end in ".pdf" (case-insensitive).

    Returns:
        JSONResponse: Status 200 with ``match`` (bool) and ``policy-text``
            (the extracted text).

    Raises:
        HTTPException: 400 if the upload has no ".pdf" filename; 500 if the
            database PDF is missing or any processing step fails.
    """
    logger.info("Received policy verification request")
    db_file_path = "/app/list-of-products.pdf"

    # Validate uploaded policy file. The filename is client-supplied: it can
    # be absent, and the extension can be upper-case (e.g. "POLICY.PDF").
    filename = policy_file.filename or ""
    if not filename.lower().endswith(".pdf"):
        logger.error("Invalid file type for policy")
        raise HTTPException(status_code=400, detail="Policy file must be a PDF.")

    if not os.path.exists(db_file_path):
        logger.error(f"Database PDF file not found at {db_file_path}")
        raise HTTPException(status_code=500, detail="Database file missing on server.")

    # NOTE(review): everything below is synchronous and there is no await in
    # this coroutine, so a slow OCR or table parse holds up other requests on
    # the same event loop. Confirm the PDF libraries are safe to call from a
    # worker thread before offloading this work.
    try:
        with tempfile.TemporaryDirectory() as tmpdirname:
            # Save uploaded policy file under a unique name; the directory and
            # its contents are removed when the with-block exits.
            policy_path = os.path.join(tmpdirname, f"{uuid.uuid4()}_policy.pdf")
            logger.info(f"Saving policy file to: {policy_path}")
            with open(policy_path, "wb") as f:
                shutil.copyfileobj(policy_file.file, f)

            # Use static DB file path directly
            logger.info(f"Using DB file from: {db_file_path}")
            policy_text = extract_text_from_pdf(policy_path)
            policy_uins = find_uins(policy_text)
            db_uins = extract_uin_list_from_pdf_db(db_file_path)
            is_matched = match_found(policy_uins, db_uins)

            return JSONResponse(
                status_code=200,
                content={
                    "match": is_matched,
                    "policy-text":policy_text,
                }
            )
    except Exception as e:
        logger.error(f"Verification failed: {e}")
        # NOTE(review): the detail string exposes internal error text (which
        # can include temp paths) to the client; consider a generic message.
        raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}") from e

# ========== Run Application ==========

if __name__ == "__main__":
    # Imported here so uvicorn is only required when the file is run directly.
    import uvicorn
    logger.info("Starting FastAPI server")
    # The application is passed as the import string "app:app" (module "app",
    # attribute "app") and served on all interfaces (0.0.0.0), port 7860.
    # NOTE(review): the import string presumably assumes this file is saved as
    # app.py — confirm. reload=True restarts the server on code changes and
    # looks like a development setting; verify before deploying.
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)