# Source: Hugging Face Space credent007/easyocr-phi3 (status: Paused); file size: 2,131 bytes.

from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse
from PIL import Image
import io
import asyncio
import fitz  # PyMuPDF
from llm import call_llm

# Application instance; the route decorators in this module register handlers on it.
app = FastAPI()

@app.get("/")
def home():
    """Answer the root route with a fixed placeholder payload."""
    greeting = {"message": "home page"}
    return greeting


# 🔥 Process single image
async def process_image(image: Image.Image, page_num: int):
    """Run the invoice key/value extraction prompt against one page image.

    Args:
        image: The page (or standalone picture) to hand to ``call_llm``.
        page_num: Page number to attach to the result.

    Returns:
        A dict with ``"page"`` set to ``page_num`` and ``"llm"`` set to
        whatever ``call_llm`` returned, passed through unchanged.
    """
    # Instruction text sent alongside the image; kept verbatim because the
    # model's output format depends on it.
    instructions = """
    You are an intelligent document parser.
    <image>\n
    Extract structured key-value pairs from the invoice.
    
    Rules:
    - Return ONLY valid JSON
    - Each entry must be: {"key": "...", "value": "..."}
    - Do NOT return bounding boxes
    - Ignore layout info
    - Map related fields (e.g., Invoice No → 12345)
    
    Example:
    [
      {"key": "Invoice Number", "value": "12345"},
      {"key": "Date", "value": "01-01-2024"}
    ]
    """

    llm_output = await call_llm(image, instructions)

    page_result = {"page": page_num, "llm": llm_output}
    return page_result


def _decode_rgb_image(data: bytes) -> Image.Image:
    """Decode encoded image bytes into an RGB PIL image."""
    return Image.open(io.BytesIO(data)).convert("RGB")


def _render_pdf_pages(pdf_bytes: bytes) -> list:
    """Rasterise every page of a PDF into an RGB PIL image, in page order."""
    # The context manager closes the document even when a page fails to
    # render; the returned images are decoded copies, independent of it.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        return [
            _decode_rgb_image(page.get_pixmap().tobytes("png"))
            for page in doc
        ]


@app.post("/ocr-llm")
async def ocr_llm_endpoint(file: UploadFile = File(...)):
    """Extract key/value data from an uploaded PDF or image via the LLM.

    A PDF is rendered page by page and all pages are processed concurrently;
    a single image is processed as page 1.

    Args:
        file: The uploaded document. Its filename must end in ``.pdf``,
            ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive).

    Returns:
        ``{"results": [...]}`` with one ``process_image`` result per page,
        in page order.

    Raises:
        HTTPException: 400 when the filename is missing or has an unsupported
            extension; 500 when reading, rendering or LLM processing fails.
    """
    # The filename is optional in a multipart upload; a missing one is
    # rejected as unsupported instead of crashing on ``None.lower()``.
    filename = (file.filename or "").lower()
    if not filename.endswith((".pdf", ".png", ".jpg", ".jpeg")):
        raise HTTPException(status_code=400, detail="File must be PDF or image")

    try:
        payload = await file.read()

        if filename.endswith(".pdf"):
            page_images = _render_pdf_pages(payload)
        else:
            page_images = [_decode_rgb_image(payload)]

        # Coroutines are created only after every page rendered successfully,
        # so a rendering failure never leaves un-awaited coroutines behind.
        results = await asyncio.gather(
            *(
                process_image(page_image, page_num)
                for page_num, page_image in enumerate(page_images, start=1)
            )
        )

        return {"results": results}

    except HTTPException:
        # Preserve deliberate HTTP errors instead of rewrapping them as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e