"""FastAPI service that sends uploaded invoices (PDF or image) to an LLM parser."""

import asyncio
import io

import fitz  # PyMuPDF
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse
from PIL import Image

from llm import call_llm

app = FastAPI()

# Upload extensions accepted by /ocr-llm (compared against the lowercased name).
SUPPORTED_EXTENSIONS = (".pdf", ".png", ".jpg", ".jpeg")


@app.get("/")
def home():
    """Return a static payload identifying the home page."""
    return {"message": "home page"}


async def process_image(image: Image.Image, page_num: int):
    """Ask the LLM to extract key/value pairs from one page image.

    Args:
        image: The page to analyse, as a PIL image.
        page_num: 1-based page number echoed back in the result.

    Returns:
        A dict ``{"page": page_num, "llm": <whatever call_llm returned>}``.
        NOTE(review): the prompt asks for JSON, but the reply from
        ``call_llm`` is passed through here without validation.
    """
    prompt = """
    You are an intelligent document parser. \n
    Extract structured key-value pairs from the invoice.

    Rules:
    - Return ONLY valid JSON
    - Each entry must be: {"key": "...", "value": "..."}
    - Do NOT return bounding boxes
    - Ignore layout info
    - Map related fields (e.g., Invoice No → 12345)

    Example:
    [
      {"key": "Invoice Number", "value": "12345"},
      {"key": "Date", "value": "01-01-2024"}
    ]
    """

    result = await call_llm(image, prompt)

    return {
        "page": page_num,
        "llm": result
    }


def _render_pdf_pages(file_bytes: bytes) -> list:
    """Rasterize every page of an in-memory PDF into an RGB PIL image."""
    doc = fitz.open(stream=file_bytes, filetype="pdf")
    try:
        images = []
        for page in doc:
            png_bytes = page.get_pixmap().tobytes("png")
            # convert() forces a full decode, so the image no longer depends
            # on the PDF document once this line has run.
            images.append(Image.open(io.BytesIO(png_bytes)).convert("RGB"))
        return images
    finally:
        # Always release the document, even when a page fails to render.
        doc.close()


@app.post("/ocr-llm")
async def ocr_llm_endpoint(file: UploadFile = File(...)):
    """Extract key/value pairs from an uploaded PDF or image.

    PDFs are rendered page by page and all pages are sent to the LLM
    concurrently; a plain image is treated as a single page numbered 1.

    Args:
        file: The uploaded document; its name must end in one of
            ``SUPPORTED_EXTENSIONS`` (case-insensitive).

    Returns:
        ``{"results": [...]}`` with one ``process_image`` result per page,
        in page order.

    Raises:
        HTTPException: 400 when the file name is missing or has an
            unsupported extension; 500 when reading, rendering or the LLM
            call fails (the detail carries the error text).
    """
    # An upload may arrive without a filename; treat that as unsupported
    # instead of crashing on None.lower().
    filename = (file.filename or "").lower()
    if not filename.endswith(SUPPORTED_EXTENSIONS):
        raise HTTPException(status_code=400, detail="File must be PDF or image")

    try:
        payload = await file.read()

        if filename.endswith(".pdf"):
            images = _render_pdf_pages(payload)
        else:
            images = [Image.open(io.BytesIO(payload)).convert("RGB")]

        # Coroutines are only created once every page rendered successfully,
        # so a rendering failure cannot leave un-awaited coroutines behind.
        results = await asyncio.gather(
            *(process_image(img, page_num)
              for page_num, img in enumerate(images, start=1))
        )

        return {"results": results}

    except HTTPException:
        # Let deliberate HTTP errors through instead of rewrapping them as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e