# Spaces: Paused
# File size: 2,131 Bytes
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse
from PIL import Image
import io
import asyncio
import fitz # PyMuPDF
from llm import call_llm
# Application instance; the route decorators in this file register handlers on it.
app = FastAPI()
@app.get("/")
def home():
    """Answer the root route with a fixed placeholder payload."""
    payload = {"message": "home page"}
    return payload
# Run the invoice-extraction prompt against a single image.
async def process_image(image: Image.Image, page_num: int):
    """Send one image to the LLM and label the reply with its page number.

    Args:
        image: The image to analyse; handed to ``call_llm`` unchanged.
        page_num: Page number reported alongside the LLM output.

    Returns:
        A dict whose ``"page"`` entry is ``page_num`` and whose ``"llm"``
        entry is whatever ``call_llm`` returned.
    """
    # The prompt text is runtime data: keep it exactly as written.
    instructions = """
You are an intelligent document parser.
<image>\n
Extract structured key-value pairs from the invoice.
Rules:
- Return ONLY valid JSON
- Each entry must be: {"key": "...", "value": "..."}
- Do NOT return bounding boxes
- Ignore layout info
- Map related fields (e.g., Invoice No → 12345)
Example:
[
{"key": "Invoice Number", "value": "12345"},
{"key": "Date", "value": "01-01-2024"}
]
"""
    llm_reply = await call_llm(image, instructions)
    page_report = {"page": page_num}
    page_report["llm"] = llm_reply
    return page_report
# Rasterize every page of an in-memory PDF, closing the document afterwards.
def _render_pdf_pages(pdf_bytes):
    """Render each page of a PDF into an RGB PIL image.

    Args:
        pdf_bytes: Raw bytes of the PDF file.

    Returns:
        A list of RGB ``PIL.Image.Image`` objects, one per page, in page order.
    """
    # The context manager closes the document even if a page fails to render.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        # convert("RGB") decodes the PNG data eagerly, so the images stay
        # usable after the document has been closed.
        return [
            Image.open(io.BytesIO(page.get_pixmap().tobytes("png"))).convert("RGB")
            for page in doc
        ]


@app.post("/ocr-llm")
async def ocr_llm_endpoint(file: UploadFile = File(...)):
    """Extract key-value pairs from an uploaded PDF or image via the LLM.

    A PDF is rendered page by page and all pages are processed concurrently
    with ``asyncio.gather``; any other accepted upload is treated as a single
    image and reported as page 1.

    Args:
        file: The uploaded file. Its name must end in ``.pdf``, ``.png``,
            ``.jpg`` or ``.jpeg`` (case-insensitive).

    Returns:
        ``{"results": [...]}`` with one ``process_image`` result per page.

    Raises:
        HTTPException: 400 when the file name is missing or has an unsupported
            extension; 500 (with the error text as detail) when reading,
            rendering or LLM processing fails.
    """
    # filename may be None for some uploads; treat that as an unsupported type
    # instead of crashing on .lower().
    filename = (file.filename or "").lower()
    if not filename.endswith((".pdf", ".png", ".jpg", ".jpeg")):
        raise HTTPException(status_code=400, detail="File must be PDF or image")

    try:
        file_bytes = await file.read()

        if filename.endswith(".pdf"):
            pages = _render_pdf_pages(file_bytes)
        else:
            pages = [Image.open(io.BytesIO(file_bytes)).convert("RGB")]

        # Coroutines are only created once every page has rendered, so a
        # rendering failure cannot leave un-awaited coroutines behind.
        results = await asyncio.gather(
            *(
                process_image(page, number)
                for number, page in enumerate(pages, start=1)
            )
        )
        return {"results": results}
    except HTTPException:
        # Preserve deliberate HTTP errors instead of rewrapping them as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e