File size: 2,131 Bytes
b7032a8
24d4193
 
 
 
b7032a8
 
 
24d4193
 
b7032a8
 
 
24d4193
 
b7032a8
 
 
5ad8e49
c637105
5ad8e49
 
 
 
 
 
 
 
 
 
 
 
 
 
b7032a8
 
 
 
 
 
 
 
24d4193
 
 
 
fe07d61
b7032a8
24d4193
 
b7032a8
24d4193
 
 
b7032a8
24d4193
 
b7032a8
 
 
24d4193
b7032a8
 
 
 
24d4193
b7032a8
 
 
 
 
24d4193
 
 
 
b7032a8
 
 
 
24d4193
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse
from PIL import Image
import io
import asyncio
import fitz  # PyMuPDF
from llm import call_llm

app = FastAPI()

@app.get("/")
def home():
    """Return the static greeting payload served at the root route."""
    greeting = {"message": "home page"}
    return greeting


# 🔥 Process single image
async def process_image(image: Image.Image, page_num: int):
    """Send one page image to the LLM and tag the answer with its page number.

    Args:
        image: The page image handed to ``call_llm`` together with the prompt.
        page_num: Page number echoed back under the ``"page"`` key.

    Returns:
        A dict with ``"page"`` (the given page number) and ``"llm"``
        (whatever ``call_llm`` returned for this image).
    """
    # Instruction text for the model; it asks for key/value JSON only.
    extraction_prompt = """
    You are an intelligent document parser.
    <image>\n
    Extract structured key-value pairs from the invoice.
    
    Rules:
    - Return ONLY valid JSON
    - Each entry must be: {"key": "...", "value": "..."}
    - Do NOT return bounding boxes
    - Ignore layout info
    - Map related fields (e.g., Invoice No → 12345)
    
    Example:
    [
      {"key": "Invoice Number", "value": "12345"},
      {"key": "Date", "value": "01-01-2024"}
    ]
    """

    llm_output = await call_llm(image, extraction_prompt)
    return dict(page=page_num, llm=llm_output)


def _render_pdf_pages(pdf_bytes):
    """Rasterize every page of an in-memory PDF into an RGB PIL image.

    Args:
        pdf_bytes: Raw bytes of the PDF file.

    Returns:
        A list of RGB ``PIL.Image.Image`` objects, one per page, in page order.
    """
    # The context manager closes the PyMuPDF document even when a page fails
    # to render. ``convert("RGB")`` forces a full decode, so the returned
    # images no longer depend on the document being open.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        return [
            Image.open(io.BytesIO(page.get_pixmap().tobytes("png"))).convert("RGB")
            for page in doc
        ]


@app.post("/ocr-llm")
async def ocr_llm_endpoint(file: UploadFile = File(...)):
    """Extract key-value pairs from an uploaded PDF or image via the LLM.

    A PDF is rendered page by page and all pages are processed concurrently;
    a single image is treated as page 1.

    Args:
        file: The uploaded file. Its name must end in ``.pdf``, ``.png``,
            ``.jpg`` or ``.jpeg`` (case-insensitive).

    Returns:
        ``{"results": [...]}`` with one ``process_image`` result per page,
        in page order.

    Raises:
        HTTPException: 400 when the filename is missing or has an unsupported
            extension; 500 (with the error text as detail) when reading,
            decoding or LLM processing fails.
    """
    # ``UploadFile.filename`` may be None; treat that as an unsupported type
    # rather than crashing on ``.lower()``.
    filename = (file.filename or "").lower()
    if not filename.endswith((".pdf", ".png", ".jpg", ".jpeg")):
        raise HTTPException(status_code=400, detail="File must be PDF or image")

    try:
        file_bytes = await file.read()

        if filename.endswith(".pdf"):
            images = _render_pdf_pages(file_bytes)
        else:
            images = [Image.open(io.BytesIO(file_bytes)).convert("RGB")]

        # Coroutines are created only after every page rendered successfully,
        # so a rendering failure cannot leave un-awaited coroutines behind.
        results = await asyncio.gather(
            *(
                process_image(image, page_num)
                for page_num, image in enumerate(images, start=1)
            )
        )
        return {"results": list(results)}

    except HTTPException:
        # Preserve deliberate HTTP errors instead of rewrapping them as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e