File size: 2,536 Bytes
e4b7f54
2f465bd
e4b7f54
8029506
2f465bd
e4b7f54
2f465bd
8029506
 
e4b7f54
2f465bd
 
 
 
 
e4b7f54
9adb515
0dbdcad
9adb515
2f465bd
0dbdcad
8029506
0dbdcad
2f465bd
8029506
0dbdcad
e4b7f54
2f465bd
 
 
 
 
 
 
 
 
8029506
 
 
 
 
4b32c6f
8029506
 
4b32c6f
 
 
8029506
4b32c6f
 
8029506
 
 
 
 
2f465bd
 
e4b7f54
 
 
8029506
 
 
 
 
 
 
 
 
 
 
 
 
 
e4b7f54
8029506
2f465bd
8029506
 
 
 
 
2f465bd
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import os
import io
import easyocr
import numpy as np
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
from PIL import Image
from pdf2image import convert_from_bytes
from concurrent.futures import ThreadPoolExecutor

# -----------------------------------------------------------------
# EasyOCR setup
# -----------------------------------------------------------------
# Root folder for the EasyOCR model files, and the sub-folder handed to
# the reader as its user-network directory.
MODEL_DIR = "/app/.EasyOCR"
USER_NET_DIR = os.path.join(MODEL_DIR, "user_network")

# Make sure both folders exist; already-existing ones are left alone.
for _ocr_dir in (MODEL_DIR, USER_NET_DIR):
    os.makedirs(_ocr_dir, exist_ok=True)

# Build the shared reader once, at import time. Downloads are disabled
# (download_enabled=False), so the reader relies on model files that are
# already present under MODEL_DIR.
_reader_options = {
    "model_storage_directory": MODEL_DIR,
    "user_network_directory": USER_NET_DIR,
    "download_enabled": False,
}
# Languages to recognise (reduce the list if only English is needed).
reader = easyocr.Reader(["en", "hi"], **_reader_options)

# -----------------------------------------------------------------
# FastAPI application
# -----------------------------------------------------------------
app = FastAPI()

@app.get("/")
async def root():
    """Health-check endpoint reporting that the OCR service is up.

    Returns:
        A dict with a single ``"message"`` key, used as the response body.
    """
    # \U0001F680 is the rocket emoji. It is written as an escape so the
    # message cannot be corrupted again by a wrong source-file encoding.
    return {"message": "OCR API is running on Hugging Face \U0001F680"}


def run_ocr_on_image(image: Image.Image):
    """Run the shared EasyOCR reader on a PIL image.

    Args:
        image: The PIL image to read text from.

    Returns:
        A list with one dict per detection, each holding ``"bbox"`` (a list
        of ``[x, y]`` float pairs), ``"text"`` (str) and ``"confidence"``
        (float). All values are built-in Python types.
    """
    # Some PIL modes do not become a usable pixel array via np.array():
    # "P"/"PA" give palette indices rather than colours, "1" gives booleans,
    # "LA" gives two channels, and "CMYK" gives four channels that a
    # consumer would presumably mistake for RGBA. Normalise those to RGB.
    if image.mode in ("1", "P", "PA", "LA", "CMYK", "YCbCr"):
        image = image.convert("RGB")

    image_np = np.array(image)
    detections = reader.readtext(image_np)

    # Cast coordinates, text and score to built-in types so the result can
    # be serialised to JSON (NumPy scalars cannot).
    return [
        {
            "bbox": [[float(x), float(y)] for x, y in bbox],
            "text": str(text),
            "confidence": float(prob),
        }
        for bbox, text, prob in detections
    ]


@app.post("/ocr")
async def ocr(file: UploadFile = File(...)):
    """OCR an uploaded image or PDF and return the detections as JSON.

    Args:
        file: The uploaded file. It is treated as a PDF when its name ends
            in ``.pdf`` (case-insensitive) or its content starts with the
            ``%PDF-`` signature; anything else is opened as an image.

    Returns:
        A ``JSONResponse``:
        - PDF: ``{"pdf_results": [{"page": n, "results": [...]}, ...]}``
          with pages numbered from 1.
        - Image: ``{"results": [...]}``.
        - Any failure: ``{"error": "<message>"}`` with status code 500.
    """
    # NOTE(review): the OCR work below runs synchronously inside this
    # coroutine, so the event loop is busy until it finishes. Consider
    # offloading it to a worker thread if concurrent requests matter.
    try:
        contents = await file.read()

        # The filename may be missing on an upload; treat that as "" rather
        # than failing on None.lower().
        filename = (file.filename or "").lower()
        # Also check the PDF signature so a PDF uploaded under another name
        # is not handed to the image decoder.
        is_pdf = filename.endswith(".pdf") or contents.startswith(b"%PDF-")

        if is_pdf:
            # Render every PDF page to a PIL image.
            pages = convert_from_bytes(contents)

            # OCR the pages in parallel; executor.map keeps page order.
            with ThreadPoolExecutor() as executor:
                per_page = list(executor.map(run_ocr_on_image, pages))

            pdf_results = [
                {"page": page_number, "results": page_results}
                for page_number, page_results in enumerate(per_page, start=1)
            ]
            return JSONResponse(content={"pdf_results": pdf_results})

        # Plain image upload; the context manager closes the image handle.
        with Image.open(io.BytesIO(contents)) as image:
            text_results = run_ocr_on_image(image)
        return JSONResponse(content={"results": text_results})

    except Exception as e:
        # Top-level request boundary: report any failure to the client
        # instead of letting it propagate.
        return JSONResponse(content={"error": str(e)}, status_code=500)