# ai-worker / app.py
import os
import io
import json
import uvicorn
from fastapi import FastAPI, UploadFile, File, HTTPException
from pdf2image import convert_from_bytes
import layoutparser as lp
import numpy as np
import cv2
import pytesseract
from PIL import Image
app = FastAPI()
import urllib.request
import os
def download_model():
    """Download the layout-model config and weights into /tmp.

    /tmp is the only writable area on Hugging Face Spaces, so the files
    are fetched there on first startup and reused on later restarts.

    Raises:
        Any exception from ``urllib.request.urlretrieve`` (network or
        HTTP failure) after cleaning up the partial download.
    """
    print("Checking model files in /tmp...")
    base_dir = "/tmp/models"
    model_dir = os.path.join(base_dir, "faster_rcnn_R_50_FPN_3x")
    os.makedirs(model_dir, exist_ok=True)
    files = {
        os.path.join(model_dir, "config.yaml"):
            "https://huggingface.co/layoutparser/detectron2/resolve/main/PubLayNet/faster_rcnn_R_50_FPN_3x/config.yml",
        os.path.join(model_dir, "model_final.pth"):
            "https://huggingface.co/layoutparser/detectron2/resolve/main/PubLayNet/faster_rcnn_R_50_FPN_3x/model_final.pth",
    }
    for path, url in files.items():
        if os.path.exists(path):
            continue
        print(f"Downloading {path} from {url}...")
        # Download to a temporary name and move it into place atomically.
        # Previously a failed/partial download left a truncated file that
        # os.path.exists() mistook for a complete model on the next start.
        tmp_path = path + ".part"
        try:
            urllib.request.urlretrieve(url, tmp_path)
            os.replace(tmp_path, path)
        except Exception:
            # Remove the partial file before propagating the error.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise
    print("Model files ready.")
# Ensure models are downloaded before initialization
download_model()
# Initialize LayoutParser with paths pointing to /tmp.
# NOTE(review): label_map appears to follow the PubLayNet class indices
# (Text/Title/List/Table/Figure) — confirm against the checkpoint's
# training config. The 0.5 score threshold discards low-confidence
# detections at inference time.
model = lp.Detectron2LayoutModel(
    config_path="/tmp/models/faster_rcnn_R_50_FPN_3x/config.yaml",
    model_path="/tmp/models/faster_rcnn_R_50_FPN_3x/model_final.pth",
    extra_config=["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.5],
    label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"}
)
@app.get("/")
def home():
    """Health-check endpoint confirming the worker is alive."""
    status_payload = {"message": "Deshonnati AI Worker is running"}
    return status_payload
@app.post("/process")
async def process_pdf(file: UploadFile = File(...)):
    """Convert an uploaded PDF into per-page, OCR'd layout articles.

    Pipeline: PDF bytes -> page images (200 DPI) -> layout detection ->
    per-block OCR for Text/Title regions.

    Returns:
        {"success": True, "data": [{"page": n, "articles": [...]}, ...]}

    Raises:
        HTTPException 400: the PDF could not be rasterized to images.
        HTTPException 500: any other processing failure.
    """
    try:
        # 1. Read raw PDF bytes from the upload.
        pdf_bytes = await file.read()
        # 2. Rasterize every page.
        images = convert_from_bytes(pdf_bytes, dpi=200)
        if not images:
            raise HTTPException(status_code=400, detail="Could not convert PDF to images")
        results = []
        for i, image in enumerate(images):
            # 3. PIL image (RGB) -> OpenCV-style array (BGR) for LayoutParser.
            open_cv_image = np.array(image)
            open_cv_image = open_cv_image[:, :, ::-1].copy()
            # 4. Detect layout blocks on the page.
            layout = model.detect(open_cv_image)
            page_articles = []
            # 5. OCR only the text-bearing block types.
            for block in layout:
                if block.type in ('Text', 'Title'):
                    x0, y0, x1, y1 = block.coordinates
                    # Crop to the detected region for better OCR accuracy.
                    cropped_img = image.crop((x0, y0, x1, y1))
                    # 6. Multi-language OCR (Marathi + English + Hindi).
                    text = pytesseract.image_to_string(cropped_img, lang='mar+eng+hin')
                    page_articles.append({
                        "id": f"art_{i}_{len(page_articles)}",
                        "type": block.type,
                        "bbox": {
                            "x0": x0,
                            "y0": y0,
                            "x1": x1,
                            "y1": y1
                        },
                        "text": text.strip()
                    })
            results.append({
                "page": i + 1,
                "articles": page_articles
            })
        return {"success": True, "data": results}
    except HTTPException:
        # Bug fix: the generic handler below used to catch the deliberate
        # 400 raised above and rewrap it as a 500 — re-raise it unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
if __name__ == "__main__":
    # Hugging Face Spaces routes traffic to port 7860, so the server
    # must listen there on all interfaces.
    uvicorn.run(app, host="0.0.0.0", port=7860)