import os
import uuid
from typing import List

from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse
import fitz

# -------------------------------------------------------------------
# FORCE PADDLEX / PADDLEOCR CACHE DIRECTORIES TO WRITABLE LOCATIONS
# -------------------------------------------------------------------
# These must be set BEFORE paddleocr is imported, which is why the
# paddleocr import sits below instead of with the other imports.
os.environ["PADDLE_HOME"] = "/app/paddle_home"
os.environ["XDG_CACHE_HOME"] = "/app/xdg_cache"

os.makedirs("/app/paddle_home", exist_ok=True)
os.makedirs("/app/xdg_cache", exist_ok=True)

# now safe to import paddlex/paddleocr
from paddleocr import PaddleOCR  # noqa: E402  (deliberately late import)


# -------------------------------------------------------------------
# PDF → IMAGE
# -------------------------------------------------------------------
def pdf_to_images(pdf_path: str, max_pages: int | None = 3) -> List[str]:
    """Render the leading pages of a PDF to image files.

    Each rendered page is written to ``/app/pdf_images`` under a random
    UUID-based ``.jpg`` name at 220 DPI.

    Args:
        pdf_path: Path of the PDF file to render.
        max_pages: Maximum number of pages to render, counted from the
            first page. ``None`` renders every page. A value of zero or
            less renders nothing.

    Returns:
        Paths of the written image files, in page order.

    Raises:
        FileNotFoundError: If ``pdf_path`` does not exist.
    """
    if not os.path.exists(pdf_path):
        raise FileNotFoundError(pdf_path)

    output_paths: List[str] = []

    # The context manager closes the document even when rendering fails;
    # the previous version never closed it and leaked a handle per call.
    with fitz.open(pdf_path) as doc:
        page_count = len(doc)
        limit = page_count if max_pages is None else min(max_pages, page_count)

        out_dir = "/app/pdf_images"
        os.makedirs(out_dir, exist_ok=True)

        for i in range(limit):
            page = doc.load_page(i)
            pix = page.get_pixmap(dpi=220)

            img_path = os.path.join(out_dir, f"{uuid.uuid4()}.jpg")
            pix.save(img_path)
            output_paths.append(img_path)

    return output_paths


# -------------------------------------------------------------------
# OCR ENGINE
# -------------------------------------------------------------------
# Built once at import time and shared by every call to extract_text().
ocr_engine = PaddleOCR(
    lang="mr",
    text_recognition_model_name="devanagari_PP-OCRv5_mobile_rec",
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
    use_textline_orientation=False,
)


def extract_text(image_path: str) -> List[dict]:
    """Run the shared OCR engine on one image.

    Args:
        image_path: Path of the image file to recognise.

    Returns:
        One ``{"text": ..., "confidence": ...}`` dict per recognised text
        entry, in the order the engine reports them. ``confidence`` is
        converted to a plain ``float``.
    """
    result = ocr_engine.predict(input=image_path)

    output = []
    for block in result:
        # rec_texts and rec_scores are read as parallel sequences.
        for text, score in zip(block["rec_texts"], block["rec_scores"]):
            output.append({"text": text, "confidence": float(score)})

    return output
# -------------------------------------------------------------------
# FASTAPI
# -------------------------------------------------------------------
app = FastAPI()

UPLOAD_DIR = "/app/uploads"
os.makedirs(UPLOAD_DIR, exist_ok=True)

# File-name suffixes the endpoint accepts (compared against the
# lower-cased upload name).
_PDF_SUFFIXES = (".pdf",)
_IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png")


def _remove_quietly(path: str) -> None:
    """Delete ``path`` as best-effort cleanup of a temporary file."""
    try:
        os.remove(path)
    except OSError:
        # Cleanup must never replace the OCR result or mask the error
        # that is already propagating, so filesystem errors are ignored.
        pass


def _matched_suffix(filename: str) -> str | None:
    """Return the accepted suffix that ``filename`` ends with, or ``None``."""
    for suffix in _PDF_SUFFIXES + _IMAGE_SUFFIXES:
        if filename.endswith(suffix):
            return suffix
    return None


@app.post("/ocr")
async def ocr_endpoint(files: List[UploadFile] = File(...), max_pages: int | None = 3):
    """OCR up to 15 uploaded PDF or image files.

    PDFs are rendered page by page (at most ``max_pages`` pages, or all
    pages when ``max_pages`` is ``None``) and each page image is OCR'd.
    JPEG/PNG uploads are OCR'd directly as a single page with index 0.

    Args:
        files: Uploaded files; names must end in .pdf, .jpg, .jpeg or .png
            (case-insensitive).
        max_pages: Page limit applied to each PDF.

    Returns:
        A JSON response of the form
        ``{"files": [{"file_id", "filename", "pages": [{"page_index",
        "results"}]}]}``.

    Raises:
        HTTPException: 400 when more than 15 files are sent or when a file
            has an unsupported (or missing) name.
    """
    if len(files) > 15:
        raise HTTPException(status_code=400, detail="Maximum 15 files allowed.")

    # Validate every upload before touching the disk or running OCR, so an
    # unsupported file neither leaves a stray temp file behind nor wastes
    # OCR work on the files that precede it.
    accepted = []
    for file in files:
        # UploadFile.filename may be None; treat it as an empty name so it
        # is rejected with a 400 instead of crashing on .lower().
        filename = (file.filename or "").lower()
        suffix = _matched_suffix(filename)
        if suffix is None:
            raise HTTPException(status_code=400, detail=f"Unsupported type: {filename}")
        accepted.append((file, filename, suffix))

    structured_output = {"files": []}

    for index, (file, filename, suffix) in enumerate(accepted, start=1):
        # The temp name uses the validated suffix rather than arbitrary text
        # taken from the client-supplied name.
        temp_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4()}{suffix}")

        file_record = {
            "file_id": f"file_{index}",
            "filename": filename,
            "pages": [],
        }

        try:
            with open(temp_path, "wb") as f:
                f.write(await file.read())

            # -------------------------------
            # PDF
            # -------------------------------
            if suffix in _PDF_SUFFIXES:
                img_paths = pdf_to_images(temp_path, max_pages=max_pages)
                try:
                    for page_idx, img_path in enumerate(img_paths):
                        file_record["pages"].append({
                            "page_index": page_idx,
                            "results": extract_text(img_path),
                        })
                finally:
                    # Rendered page images are intermediate artefacts whose
                    # paths are never returned to the client.
                    for img_path in img_paths:
                        _remove_quietly(img_path)

            # -------------------------------
            # IMAGE
            # -------------------------------
            else:
                file_record["pages"].append({
                    "page_index": 0,
                    "results": extract_text(temp_path),
                })
        finally:
            # The stored upload is only needed while it is being processed.
            _remove_quietly(temp_path)

        structured_output["files"].append(file_record)

    return JSONResponse(structured_output)