Spaces:

triflix
/

sortitout

Sleeping

App Files Files Community

triflix committed on Nov 20, 2025

Commit

bfb796a

verified ·

1 Parent(s): f401e05

Create app.py

Browse files

Files changed (1) hide show

app.py +188 -0

app.py ADDED Viewed

	@@ -0,0 +1,188 @@

+import io
+import gc
+import logging
+from typing import List, Dict, Any
+from PIL import Image
+import numpy as np
+from fastapi import FastAPI, File, UploadFile, HTTPException
+from paddleocr import PaddleOCR
+from pdf2image import convert_from_bytes
+import asyncio
+# Configure root logging at INFO so the logger.info() messages in this module are emitted
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Module-wide OCR instance; stays None until get_ocr_engine() builds it on first use
+ocr_engine = None
+def get_ocr_engine():
+    """Singleton pattern for OCR model"""
+    global ocr_engine
+    if ocr_engine is None:
+        logger.info("Initializing PaddleOCR model...")
+        ocr_engine = PaddleOCR(
+            text_recognition_model_name="devanagari_PP-OCRv5_mobile_rec",
+            lang="mr",
+            use_doc_orientation_classify=False,
+            use_doc_unwarping=False,
+            use_textline_orientation=False,
+            show_log=False  # Reduce clutter
+        )
+    return ocr_engine
+# FastAPI application object; the route and startup/shutdown decorators below register on it
+app = FastAPI(title="PaddleOCR Marathi API")
+def resize_image(image: Image.Image, max_pixels: int = 2500) -> Image.Image:
+    """Resize if any dimension exceeds limit to control memory usage"""
+    if max(image.size) > max_pixels:
+        ratio = max_pixels / max(image.size)
+        new_size = (int(image.width * ratio), int(image.height * ratio))
+        logger.info(f"Resizing {image.size} -> {new_size}")
+        return image.resize(new_size, Image.Resampling.LANCZOS)
+    return image
+def process_image(contents: bytes, filename: str) -> Dict[str, Any]:
+    """Process single image entirely in memory"""
+    try:
+        image = Image.open(io.BytesIO(contents)).convert('RGB')
+        image = resize_image(image)
+        img_array = np.array(image)
+        ocr = get_ocr_engine()
+        result = ocr.ocr(img_array, cls=False)
+        texts, scores, bboxes = [], [], []
+        if result and result[0]:
+            for line in result[0]:
+                bbox, (text, score) = line
+                texts.append(text)
+                scores.append(float(score))
+                bboxes.append(bbox)
+        # Immediate cleanup
+        del image, img_array
+        gc.collect()
+        return {
+            "filename": filename,
+            "type": "image",
+            "success": True,
+            "results": [{"text": t, "confidence": s, "bbox": b}
+                       for t, s, b in zip(texts, scores, bboxes)]
+        }
+    except Exception as e:
+        logger.error(f"Image processing failed: {e}")
+        return {"filename": filename, "type": "image", "success": False, "error": str(e)}
+def process_pdf(contents: bytes, filename: str) -> Dict[str, Any]:
+    """Process PDF page-by-page with memory cleanup between pages"""
+    try:
+        # Convert PDF to images (poppler handles memory efficiently)
+        images = convert_from_bytes(contents, dpi=200, fmt='png')
+        pages = []
+        for page_num, image in enumerate(images, 1):
+            image = resize_image(image.convert('RGB'))
+            img_array = np.array(image)
+            ocr = get_ocr_engine()
+            result = ocr.ocr(img_array, cls=False)
+            texts, scores, bboxes = [], [], []
+            if result and result[0]:
+                for line in result[0]:
+                    bbox, (text, score) = line
+                    texts.append(text)
+                    scores.append(float(score))
+                    bboxes.append(bbox)
+            pages.append({
+                "page_number": page_num,
+                "results": [{"text": t, "confidence": s, "bbox": b}
+                           for t, s, b in zip(texts, scores, bboxes)]
+            })
+            # Clean up per page
+            del image, img_array
+            gc.collect()
+            await asyncio.sleep(0.05)  # Brief pause to let GC work
+        # Final cleanup
+        del images
+        gc.collect()
+        return {
+            "filename": filename,
+            "type": "pdf",
+            "success": True,
+            "page_count": len(pages),
+            "pages": pages
+        }
+    except Exception as e:
+        logger.error(f"PDF processing failed: {e}")
+        return {"filename": filename, "type": "pdf", "success": False, "error": str(e)}
+@app.post("/ocr/image")
+async def ocr_image(file: UploadFile = File(...)):
+    """Single image endpoint"""
+    if not file.content_type.startswith('image/'):
+        raise HTTPException(400, "Invalid image file")
+    try:
+        contents = await file.read()
+        return process_image(contents, file.filename)
+    finally:
+        await file.close()
+@app.post("/ocr/pdf")
+async def ocr_pdf(file: UploadFile = File(...)):
+    """Single PDF endpoint"""
+    if not (file.content_type == 'application/pdf' or file.filename.endswith('.pdf')):
+        raise HTTPException(400, "Invalid PDF file")
+    try:
+        contents = await file.read()
+        return process_pdf(contents, file.filename)
+    finally:
+        await file.close()
+@app.post("/ocr/batch")
+async def ocr_batch(files: List[UploadFile] = File(...)):
+    """Batch processing endpoint - max 5 files to prevent OOM"""
+    if len(files) > 5:
+        raise HTTPException(400, "Maximum 5 files per batch")
+    results = []
+    for file in files:
+        try:
+            contents = await file.read()
+            is_pdf = file.content_type == 'application/pdf' or file.filename.endswith('.pdf')
+            result = process_pdf(contents, file.filename) if is_pdf else process_image(contents, file.filename)
+            results.append(result)
+        except Exception as e:
+            results.append({"filename": file.filename, "success": False, "error": str(e)})
+        finally:
+            await file.close()
+    return {"processed": len(results), "files": results}
+@app.get("/health")
+async def health():
+    """Check if model is loaded"""
+    try:
+        get_ocr_engine()
+        return {"status": "ready", "model": "loaded"}
+    except:
+        raise HTTPException(503, "Model not loaded")
+@app.on_event("startup")
+async def load_model():
+    """Build the OCR engine at application startup instead of on first use."""
+    logger.info("Preloading OCR model...")
+    get_ocr_engine()
+@app.on_event("shutdown")
+async def cleanup():
+    """Drop the module-level OCR engine reference at shutdown and run a GC pass."""
+    global ocr_engine
+    ocr_engine = None
+    gc.collect()