Spaces:
Runtime error
Runtime error
| import warnings | |
| warnings.filterwarnings("ignore", message=".*pin_memory.*") | |
| import os | |
| import io | |
| import easyocr | |
| import numpy as np | |
| from fastapi import FastAPI, UploadFile, File | |
| from fastapi.responses import JSONResponse | |
| from PIL import Image | |
| from pdf2image import convert_from_bytes | |
| from concurrent.futures import ThreadPoolExecutor | |
# =========================
# EasyOCR config
# =========================
MODEL_DIR = "/app/.EasyOCR"
USER_NET_DIR = os.path.join(MODEL_DIR, "user_network")

# Make sure both storage locations exist before the reader is constructed.
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(USER_NET_DIR, exist_ok=True)

# Build one shared English + Hindi reader at import time so every request
# reuses it. Downloading is disabled here, so the model files presumably have
# to be present under MODEL_DIR already (NOTE(review): confirm the image
# ships them).
reader = easyocr.Reader(
    lang_list=["en", "hi"],
    model_storage_directory=MODEL_DIR,
    user_network_directory=USER_NET_DIR,
    download_enabled=False,
)
# =========================
# FastAPI app
# =========================
app = FastAPI()


@app.get("/")
async def root():
    """Return a static status message showing that the service is up.

    Registered on ``GET /``; without a route decorator the function is never
    attached to ``app`` and the endpoint would respond with 404.
    """
    # NOTE(review): the trailing character of the message looks like a
    # mis-decoded emoji; confirm the intended text before changing it.
    return {"message": "OCR API is running on Hugging Face π"}
def run_ocr_on_image(image: Image.Image) -> str:
    """Run OCR on a PIL image and return the recognised text as one string.

    Args:
        image: The PIL image to read.

    Returns:
        The text of every detection joined by single spaces, with leading
        and trailing whitespace stripped. Empty string when nothing is found.
    """
    # Grayscale, RGB and RGBA images convert to arrays holding real pixel
    # intensities. Other modes do not: palette ("P") images become arrays of
    # palette indices, bilevel ("1") images become booleans, and CMYK / LA
    # have channel layouts that do not mean RGB(A). Normalise those to RGB so
    # the OCR engine always receives genuine colour data.
    if image.mode not in ("L", "RGB", "RGBA"):
        image = image.convert("RGB")

    image_np = np.array(image)
    results = reader.readtext(image_np)

    # Each detection is a 3-item tuple; only the middle item (the text) is kept.
    return " ".join(str(text) for _, text, _ in results).strip()
def _ocr_pdf_pages(contents: bytes) -> list:
    """Rasterise a PDF and OCR each page; return ``[{"page", "text"}, ...]``."""
    pages = convert_from_bytes(contents)
    with ThreadPoolExecutor() as executor:
        # ``map`` preserves input order, so numbering from 1 matches the pages.
        page_texts = executor.map(run_ocr_on_image, pages)
        return [
            {"page": number, "text": text}
            for number, text in enumerate(page_texts, start=1)
        ]


def _ocr_single_image(contents: bytes) -> str:
    """Decode raw image bytes and return the OCR text for that image."""
    # The context manager releases the decoder's resources once OCR is done.
    with Image.open(io.BytesIO(contents)) as image:
        return run_ocr_on_image(image)


# NOTE(review): the "/ocr" path is assumed; no route registration for this
# handler is visible in the file. Adjust if clients expect a different path.
@app.post("/ocr")
async def ocr(file: UploadFile = File(...)):
    """Run OCR on an uploaded PDF or image.

    Args:
        file: The uploaded file. A name ending in ``.pdf`` (case-insensitive)
            is processed page by page; anything else is opened as an image.

    Returns:
        ``{"pdf_results": [{"page": n, "text": ...}, ...]}`` for PDFs,
        ``{"text": ...}`` for images, or ``{"error": ...}`` with status 500
        when processing fails.
    """
    import asyncio  # local import: only this handler needs the event loop

    try:
        contents = await file.read()
        # Uploads may arrive without a filename; treat those as images
        # instead of failing on ``None.lower()``.
        filename = file.filename or ""

        # Rasterising and OCR are blocking, CPU-heavy calls. Run them in a
        # worker thread so the event loop can keep serving other requests.
        loop = asyncio.get_running_loop()

        if filename.lower().endswith(".pdf"):
            text_results = await loop.run_in_executor(
                None, _ocr_pdf_pages, contents
            )
            return JSONResponse(content={"pdf_results": text_results})

        text = await loop.run_in_executor(None, _ocr_single_image, contents)
        return JSONResponse(content={"text": text})
    except Exception as e:
        # Top-level boundary: report any failure to the client as a 500.
        return JSONResponse(content={"error": str(e)}, status_code=500)