Spaces:

Rivalcoder
/

OCR

Runtime error

App Files Files Community

Rivalcoder committed on Sep 18, 2025

Commit

2f465bd

1 Parent(s): 210935f

Add files

Browse files

Files changed (2) hide show

Dockerfile +11 -6
app.py +40 -37

Dockerfile CHANGED Viewed

@@ -12,22 +12,27 @@ RUN apt-get update && apt-get install -y \
 WORKDIR /app
-# Create writable dirs for EasyOCR
-RUN mkdir -p /app/.EasyOCR/user_network
-# Copy requirements first (cache layer)
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-# ⚡ Pre-download EasyOCR model files at build time
 RUN python3 - <<'EOF'
 import easyocr, os
 MODEL_DIR = "/app/.EasyOCR"
 USER_NET_DIR = os.path.join(MODEL_DIR, "user_network")
 os.makedirs(MODEL_DIR, exist_ok=True)
 os.makedirs(USER_NET_DIR, exist_ok=True)
-# this will download weights into /app/.EasyOCR
-reader = easyocr.Reader(['en'], model_storage_directory=MODEL_DIR, user_network_directory=USER_NET_DIR)
 EOF
 # Copy app

 WORKDIR /app
+# EasyOCR model dir (must be writable)
+ENV EASY_OCR_DIR=/app/.EasyOCR
+RUN mkdir -p $EASY_OCR_DIR/user_network
+# Copy requirements
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
+# ⚡ Pre-download EasyOCR detection + recognition models
 RUN python3 - <<'EOF'
 import easyocr, os
 MODEL_DIR = "/app/.EasyOCR"
 USER_NET_DIR = os.path.join(MODEL_DIR, "user_network")
 os.makedirs(MODEL_DIR, exist_ok=True)
 os.makedirs(USER_NET_DIR, exist_ok=True)
+# preload both en + hi recognition + detection
+reader = easyocr.Reader(['en', 'hi'],
+    model_storage_directory=MODEL_DIR,
+    user_network_directory=USER_NET_DIR,
+    download_enabled=True
+)
 EOF
 # Copy app

app.py CHANGED Viewed

@@ -1,53 +1,56 @@
 import os
-import numpy as np
 import easyocr
-from fastapi import FastAPI, File, UploadFile
 from fastapi.responses import JSONResponse
-from pdf2image import convert_from_bytes
-from tempfile import NamedTemporaryFile
-import uvicorn
-app = FastAPI(title="OCR Backend API", description="Extract text from PDF or Images using EasyOCR")
-# Force EasyOCR to use writable directories inside /app
-MODEL_DIR = os.path.join(os.getcwd(), ".EasyOCR")
-USER_NET_DIR = os.path.join(os.getcwd(), ".EasyOCR", "user_network")
 os.makedirs(MODEL_DIR, exist_ok=True)
 os.makedirs(USER_NET_DIR, exist_ok=True)
-# Initialize EasyOCR Reader with both directories
 reader = easyocr.Reader(
-    ['en', 'hi'],
     model_storage_directory=MODEL_DIR,
-    user_network_directory=USER_NET_DIR
 )
-@app.post("/extract-text/")
-async def extract_text(file: UploadFile = File(...)):
     try:
         contents = await file.read()
-        extracted_text = ""
-        if file.filename.lower().endswith(".pdf"):
-            # Convert PDF to images
-            images = convert_from_bytes(contents)
-            for i, image in enumerate(images):
-                image_np = np.array(image)
-                result = reader.readtext(image_np)
-                page_text = " ".join([text for _, text, _ in result])
-                extracted_text += f"--- Page {i+1} ---\n{page_text}\n\n"
-        else:
-            # Treat as image
-            with NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
-                temp_file.write(contents)
-                temp_file.flush()
-                results = reader.readtext(temp_file.name)
-                extracted_text = " ".join([text for _, text, _ in results])
-        return JSONResponse({"extracted_text": extracted_text})
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))

 import os
+import io
 import easyocr
+from fastapi import FastAPI, UploadFile, File
 from fastapi.responses import JSONResponse
+from PIL import Image
+# =========================
+# EasyOCR config
+# =========================
+# Model files are looked up under MODEL_DIR; the Dockerfile pre-downloads
+# them into the same hard-coded path at build time, so the two must agree.
+MODEL_DIR = "/app/.EasyOCR"
+USER_NET_DIR = os.path.join(MODEL_DIR, "user_network")
 os.makedirs(MODEL_DIR, exist_ok=True)
 os.makedirs(USER_NET_DIR, exist_ok=True)
+# ✅ preload reader with cached models
+# The reader is built once at import time and shared by every request.
 reader = easyocr.Reader(
+    ['en', 'hi'],  # langs (can reduce to ['en'] if you want smaller image)
     model_storage_directory=MODEL_DIR,
+    user_network_directory=USER_NET_DIR,
+    download_enabled=False   # 🚫 no runtime downloads
 )
+# NOTE(review): with download_enabled=False the models are expected to be
+# present in MODEL_DIR already; presumably startup fails otherwise — confirm.
+# =========================
+# FastAPI app
+# =========================
+app = FastAPI()
+@app.get("/")
+async def root():
+    """Liveness endpoint: return a fixed message showing the API is up."""
+    payload = {"message": "OCR API is running on Hugging Face 🚀"}
+    return payload
+@app.post("/ocr")
+async def ocr(file: UploadFile = File(...)):
+    """Run EasyOCR on an uploaded image and return the detected text.
+
+    Args:
+        file: The uploaded image (multipart form field ``file``).
+
+    Returns:
+        A JSON response ``{"results": [...]}`` with one entry per detected
+        text region, each holding ``bbox`` (a list of ``[x, y]`` corner
+        points), ``text`` and ``confidence``. On any failure the response
+        is ``{"error": "<message>"}`` with HTTP status 500.
+    """
     try:
+        # read image into memory
         contents = await file.read()
+        # Let Pillow identify the upload first so a non-image file is
+        # rejected with a clear error message before OCR is attempted.
+        with Image.open(io.BytesIO(contents)):
+            pass
+        # run OCR on the raw bytes: readtext() is documented to accept a
+        # file path, URL, bytes or numpy array, not arbitrary PIL images.
+        results = reader.readtext(contents)
+        # format results; coordinates and confidence may be numpy scalars,
+        # which the JSON encoder cannot serialize, so convert to floats.
+        text_results = [
+            {
+                "bbox": [[float(coord) for coord in point] for point in bbox],
+                "text": text,
+                "confidence": float(prob)
+            }
+            for bbox, text, prob in results
+        ]
+        return JSONResponse(content={"results": text_results})
+    except Exception as e:
+        return JSONResponse(content={"error": str(e)}, status_code=500)