Rivalcoder committed on
Commit
2f465bd
·
1 Parent(s): 210935f
Files changed (2) hide show
  1. Dockerfile +11 -6
  2. app.py +40 -37
Dockerfile CHANGED
@@ -12,22 +12,27 @@ RUN apt-get update && apt-get install -y \
12
 
13
  WORKDIR /app
14
 
15
- # Create writable dirs for EasyOCR
16
- RUN mkdir -p /app/.EasyOCR/user_network
 
17
 
18
- # Copy requirements first (cache layer)
19
  COPY requirements.txt .
20
  RUN pip install --no-cache-dir -r requirements.txt
21
 
22
- # ⚡ Pre-download EasyOCR model files at build time
23
  RUN python3 - <<'EOF'
24
  import easyocr, os
25
  MODEL_DIR = "/app/.EasyOCR"
26
  USER_NET_DIR = os.path.join(MODEL_DIR, "user_network")
27
  os.makedirs(MODEL_DIR, exist_ok=True)
28
  os.makedirs(USER_NET_DIR, exist_ok=True)
29
- # this will download weights into /app/.EasyOCR
30
- reader = easyocr.Reader(['en'], model_storage_directory=MODEL_DIR, user_network_directory=USER_NET_DIR)
 
 
 
 
31
  EOF
32
 
33
  # Copy app
 
12
 
13
  WORKDIR /app
14
 
15
+ # EasyOCR model dir (must be writable)
16
+ ENV EASY_OCR_DIR=/app/.EasyOCR
17
+ RUN mkdir -p $EASY_OCR_DIR/user_network
18
 
19
+ # Copy requirements
20
  COPY requirements.txt .
21
  RUN pip install --no-cache-dir -r requirements.txt
22
 
23
+ # ⚡ Pre-download EasyOCR detection + recognition models
24
  RUN python3 - <<'EOF'
25
  import easyocr, os
26
  MODEL_DIR = "/app/.EasyOCR"
27
  USER_NET_DIR = os.path.join(MODEL_DIR, "user_network")
28
  os.makedirs(MODEL_DIR, exist_ok=True)
29
  os.makedirs(USER_NET_DIR, exist_ok=True)
30
+ # preload both en + hi recognition + detection
31
+ reader = easyocr.Reader(['en', 'hi'],
32
+ model_storage_directory=MODEL_DIR,
33
+ user_network_directory=USER_NET_DIR,
34
+ download_enabled=True
35
+ )
36
  EOF
37
 
38
  # Copy app
app.py CHANGED
@@ -1,53 +1,56 @@
1
  import os
2
- import numpy as np
3
  import easyocr
4
- from fastapi import FastAPI, File, UploadFile
5
  from fastapi.responses import JSONResponse
6
- from pdf2image import convert_from_bytes
7
- from tempfile import NamedTemporaryFile
8
- import uvicorn
9
 
10
- app = FastAPI(title="OCR Backend API", description="Extract text from PDF or Images using EasyOCR")
 
 
 
 
11
 
12
- # Force EasyOCR to use writable directories inside /app
13
- MODEL_DIR = os.path.join(os.getcwd(), ".EasyOCR")
14
- USER_NET_DIR = os.path.join(os.getcwd(), ".EasyOCR", "user_network")
15
  os.makedirs(MODEL_DIR, exist_ok=True)
16
  os.makedirs(USER_NET_DIR, exist_ok=True)
17
 
18
- # Initialize EasyOCR Reader with both directories
19
  reader = easyocr.Reader(
20
- ['en', 'hi'],
21
  model_storage_directory=MODEL_DIR,
22
- user_network_directory=USER_NET_DIR
 
23
  )
24
 
25
- @app.post("/extract-text/")
26
- async def extract_text(file: UploadFile = File(...)):
 
 
 
 
 
 
 
 
 
27
  try:
 
28
  contents = await file.read()
29
- extracted_text = ""
30
-
31
- if file.filename.lower().endswith(".pdf"):
32
- # Convert PDF to images
33
- images = convert_from_bytes(contents)
34
- for i, image in enumerate(images):
35
- image_np = np.array(image)
36
- result = reader.readtext(image_np)
37
- page_text = " ".join([text for _, text, _ in result])
38
- extracted_text += f"--- Page {i+1} ---\n{page_text}\n\n"
39
- else:
40
- # Treat as image
41
- with NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
42
- temp_file.write(contents)
43
- temp_file.flush()
44
- results = reader.readtext(temp_file.name)
45
- extracted_text = " ".join([text for _, text, _ in results])
46
-
47
- return JSONResponse({"extracted_text": extracted_text})
48
 
49
- except Exception as e:
50
- return JSONResponse({"error": str(e)}, status_code=500)
51
 
52
- if __name__ == "__main__":
53
- uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import io
3
  import easyocr
4
+ from fastapi import FastAPI, UploadFile, File
5
  from fastapi.responses import JSONResponse
6
+ from PIL import Image
 
 
7
 
8
+ # =========================
9
+ # EasyOCR config
10
+ # =========================
11
+ MODEL_DIR = "/app/.EasyOCR"
12
+ USER_NET_DIR = os.path.join(MODEL_DIR, "user_network")
13
 
 
 
 
14
  os.makedirs(MODEL_DIR, exist_ok=True)
15
  os.makedirs(USER_NET_DIR, exist_ok=True)
16
 
17
+ # ✅ preload reader with cached models
18
  reader = easyocr.Reader(
19
+ ['en', 'hi'], # langs (can reduce to ['en'] if you want smaller image)
20
  model_storage_directory=MODEL_DIR,
21
+ user_network_directory=USER_NET_DIR,
22
+ download_enabled=False # 🚫 no runtime downloads
23
  )
24
 
25
+ # =========================
26
+ # FastAPI app
27
+ # =========================
28
+ app = FastAPI()
29
+
30
+ @app.get("/")
31
+ async def root():
32
  return {"message": "OCR API is running on Hugging Face 🚀"}
33
+
34
+ @app.post("/ocr")
35
+ async def ocr(file: UploadFile = File(...)):
36
  try:
37
+ # read image into memory
38
  contents = await file.read()
39
+ image = Image.open(io.BytesIO(contents))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ # run OCR
42
+ results = reader.readtext(image)
43
 
44
+ # format results
45
+ text_results = []
46
+ for bbox, text, prob in results:
47
+ text_results.append({
48
+ "bbox": bbox,
49
+ "text": text,
50
+ "confidence": float(prob)
51
+ })
52
+
53
+ return JSONResponse(content={"results": text_results})
54
+
55
+ except Exception as e:
56
+ return JSONResponse(content={"error": str(e)}, status_code=500)