frkhan commited on
Commit
a2f903f
·
1 Parent(s): 6893f63

-- Included GitHub Repo, Story Link, Tech Stack Badges in the UI

Browse files

-- Added observability (langfuse)
-- Changed the main docker image and updated docker-compose accordingly
-- Included changes in Readme
-- Added torch, torchvision, langfuse as dependencies

Files changed (9) hide show
  1. .env.example +3 -0
  2. .gitignore +4 -0
  3. Dockerfile +15 -11
  4. Dockerfile.dev +21 -0
  5. README.md +46 -20
  6. app.py +73 -18
  7. docker-compose.dev.yml +18 -0
  8. docker-compose.yml +13 -6
  9. requirements.txt +7 -2
.env.example ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ LANGFUSE_PUBLIC_KEY="pk-lf-..."
2
+ LANGFUSE_SECRET_KEY="sk-lf-..."
3
+ LANGFUSE_HOST="https://cloud.langfuse.com" # Or your self-hosted instance
.gitignore CHANGED
@@ -22,4 +22,8 @@ docker-compose.override.yml
22
  .DS_Store
23
  Thumbs.db
24
  .EasyOCR/bengali.pth
 
25
  .EasyOCR/craft_mlt_25k.pth
 
 
 
 
22
  .DS_Store
23
  Thumbs.db
24
  .EasyOCR/bengali.pth
25
+ .EasyOCR/english.pth
26
  .EasyOCR/craft_mlt_25k.pth
27
+
28
+ # Persisted EasyOCR models from Docker volume
29
+ easyocr_models/
Dockerfile CHANGED
@@ -1,17 +1,21 @@
1
- FROM docker.io/pytorch/pytorch
2
-
3
- RUN apt-get update && apt-get install -y \
4
- libgl1-mesa-glx \
5
- libglib2.0-0 \
6
- libsm6 \
7
- libxext6 \
8
- libxrender-dev \
9
- && rm -rf /var/lib/apt/lists/*
10
 
 
11
  WORKDIR /app
12
- COPY requirements.txt app.py ./
13
 
14
- RUN pip install --upgrade pip
 
 
 
 
 
 
 
15
  RUN pip install --no-cache-dir -r requirements.txt
16
 
 
 
 
 
17
  CMD ["python", "app.py"]
 
1
+ # Use an official Python runtime as a parent image
2
+ FROM python:3.10-slim-bookworm
 
 
 
 
 
 
 
3
 
4
+ # Set the working directory in the container
5
  WORKDIR /app
 
6
 
7
+ # Upgrade system packages to patch vulnerabilities
8
+ RUN apt-get update && apt-get upgrade -y && apt-get clean
9
+
10
+ # Copy only the requirements file to leverage Docker cache
11
+ COPY requirements.txt .
12
+
13
+ # Install dependencies
14
+ # Using --no-cache-dir reduces image size
15
  RUN pip install --no-cache-dir -r requirements.txt
16
 
17
+ # Copy the rest of the application code
18
+ COPY . .
19
+
20
+ # Command to run the application
21
  CMD ["python", "app.py"]
Dockerfile.dev ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use an official Python runtime as a parent image
2
+ FROM python:3.10-slim-bookworm
3
+
4
+ # Set the working directory in the container
5
+ WORKDIR /app
6
+
7
+ # Prevent Python from writing .pyc files to disk and from buffering stdout/stderr
8
+ ENV PYTHONDONTWRITEBYTECODE 1
9
+ ENV PYTHONUNBUFFERED 1
10
+
11
+ # Copy only the requirements file to leverage Docker cache
12
+ # This is the layer that takes a long time, but it will be cached after the first build.
13
+ COPY requirements.txt .
14
+
15
+ # Install dependencies
16
+ # (pip's cache is kept here to speed up repeated dev builds)
17
+ RUN pip install -r requirements.txt
18
+
19
+ # Command to run the application
20
+ # The CMD will run the app from the volume mounted by docker-compose
21
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🧠
4
  colorFrom: indigo
5
  colorTo: pink
6
  sdk: gradio
7
- sdk_version: 5.42.0
8
  app_file: app.py
9
  pinned: false
10
  ---
@@ -28,7 +28,8 @@ This project is a lightweight Optical Character Recognition (OCR) web applicatio
28
 
29
  ## 🔗 Live Demo
30
 
31
- Try it out here: **[Bangla OCR Demo App](https://huggingface.co/spaces/frkhan/bangla-ocr)**
 
32
 
33
  ---
34
 
@@ -38,8 +39,9 @@ Try it out here: **[Bangla OCR Demo App](https://huggingface.co/spaces/frkhan/ba
38
  | ---------------- | -------------------------------- |
39
  | `Gradio` | Web interface for user input |
40
  | `EasyOCR` | Text extraction from images |
41
- | `OpenCV` | Image preprocessing |
42
  | `NumPy` | Array manipulation |
 
43
  | `Docker` | Containerized deployment |
44
  | `Docker Compose` | Service orchestration |
45
  | `PyTorch` | Backend for EasyOCR |
@@ -89,7 +91,7 @@ You can run the app in three different ways:
89
  python app.py
90
  ```
91
 
92
- The app will be running at `http://127.0.0.1:7860`.
93
 
94
  *(When you're finished, you can leave the virtual environment by simply running the `deactivate` command.)*
95
 
@@ -97,22 +99,42 @@ The app will be running at `http://127.0.0.1:7860`.
97
 
98
  1. **Build the Docker image:**
99
  ```bash
100
- docker build -t bangla-ocr .
101
  ```
102
 
103
  2. **Run the container:**
104
  ```bash
105
- docker run -p 7860:7860 bangla-ocr
106
  ```
107
- Open your browser and visit: [http://localhost:7860](http://localhost:7860)
108
 
109
  ### 🧱 Option 3: Docker Compose
110
 
111
- 1. **Start the app using Docker Compose:**
112
- ```bash
113
- docker-compose up --build
114
- ```
115
- Open your browser and visit: [http://localhost:7860](http://localhost:7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  ---
118
 
@@ -136,11 +158,13 @@ Extracted Text:
136
 
137
  ```text
138
  bangla-ocr-app/
139
- ├── app.py # Main application logic
140
- ├── requirements.txt # Python dependencies
141
- ├── Dockerfile # Container setup
142
- ├── docker-compose.yml # Multi-container orchestration
143
- └── README.md # Project documentation
 
 
144
  ```
145
 
146
  ## 🌍 Language Support
@@ -156,8 +180,8 @@ To add more languages, modify the following line in `app.py`:
156
  # From
157
  reader = easyocr.Reader(['bn', 'en'], gpu=True)
158
 
159
- # To (for example, adding Hindi)
160
- reader = easyocr.Reader(['bn', 'en', 'hi'], gpu=True)
161
  ```
162
 
163
 
@@ -172,6 +196,8 @@ This project is open-source and distributed under the **[MIT License](https://op
172
  - **[EasyOCR](https://github.com/JaidedAI/EasyOCR)** for its powerful and accessible multilingual OCR library.
173
  - **[Gradio](https://www.gradio.app/)** for making it incredibly simple to create machine learning interfaces.
174
  - **[PyTorch](https://pytorch.org/)** for powering the deep learning backend.
 
 
 
175
 
176
  > “Small tools, big impact.” — Let’s make machine learning approachable, one project at a time.
177
-
 
4
  colorFrom: indigo
5
  colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 5.46.1
8
  app_file: app.py
9
  pinned: false
10
  ---
 
28
 
29
  ## 🔗 Live Demo
30
 
31
+ - **Live App**: Try it out here: **[Bangla OCR Demo App](https://huggingface.co/spaces/frkhan/bangla-ocr)**
32
+ - **Project Story**: Curious about how this app was built? **[Read the full story on Medium](https://frkhan.medium.com/turning-pages-into-pixels-the-making-of-a-bangla-ocr-app-9022bbffcd60)** to see the journey from idea to deployment.
33
 
34
  ---
35
 
 
39
  | ---------------- | -------------------------------- |
40
  | `Gradio` | Web interface for user input |
41
  | `EasyOCR` | Text extraction from images |
42
+ | `OpenCV` | Image processing backend for EasyOCR |
43
  | `NumPy` | Array manipulation |
44
+ | `Langfuse` | Observability and tracing |
45
  | `Docker` | Containerized deployment |
46
  | `Docker Compose` | Service orchestration |
47
  | `PyTorch` | Backend for EasyOCR |
 
91
  python app.py
92
  ```
93
 
94
+ The app will be running at `http://127.0.0.1:12300`.
95
 
96
  *(When you're finished, you can leave the virtual environment by simply running the `deactivate` command.)*
97
 
 
99
 
100
  1. **Build the Docker image:**
101
  ```bash
102
+ docker build -t bangla-ocr-app .
103
  ```
104
 
105
  2. **Run the container:**
106
  ```bash
107
+ docker run -p 12300:7860 bangla-ocr-app
108
  ```
109
+ Open your browser and visit: http://localhost:12300
110
 
111
  ### 🧱 Option 3: Docker Compose
112
 
113
+ ```bash
114
+ # To run in the live environment. This automatically uses docker-compose.yml
115
+ docker-compose up --build
116
+
117
+ # Or, if you use the newer `docker compose` command, use the following
118
+
119
+ docker compose up --build
120
+ ```
121
+
122
+ Access the app at http://localhost:12300
123
+
124
+ ---
125
+
126
+
127
+ ```bash
128
+ # To run in a local environment, use docker-compose.dev.yml if you want code changes reflected without rebuilding the Docker container
129
+ docker-compose -f docker-compose.dev.yml up --build
130
+
131
+ # Or, if you use the newer `docker compose` command, use the following
132
+ docker compose -f docker-compose.dev.yml up --build
133
+
134
+ ```
135
+
136
+ Access the app at http://localhost:12300
137
+
138
 
139
  ---
140
 
 
158
 
159
  ```text
160
  bangla-ocr-app/
161
+ ├── app.py # Main application logic
162
+ ├── requirements.txt # Python dependencies
163
+ ├── Dockerfile # Container setup
164
+ ├── docker-compose.yml # Multi-container orchestration for production
165
+ ├── docker-compose.dev.yml # Multi-container orchestration for development
166
+ ├── .env # Environment variables for Langfuse (Optional)
167
+ └── README.md # Project documentation
168
  ```
169
 
170
  ## 🌍 Language Support
 
180
  # From
181
  reader = easyocr.Reader(['bn', 'en'], gpu=True)
182
 
183
+ # To (for example, adding Hindi, Arabic, Urdu, Malay, Chinese, and Japanese)
184
+ reader = easyocr.Reader(['bn', 'en', 'hi', 'ar', 'ur', 'ms', 'ch_sim', 'ja'], gpu=True)
185
  ```
186
 
187
 
 
196
  - **[EasyOCR](https://github.com/JaidedAI/EasyOCR)** for its powerful and accessible multilingual OCR library.
197
  - **[Gradio](https://www.gradio.app/)** for making it incredibly simple to create machine learning interfaces.
198
  - **[PyTorch](https://pytorch.org/)** for powering the deep learning backend.
199
+ - **[Docker](https://www.docker.com)** — Containerization platform for reproducible deployment across environments.
200
+ - **[Hugging Face Spaces](https://huggingface.co/spaces)** — Free hosting platform for ML demos with secret management and GPU support.
201
+ - **[Langfuse](https://langfuse.com/)** for providing excellent observability tools.
202
 
203
  > “Small tools, big impact.” — Let’s make machine learning approachable, one project at a time.
 
app.py CHANGED
@@ -1,20 +1,40 @@
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import easyocr
3
  import numpy as np
4
- from PIL import Image
5
  import time
6
  from pathlib import Path
7
-
8
- from pathlib import Path
9
 
10
  # Use home directory for model storage
11
- model_dir = Path("/tmp/easyocr_models")
12
  model_dir.mkdir(parents=True, exist_ok=True)
13
 
14
- reader = easyocr.Reader(['bn', 'en'], gpu=True, model_storage_directory=str(model_dir))
15
-
16
 
 
17
  def ocr_image(image):
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  if image is None:
19
  return "No image uploaded.", ""
20
  start_time = time.time()
@@ -27,17 +47,52 @@ def ocr_image(image):
27
  human_time = f"⏱ Time taken: {duration:.2f} seconds"
28
  return extracted_text, human_time
29
 
30
- iface = gr.Interface(
31
- fn=ocr_image,
32
- inputs=gr.Image(type="pil"),
33
- outputs=[
34
- gr.Textbox(label="Extracted Text"),
35
- gr.Textbox(label="Duration")
36
- ],
37
- title="Bangla OCR Demo",
38
- description="Upload an image with Bangla text to extract it using EasyOCR.",
39
- allow_flagging="never"
40
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  if __name__ == "__main__":
43
- iface.launch(server_name="0.0.0.0")
 
1
+ """
2
+ A Gradio web application for performing Optical Character Recognition (OCR)
3
+ on images containing Bangla and English text using the EasyOCR library.
4
+
5
+ This application provides a simple user interface to upload an image and view
6
+ the extracted text along with the processing time. It is instrumented with
7
+ Langfuse for observability.
8
+ """
9
+
10
  import gradio as gr
11
  import easyocr
12
  import numpy as np
 
13
  import time
14
  from pathlib import Path
15
+ from langfuse import observe
 
16
 
17
  # Use home directory for model storage
18
+ model_dir = Path("/app/easyocr_models")
19
  model_dir.mkdir(parents=True, exist_ok=True)
20
 
21
+ reader = easyocr.Reader(['bn', 'en'], gpu=False, model_storage_directory=str(model_dir))
 
22
 
23
+ @observe()
24
  def ocr_image(image):
25
+ """
26
+ Performs OCR on an image to extract Bangla and English text.
27
+
28
+ This function is decorated with `@observe` to trace its execution with Langfuse.
29
+
30
+ Args:
31
+ image (PIL.Image.Image): The image uploaded by the user via the Gradio interface.
32
+
33
+ Returns:
34
+ tuple[str, str]: A tuple containing:
35
+ - The extracted text as a single string.
36
+ - A formatted string indicating the processing time.
37
+ """
38
  if image is None:
39
  return "No image uploaded.", ""
40
  start_time = time.time()
 
47
  human_time = f"⏱ Time taken: {duration:.2f} seconds"
48
  return extracted_text, human_time
49
 
50
+
51
+ with gr.Blocks() as demo:
52
+ gr.HTML("""
53
+ <div style="display: flex; align-items: center; gap: 20px; flex-wrap: wrap; margin-bottom: 20px;">
54
+ <h1 style="margin: 0;">🇧🇩 Bangla OCR App</h1>
55
+ <div style="display: flex; gap: 10px; flex-wrap: wrap;">
56
+ <a href="https://github.com/JaidedAI/EasyOCR" target="_blank">
57
+ <img src="https://img.shields.io/badge/EasyOCR-OCR%20Engine-green" alt="EasyOCR">
58
+ </a>
59
+ <a href="https://opencv.org/" target="_blank">
60
+ <img src="https://img.shields.io/badge/OpenCV-Image%20Processing-blue?logo=opencv" alt="OpenCV">
61
+ </a>
62
+ <a href="https://numpy.org/" target="_blank">
63
+ <img src="https://img.shields.io/badge/NumPy-Arrays-blue?logo=numpy" alt="NumPy">
64
+ </a>
65
+ <a href="https://pytorch.org/" target="_blank">
66
+ <img src="https://img.shields.io/badge/PyTorch-Backend-orange?logo=pytorch" alt="PyTorch">
67
+ </a>
68
+ <a href="https://langfuse.com/" target="_blank">
69
+ <img src="https://img.shields.io/badge/Langfuse-Observability-blue" alt="Langfuse">
70
+ </a>
71
+ </div>
72
+ </div>
73
+ <div style="display: flex; gap: 15px; flex-wrap: wrap; margin-bottom: 20px; align-items: center;">
74
+ <div>
75
+ <span style="font-size: 16px;">📦 <strong>Source Code:</strong></span>
76
+ <a href="https://github.com/KI-IAN/bangla-ocr-app.git" target="_blank"><img src="https://img.shields.io/badge/GitHub-Repo-blue?style=for-the-badge&logo=github" alt="GitHub Repo"></a>
77
+ </div>
78
+ <div>
79
+ <span style="font-size: 16px;">📖 <strong>Project Story:</strong></span>
80
+ <a href="https://frkhan.medium.com/turning-pages-into-pixels-the-making-of-a-bangla-ocr-app-9022bbffcd60" target="_blank"><img src="https://img.shields.io/badge/Medium-Read%20Story-black?style=for-the-badge&logo=medium" alt="Read Story on Medium"></a>
81
+ </div>
82
+ </div>
83
+ """)
84
+
85
+ gr.Interface(
86
+ fn=ocr_image,
87
+ inputs=gr.Image(type="pil"),
88
+ outputs=[
89
+ gr.Textbox(label="Extracted Text", lines=20),
90
+ gr.Textbox(label="Duration")
91
+ ],
92
+ title="Bangla OCR App",
93
+ description="Upload an image with Bangla text to extract it using EasyOCR.",
94
+ allow_flagging="never"
95
+ )
96
 
97
  if __name__ == "__main__":
98
+ demo.launch(server_name="0.0.0.0")
docker-compose.dev.yml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: "3.8"
2
+ services:
3
+ bangla-ocr-app:
4
+ build:
5
+ context: .
6
+ dockerfile: Dockerfile.dev
7
+ container_name: bangla-ocr-app
8
+ ports:
9
+ - "12300:7860"
10
+ volumes:
11
+ - .:/app # Mount the entire project directory for live code reloading
12
+ - ./easyocr_models:/app/easyocr_models # Persist downloaded OCR models
13
+ environment:
14
+ - LANGFUSE_PUBLIC_KEY=${LANGFUSE_PUBLIC_KEY} # Load this key from .env in local/dev environment
15
+ - LANGFUSE_SECRET_KEY=${LANGFUSE_SECRET_KEY} # Load this key from .env in local/dev environment
16
+ - LANGFUSE_HOST=${LANGFUSE_HOST} # Load this key from .env in local/dev environment
17
+ - EASYOCR_MODULE_PATH=/app/easyocr_models # Force all models into the persistent volume
18
+ restart: unless-stopped
docker-compose.yml CHANGED
@@ -1,10 +1,17 @@
1
  version: "3.8"
2
  services:
3
- gradio-app:
4
- build: .
 
 
 
5
  ports:
6
- - "7860:7860"
7
- # volumes:
8
- # - .:/app
9
  environment:
10
- - PYTHONUNBUFFERED=1
 
 
 
 
 
1
  version: "3.8"
2
  services:
3
+ bangla-ocr-app:
4
+ build:
5
+ context: .
6
+ dockerfile: Dockerfile
7
+ container_name: bangla-ocr-app
8
  ports:
9
+ - "12300:7860"
10
+ volumes:
11
+ - ./easyocr_models:/app/easyocr_models
12
  environment:
13
+ - LANGFUSE_PUBLIC_KEY=${LANGFUSE_PUBLIC_KEY} # Load this key from .env in local/dev environment
14
+ - LANGFUSE_SECRET_KEY=${LANGFUSE_SECRET_KEY} # Load this key from .env in local/dev environment
15
+ - LANGFUSE_HOST=${LANGFUSE_HOST} # Load this key from .env in local/dev environment
16
+ - EASYOCR_MODULE_PATH=/app/easyocr_models # Force all models into the persistent volume
17
+ restart: unless-stopped
requirements.txt CHANGED
@@ -1,4 +1,9 @@
1
- gradio==5.42.0
 
 
2
  easyocr==1.7.2
 
 
3
  opencv-python-headless==4.8.0.76
4
- numpy==1.26.4
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cpu
2
+
3
+ gradio==5.46.1
4
  easyocr==1.7.2
5
+ torch==2.1.2
6
+ torchvision==0.16.2
7
  opencv-python-headless==4.8.0.76
8
+ numpy==1.26.4
9
+ langfuse==3.5.2