frkhan commited on
Commit
a2f903f
·
1 Parent(s): 6893f63

-- Included GitHub Repo, Story Link, Tech Stack Badges in the UI

Browse files

-- Added observability (langfuse)
-- Changed the main docker image and updated docker-compose accordingly
-- Included changes in Readme
-- Added torch, torchvision, langfuse as dependencies

Files changed (9) hide show
  1. .env.example +3 -0
  2. .gitignore +4 -0
  3. Dockerfile +15 -11
  4. Dockerfile.dev +21 -0
  5. README.md +46 -20
  6. app.py +73 -18
  7. docker-compose.dev.yml +18 -0
  8. docker-compose.yml +13 -6
  9. requirements.txt +7 -2
.env.example ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ LANGFUSE_PUBLIC_KEY="pk-lf-..."
2
+ LANGFUSE_SECRET_KEY="sk-lf-..."
3
+ LANGFUSE_HOST="https://cloud.langfuse.com" # Or your self-hosted instance
.gitignore CHANGED
@@ -22,4 +22,8 @@ docker-compose.override.yml
22
  .DS_Store
23
  Thumbs.db
24
  .EasyOCR/bengali.pth
 
25
  .EasyOCR/craft_mlt_25k.pth
 
 
 
 
22
  .DS_Store
23
  Thumbs.db
24
  .EasyOCR/bengali.pth
25
+ .EasyOCR/english.pth
26
  .EasyOCR/craft_mlt_25k.pth
27
+
28
+ # Persisted EasyOCR models from Docker volume
29
+ easyocr_models/
Dockerfile CHANGED
@@ -1,17 +1,21 @@
1
- FROM docker.io/pytorch/pytorch
2
-
3
- RUN apt-get update && apt-get install -y \
4
- libgl1-mesa-glx \
5
- libglib2.0-0 \
6
- libsm6 \
7
- libxext6 \
8
- libxrender-dev \
9
- && rm -rf /var/lib/apt/lists/*
10
 
 
11
  WORKDIR /app
12
- COPY requirements.txt app.py ./
13
 
14
- RUN pip install --upgrade pip
 
 
 
 
 
 
 
15
  RUN pip install --no-cache-dir -r requirements.txt
16
 
 
 
 
 
17
  CMD ["python", "app.py"]
 
1
+ # Use an official Python runtime as a parent image
2
+ FROM python:3.10-slim-bookworm
 
 
 
 
 
 
 
3
 
4
+ # Set the working directory in the container
5
  WORKDIR /app
 
6
 
7
+ # Upgrade system packages to patch vulnerabilities
8
+ RUN apt-get update && apt-get upgrade -y && apt-get clean
9
+
10
+ # Copy only the requirements file to leverage Docker cache
11
+ COPY requirements.txt .
12
+
13
+ # Install dependencies
14
+ # Using --no-cache-dir reduces image size
15
  RUN pip install --no-cache-dir -r requirements.txt
16
 
17
+ # Copy the rest of the application code
18
+ COPY . .
19
+
20
+ # Command to run the application
21
  CMD ["python", "app.py"]
Dockerfile.dev ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use an official Python runtime as a parent image
2
+ FROM python:3.10-slim-bookworm
3
+
4
+ # Set the working directory in the container
5
+ WORKDIR /app
6
+
7
+ # Prevent Python from writing .pyc files to disk and from buffering stdout/stderr
8
+ ENV PYTHONDONTWRITEBYTECODE 1
9
+ ENV PYTHONUNBUFFERED 1
10
+
11
+ # Copy only the requirements file to leverage Docker cache
12
+ # This is the layer that takes a long time, but it will be cached after the first build.
13
+ COPY requirements.txt .
14
+
15
+ # Install dependencies
16
+ # (pip's cache is kept here to speed up repeated dev builds)
17
+ RUN pip install -r requirements.txt
18
+
19
+ # Command to run the application
20
+ # The CMD will run the app from the volume mounted by docker-compose
21
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🧠
4
  colorFrom: indigo
5
  colorTo: pink
6
  sdk: gradio
7
- sdk_version: 5.42.0
8
  app_file: app.py
9
  pinned: false
10
  ---
@@ -28,7 +28,8 @@ This project is a lightweight Optical Character Recognition (OCR) web applicatio
28
 
29
  ## 🔗 Live Demo
30
 
31
- Try it out here: **[Bangla OCR Demo App](https://huggingface.co/spaces/frkhan/bangla-ocr)**
 
32
 
33
  ---
34
 
@@ -38,8 +39,9 @@ Try it out here: **[Bangla OCR Demo App](https://huggingface.co/spaces/frkhan/ba
38
  | ---------------- | -------------------------------- |
39
  | `Gradio` | Web interface for user input |
40
  | `EasyOCR` | Text extraction from images |
41
- | `OpenCV` | Image preprocessing |
42
  | `NumPy` | Array manipulation |
 
43
  | `Docker` | Containerized deployment |
44
  | `Docker Compose` | Service orchestration |
45
  | `PyTorch` | Backend for EasyOCR |
@@ -89,7 +91,7 @@ You can run the app in three different ways:
89
  python app.py
90
  ```
91
 
92
- The app will be running at `http://127.0.0.1:7860`.
93
 
94
  *(When you're finished, you can leave the virtual environment by simply running the `deactivate` command.)*
95
 
@@ -97,22 +99,42 @@ The app will be running at `http://127.0.0.1:7860`.
97
 
98
  1. **Build the Docker image:**
99
  ```bash
100
- docker build -t bangla-ocr .
101
  ```
102
 
103
  2. **Run the container:**
104
  ```bash
105
- docker run -p 7860:7860 bangla-ocr
106
  ```
107
- Open your browser and visit: [http://localhost:7860](http://localhost:7860)
108
 
109
  ### 🧱 Option 3: Docker Compose
110
 
111
- 1. **Start the app using Docker Compose:**
112
- ```bash
113
- docker-compose up --build
114
- ```
115
- Open your browser and visit: [http://localhost:7860](http://localhost:7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  ---
118
 
@@ -136,11 +158,13 @@ Extracted Text:
136
 
137
  ```text
138
  bangla-ocr-app/
139
- ├── app.py # Main application logic
140
- ├── requirements.txt # Python dependencies
141
- ├── Dockerfile # Container setup
142
- ├── docker-compose.yml # Multi-container orchestration
143
- └── README.md # Project documentation
 
 
144
  ```
145
 
146
  ## 🌍 Language Support
@@ -156,8 +180,8 @@ To add more languages, modify the following line in `app.py`:
156
  # From
157
  reader = easyocr.Reader(['bn', 'en'], gpu=True)
158
 
159
- # To (for example, adding Hindi)
160
- reader = easyocr.Reader(['bn', 'en', 'hi'], gpu=True)
161
  ```
162
 
163
 
@@ -172,6 +196,8 @@ This project is open-source and distributed under the **[MIT License](https://op
172
  - **[EasyOCR](https://github.com/JaidedAI/EasyOCR)** for its powerful and accessible multilingual OCR library.
173
  - **[Gradio](https://www.gradio.app/)** for making it incredibly simple to create machine learning interfaces.
174
  - **[PyTorch](https://pytorch.org/)** for powering the deep learning backend.
 
 
 
175
 
176
  > “Small tools, big impact.” — Let’s make machine learning approachable, one project at a time.
177
-
 
4
  colorFrom: indigo
5
  colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 5.46.1
8
  app_file: app.py
9
  pinned: false
10
  ---
 
28
 
29
  ## 🔗 Live Demo
30
 
31
+ - **Live App**: Try it out here: **[Bangla OCR Demo App](https://huggingface.co/spaces/frkhan/bangla-ocr)**
32
+ - **Project Story**: Curious about how this app was built? **[Read the full story on Medium](https://frkhan.medium.com/turning-pages-into-pixels-the-making-of-a-bangla-ocr-app-9022bbffcd60)** to see the journey from idea to deployment.
33
 
34
  ---
35
 
 
39
  | ---------------- | -------------------------------- |
40
  | `Gradio` | Web interface for user input |
41
  | `EasyOCR` | Text extraction from images |
42
+ | `OpenCV` | Image processing backend for EasyOCR |
43
  | `NumPy` | Array manipulation |
44
+ | `Langfuse` | Observability and tracing |
45
  | `Docker` | Containerized deployment |
46
  | `Docker Compose` | Service orchestration |
47
  | `PyTorch` | Backend for EasyOCR |
 
91
  python app.py
92
  ```
93
 
94
+ The app will be running at `http://127.0.0.1:12300`.
95
 
96
  *(When you're finished, you can leave the virtual environment by simply running the `deactivate` command.)*
97
 
 
99
 
100
  1. **Build the Docker image:**
101
  ```bash
102
+ docker build -t bangla-ocr-app .
103
  ```
104
 
105
  2. **Run the container:**
106
  ```bash
107
+ docker run -p 12300:7860 bangla-ocr-app
108
  ```
109
+ Open your browser and visit: http://localhost:12300
110
 
111
  ### 🧱 Option 3: Docker Compose
112
 
113
+ ```bash
114
+ # To run in the live environment. This automatically uses docker-compose.yml
115
+ docker-compose up --build
116
+
117
+ # Or, if you use the newer `docker compose` command, use the following
118
+
119
+ docker compose up --build
120
+ ```
121
+
122
+ Access the app at http://localhost:12300
123
+
124
+ ---
125
+
126
+
127
+ ```bash
128
+ # To run in a local environment, use docker-compose.dev.yml if you want code changes reflected without rebuilding the Docker container
129
+ docker-compose -f docker-compose.dev.yml up --build
130
+
131
+ # Or, if you use the newer `docker compose` command, use the following
132
+ docker compose -f docker-compose.dev.yml up --build
133
+
134
+ ```
135
+
136
+ Access the app at http://localhost:12300
137
+
138
 
139
  ---
140
 
 
158
 
159
  ```text
160
  bangla-ocr-app/
161
+ ├── app.py # Main application logic
162
+ ├── requirements.txt # Python dependencies
163
+ ├── Dockerfile # Container setup
164
+ ├── docker-compose.yml # Multi-container orchestration for production
165
+ ├── docker-compose.dev.yml # Multi-container orchestration for development
166
+ ├── .env # Environment variables for Langfuse (Optional)
167
+ └── README.md # Project documentation
168
  ```
169
 
170
  ## 🌍 Language Support
 
180
  # From
181
  reader = easyocr.Reader(['bn', 'en'], gpu=True)
182
 
183
+ # To (for example, adding Hindi, Arabic, Urdu, Malay, Chinese, and Japanese)
184
+ reader = easyocr.Reader(['bn', 'en', 'hi', 'ar', 'ur', 'ms', 'ch_sim', 'ja'], gpu=True)
185
  ```
186
 
187
 
 
196
  - **[EasyOCR](https://github.com/JaidedAI/EasyOCR)** for its powerful and accessible multilingual OCR library.
197
  - **[Gradio](https://www.gradio.app/)** for making it incredibly simple to create machine learning interfaces.
198
  - **[PyTorch](https://pytorch.org/)** for powering the deep learning backend.
199
+ - **[Docker](https://www.docker.com)** — Containerization platform for reproducible deployment across environments.
200
+ - **[Hugging Face Spaces](https://huggingface.co/spaces)** — Free hosting platform for ML demos with secret management and GPU support.
201
+ - **[Langfuse](https://langfuse.com/)** for providing excellent observability tools.
202
 
203
  > “Small tools, big impact.” — Let’s make machine learning approachable, one project at a time.
 
app.py CHANGED
@@ -1,20 +1,40 @@
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import easyocr
3
  import numpy as np
4
- from PIL import Image
5
  import time
6
  from pathlib import Path
7
-
8
- from pathlib import Path
9
 
10
  # Use home directory for model storage
11
- model_dir = Path("/tmp/easyocr_models")
12
  model_dir.mkdir(parents=True, exist_ok=True)
13
 
14
- reader = easyocr.Reader(['bn', 'en'], gpu=True, model_storage_directory=str(model_dir))
15
-
16
 
 
17
  def ocr_image(image):
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  if image is None:
19
  return "No image uploaded.", ""
20
  start_time = time.time()
@@ -27,17 +47,52 @@ def ocr_image(image):
27
  human_time = f"⏱ Time taken: {duration:.2f} seconds"
28
  return extracted_text, human_time
29
 
30
- iface = gr.Interface(
31
- fn=ocr_image,
32
- inputs=gr.Image(type="pil"),
33
- outputs=[
34
- gr.Textbox(label="Extracted Text"),
35
- gr.Textbox(label="Duration")
36
- ],
37
- title="Bangla OCR Demo",
38
- description="Upload an image with Bangla text to extract it using EasyOCR.",
39
- allow_flagging="never"
40
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  if __name__ == "__main__":
43
- iface.launch(server_name="0.0.0.0")
 
1
+ """
2
+ A Gradio web application for performing Optical Character Recognition (OCR)
3
+ on images containing Bangla and English text using the EasyOCR library.
4
+
5
+ This application provides a simple user interface to upload an image and view
6
+ the extracted text along with the processing time. It is instrumented with
7
+ Langfuse for observability.
8
+ """
9
+
10
  import gradio as gr
11
  import easyocr
12
  import numpy as np
 
13
  import time
14
  from pathlib import Path
15
+ from langfuse import observe
 
16
 
17
  # Use home directory for model storage
18
+ model_dir = Path("/app/easyocr_models")
19
  model_dir.mkdir(parents=True, exist_ok=True)
20
 
21
+ reader = easyocr.Reader(['bn', 'en'], gpu=False, model_storage_directory=str(model_dir))
 
22
 
23
+ @observe()
24
  def ocr_image(image):
25
+ """
26
+ Performs OCR on an image to extract Bangla and English text.
27
+
28
+ This function is decorated with `@observe` to trace its execution with Langfuse.
29
+
30
+ Args:
31
+ image (PIL.Image.Image): The image uploaded by the user via the Gradio interface.
32
+
33
+ Returns:
34
+ tuple[str, str]: A tuple containing:
35
+ - The extracted text as a single string.
36
+ - A formatted string indicating the processing time.
37
+ """
38
  if image is None:
39
  return "No image uploaded.", ""
40
  start_time = time.time()
 
47
  human_time = f"⏱ Time taken: {duration:.2f} seconds"
48
  return extracted_text, human_time
49
 
50
+
51
+ with gr.Blocks() as demo:
52
+ gr.HTML("""
53
+ <div style="display: flex; align-items: center; gap: 20px; flex-wrap: wrap; margin-bottom: 20px;">
54
+ <h1 style="margin: 0;">🇧🇩 Bangla OCR App</h1>
55
+ <div style="display: flex; gap: 10px; flex-wrap: wrap;">
56
+ <a href="https://github.com/JaidedAI/EasyOCR" target="_blank">
57
+ <img src="https://img.shields.io/badge/EasyOCR-OCR%20Engine-green" alt="EasyOCR">
58
+ </a>
59
+ <a href="https://opencv.org/" target="_blank">
60
+ <img src="https://img.shields.io/badge/OpenCV-Image%20Processing-blue?logo=opencv" alt="OpenCV">
61
+ </a>
62
+ <a href="https://numpy.org/" target="_blank">
63
+ <img src="https://img.shields.io/badge/NumPy-Arrays-blue?logo=numpy" alt="NumPy">
64
+ </a>
65
+ <a href="https://pytorch.org/" target="_blank">
66
+ <img src="https://img.shields.io/badge/PyTorch-Backend-orange?logo=pytorch" alt="PyTorch">
67
+ </a>
68
+ <a href="https://langfuse.com/" target="_blank">
69
+ <img src="https://img.shields.io/badge/Langfuse-Observability-blue" alt="Langfuse">
70
+ </a>
71
+ </div>
72
+ </div>
73
+ <div style="display: flex; gap: 15px; flex-wrap: wrap; margin-bottom: 20px; align-items: center;">
74
+ <div>
75
+ <span style="font-size: 16px;">📦 <strong>Source Code:</strong></span>
76
+ <a href="https://github.com/KI-IAN/bangla-ocr-app.git" target="_blank"><img src="https://img.shields.io/badge/GitHub-Repo-blue?style=for-the-badge&logo=github" alt="GitHub Repo"></a>
77
+ </div>
78
+ <div>
79
+ <span style="font-size: 16px;">📖 <strong>Project Story:</strong></span>
80
+ <a href="https://frkhan.medium.com/turning-pages-into-pixels-the-making-of-a-bangla-ocr-app-9022bbffcd60" target="_blank"><img src="https://img.shields.io/badge/Medium-Read%20Story-black?style=for-the-badge&logo=medium" alt="Read Story on Medium"></a>
81
+ </div>
82
+ </div>
83
+ """)
84
+
85
+ gr.Interface(
86
+ fn=ocr_image,
87
+ inputs=gr.Image(type="pil"),
88
+ outputs=[
89
+ gr.Textbox(label="Extracted Text", lines=20),
90
+ gr.Textbox(label="Duration")
91
+ ],
92
+ title="Bangla OCR App",
93
+ description="Upload an image with Bangla text to extract it using EasyOCR.",
94
+ allow_flagging="never"
95
+ )
96
 
97
  if __name__ == "__main__":
98
+ demo.launch(server_name="0.0.0.0")
docker-compose.dev.yml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: "3.8"
2
+ services:
3
+ bangla-ocr-app:
4
+ build:
5
+ context: .
6
+ dockerfile: Dockerfile.dev
7
+ container_name: bangla-ocr-app
8
+ ports:
9
+ - "12300:7860"
10
+ volumes:
11
+ - .:/app # Mount the entire project directory for live code reloading
12
+ - ./easyocr_models:/app/easyocr_models # Persist downloaded OCR models
13
+ environment:
14
+ - LANGFUSE_PUBLIC_KEY=${LANGFUSE_PUBLIC_KEY} # Load this key from .env in local/dev environment
15
+ - LANGFUSE_SECRET_KEY=${LANGFUSE_SECRET_KEY} # Load this key from .env in local/dev environment
16
+ - LANGFUSE_HOST=${LANGFUSE_HOST} # Load this key from .env in local/dev environment
17
+ - EASYOCR_MODULE_PATH=/app/easyocr_models # Force all models into the persistent volume
18
+ restart: unless-stopped
docker-compose.yml CHANGED
@@ -1,10 +1,17 @@
1
  version: "3.8"
2
  services:
3
- gradio-app:
4
- build: .
 
 
 
5
  ports:
6
- - "7860:7860"
7
- # volumes:
8
- # - .:/app
9
  environment:
10
- - PYTHONUNBUFFERED=1
 
 
 
 
 
1
  version: "3.8"
2
  services:
3
+ bangla-ocr-app:
4
+ build:
5
+ context: .
6
+ dockerfile: Dockerfile
7
+ container_name: bangla-ocr-app
8
  ports:
9
+ - "12300:7860"
10
+ volumes:
11
+ - ./easyocr_models:/app/easyocr_models
12
  environment:
13
+ - LANGFUSE_PUBLIC_KEY=${LANGFUSE_PUBLIC_KEY} # Load this key from .env in local/dev environment
14
+ - LANGFUSE_SECRET_KEY=${LANGFUSE_SECRET_KEY} # Load this key from .env in local/dev environment
15
+ - LANGFUSE_HOST=${LANGFUSE_HOST} # Load this key from .env in local/dev environment
16
+ - EASYOCR_MODULE_PATH=/app/easyocr_models # Force all models into the persistent volume
17
+ restart: unless-stopped
requirements.txt CHANGED
@@ -1,4 +1,9 @@
1
- gradio==5.42.0
 
 
2
  easyocr==1.7.2
 
 
3
  opencv-python-headless==4.8.0.76
4
- numpy==1.26.4
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cpu
2
+
3
+ gradio==5.46.1
4
  easyocr==1.7.2
5
+ torch==2.1.2
6
+ torchvision==0.16.2
7
  opencv-python-headless==4.8.0.76
8
+ numpy==1.26.4
9
+ langfuse==3.5.2