mrrtmob committed on
Commit
5543d33
·
verified ·
1 Parent(s): bb956f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -30
app.py CHANGED
@@ -2,33 +2,40 @@
2
  Kiri OCR - Gradio Demo for Hugging Face Spaces
3
 
4
  A lightweight OCR library for English and Khmer documents.
5
- Supports ZeroGPU for free GPU inference.
6
  """
7
  import gradio as gr
8
  import numpy as np
9
  from PIL import Image
10
  import cv2
11
- import spaces
12
-
13
- # Global OCR instance
14
- ocr = None
15
 
16
 
17
- def load_ocr(device="cuda"):
 
18
  """Load the OCR model."""
19
  from kiri_ocr import OCR
20
  return OCR(
21
  model_path="mrrtmob/kiri-ocr",
22
  det_method="db",
23
- device=device,
24
  verbose=False
25
  )
26
 
27
 
28
- @spaces.GPU(duration=60)
 
 
 
 
 
 
 
 
 
 
 
29
  def process_image(image, mode="lines", show_boxes=True):
30
  """
31
- Process an image and extract text using GPU.
32
 
33
  Args:
34
  image: Input image (PIL Image or numpy array)
@@ -38,15 +45,11 @@ def process_image(image, mode="lines", show_boxes=True):
38
  Returns:
39
  Tuple of (annotated_image, extracted_text, detailed_results)
40
  """
41
- global ocr
42
-
43
  if image is None:
44
  return None, "Please upload an image.", ""
45
 
46
  try:
47
- # Load OCR with GPU
48
- if ocr is None:
49
- ocr = load_ocr(device="cuda")
50
 
51
  # Convert to numpy array if needed
52
  if isinstance(image, Image.Image):
@@ -67,16 +70,16 @@ def process_image(image, mode="lines", show_boxes=True):
67
 
68
  # Save temp file for processing
69
  import tempfile
70
- import os
71
  with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
72
  temp_path = f.name
73
 
74
  cv2.imwrite(temp_path, img_display)
75
 
76
  # Process document
77
- results = ocr.process_document(temp_path, mode=mode, verbose=False)
78
 
79
  # Clean up temp file
 
80
  os.unlink(temp_path)
81
 
82
  if not results:
@@ -146,10 +149,9 @@ def process_image(image, mode="lines", show_boxes=True):
146
  return image, error_msg, ""
147
 
148
 
149
- @spaces.GPU(duration=30)
150
  def recognize_single_line(image):
151
  """
152
- Recognize text from a single-line image (no detection) using GPU.
153
 
154
  Args:
155
  image: Input image containing a single line of text
@@ -157,15 +159,11 @@ def recognize_single_line(image):
157
  Returns:
158
  Tuple of (text, confidence)
159
  """
160
- global ocr
161
-
162
  if image is None:
163
  return "Please upload an image.", ""
164
 
165
  try:
166
- # Load OCR with GPU
167
- if ocr is None:
168
- ocr = load_ocr(device="cuda")
169
 
170
  # Convert to numpy array
171
  if isinstance(image, Image.Image):
@@ -186,9 +184,9 @@ def recognize_single_line(image):
186
  # Preprocess and recognize
187
  from kiri_ocr.model import preprocess_pil
188
  img_pil = Image.fromarray(img_gray)
189
- img_tensor = preprocess_pil(ocr.cfg, img_pil)
190
 
191
- text, confidence = ocr.recognize_region(img_tensor)
192
 
193
  return text, f"Confidence: {confidence*100:.1f}%"
194
 
@@ -205,8 +203,6 @@ with gr.Blocks(title="Kiri OCR - Khmer & English OCR") as demo:
205
  **Lightweight OCR for English and Khmer documents**
206
 
207
  Upload an image containing text and get the extracted text. Supports both English and Khmer languages.
208
-
209
- 🚀 **Powered by ZeroGPU** - Free GPU inference!
210
  """
211
  )
212
 
@@ -287,11 +283,10 @@ with gr.Blocks(title="Kiri OCR - Khmer & English OCR") as demo:
287
  Kiri OCR is a lightweight OCR library designed for **English** and **Khmer** documents.
288
 
289
  ### Features
290
- 🚀 **Fast**: Optimized for quick text extraction with GPU acceleration
291
  - 🎯 **Accurate**: Transformer-based architecture with CTC + Attention decoder
292
  - 🌏 **Multilingual**: Supports English and Khmer text
293
  - 📦 **Lightweight**: Easy to deploy and use
294
- ⚡ **ZeroGPU**: Free GPU inference on Hugging Face Spaces
295
 
296
  ### Technical Details
297
  - **Model Architecture**: CNN backbone + Transformer encoder + CTC/Attention decoder
@@ -331,4 +326,4 @@ with gr.Blocks(title="Kiri OCR - Khmer & English OCR") as demo:
331
 
332
  # Launch
333
  if __name__ == "__main__":
334
- demo.launch()
 
2
  Kiri OCR - Gradio Demo for Hugging Face Spaces
3
 
4
  A lightweight OCR library for English and Khmer documents.
 
5
  """
6
  import gradio as gr
7
  import numpy as np
8
  from PIL import Image
9
  import cv2
 
 
 
 
10
 
11
 
12
+ # Initialize OCR
13
+ def load_ocr():
14
  """Load the OCR model."""
15
  from kiri_ocr import OCR
16
  return OCR(
17
  model_path="mrrtmob/kiri-ocr",
18
  det_method="db",
19
+ device="cpu",
20
  verbose=False
21
  )
22
 
23
 
24
+ # Global OCR instance (loaded once)
25
+ ocr = None
26
+
27
+
28
+ def get_ocr():
29
+ """Get or create OCR instance."""
30
+ global ocr
31
+ if ocr is None:
32
+ ocr = load_ocr()
33
+ return ocr
34
+
35
+
36
  def process_image(image, mode="lines", show_boxes=True):
37
  """
38
+ Process an image and extract text.
39
 
40
  Args:
41
  image: Input image (PIL Image or numpy array)
 
45
  Returns:
46
  Tuple of (annotated_image, extracted_text, detailed_results)
47
  """
 
 
48
  if image is None:
49
  return None, "Please upload an image.", ""
50
 
51
  try:
52
+ ocr_engine = get_ocr()
 
 
53
 
54
  # Convert to numpy array if needed
55
  if isinstance(image, Image.Image):
 
70
 
71
  # Save temp file for processing
72
  import tempfile
 
73
  with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
74
  temp_path = f.name
75
 
76
  cv2.imwrite(temp_path, img_display)
77
 
78
  # Process document
79
+ results = ocr_engine.process_document(temp_path, mode=mode, verbose=False)
80
 
81
  # Clean up temp file
82
+ import os
83
  os.unlink(temp_path)
84
 
85
  if not results:
 
149
  return image, error_msg, ""
150
 
151
 
 
152
  def recognize_single_line(image):
153
  """
154
+ Recognize text from a single-line image (no detection).
155
 
156
  Args:
157
  image: Input image containing a single line of text
 
159
  Returns:
160
  Tuple of (text, confidence)
161
  """
 
 
162
  if image is None:
163
  return "Please upload an image.", ""
164
 
165
  try:
166
+ ocr_engine = get_ocr()
 
 
167
 
168
  # Convert to numpy array
169
  if isinstance(image, Image.Image):
 
184
  # Preprocess and recognize
185
  from kiri_ocr.model import preprocess_pil
186
  img_pil = Image.fromarray(img_gray)
187
+ img_tensor = preprocess_pil(ocr_engine.cfg, img_pil)
188
 
189
+ text, confidence = ocr_engine.recognize_region(img_tensor)
190
 
191
  return text, f"Confidence: {confidence*100:.1f}%"
192
 
 
203
  **Lightweight OCR for English and Khmer documents**
204
 
205
  Upload an image containing text and get the extracted text. Supports both English and Khmer languages.
 
 
206
  """
207
  )
208
 
 
283
  Kiri OCR is a lightweight OCR library designed for **English** and **Khmer** documents.
284
 
285
  ### Features
286
+ - 🚀 **Fast**: Optimized for quick text extraction
287
  - 🎯 **Accurate**: Transformer-based architecture with CTC + Attention decoder
288
  - 🌏 **Multilingual**: Supports English and Khmer text
289
  - 📦 **Lightweight**: Easy to deploy and use
 
290
 
291
  ### Technical Details
292
  - **Model Architecture**: CNN backbone + Transformer encoder + CTC/Attention decoder
 
326
 
327
  # Launch
328
  if __name__ == "__main__":
329
+ demo.launch()