Spaces:

mrrtmob
/

kiri-ocr

Running

App Files Community

mrrtmob committed 19 days ago

Commit

5543d33

verified ·

1 Parent(s): bb956f6

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -30

app.py CHANGED Viewed

@@ -2,33 +2,40 @@
 Kiri OCR - Gradio Demo for Hugging Face Spaces
 A lightweight OCR library for English and Khmer documents.
-Supports ZeroGPU for free GPU inference.
 """
 import gradio as gr
 import numpy as np
 from PIL import Image
 import cv2
-import spaces
-# Global OCR instance
-ocr = None
-def load_ocr(device="cuda"):
     """Load the OCR model."""
     from kiri_ocr import OCR
     return OCR(
         model_path="mrrtmob/kiri-ocr",
         det_method="db",
-        device=device,
         verbose=False
     )
-@spaces.GPU(duration=60)
 def process_image(image, mode="lines", show_boxes=True):
     """
-    Process an image and extract text using GPU.
     Args:
         image: Input image (PIL Image or numpy array)
@@ -38,15 +45,11 @@ def process_image(image, mode="lines", show_boxes=True):
     Returns:
         Tuple of (annotated_image, extracted_text, detailed_results)
     """
-    global ocr
     if image is None:
         return None, "Please upload an image.", ""
     try:
-        # Load OCR with GPU
-        if ocr is None:
-            ocr = load_ocr(device="cuda")
         # Convert to numpy array if needed
         if isinstance(image, Image.Image):
@@ -67,16 +70,16 @@ def process_image(image, mode="lines", show_boxes=True):
         # Save temp file for processing
         import tempfile
-        import os
         with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
             temp_path = f.name
         cv2.imwrite(temp_path, img_display)
         # Process document
-        results = ocr.process_document(temp_path, mode=mode, verbose=False)
         # Clean up temp file
         os.unlink(temp_path)
         if not results:
@@ -146,10 +149,9 @@ def process_image(image, mode="lines", show_boxes=True):
         return image, error_msg, ""
-@spaces.GPU(duration=30)
 def recognize_single_line(image):
     """
-    Recognize text from a single-line image (no detection) using GPU.
     Args:
         image: Input image containing a single line of text
@@ -157,15 +159,11 @@ def recognize_single_line(image):
     Returns:
         Tuple of (text, confidence)
     """
-    global ocr
     if image is None:
         return "Please upload an image.", ""
     try:
-        # Load OCR with GPU
-        if ocr is None:
-            ocr = load_ocr(device="cuda")
         # Convert to numpy array
         if isinstance(image, Image.Image):
@@ -186,9 +184,9 @@ def recognize_single_line(image):
         # Preprocess and recognize
         from kiri_ocr.model import preprocess_pil
         img_pil = Image.fromarray(img_gray)
-        img_tensor = preprocess_pil(ocr.cfg, img_pil)
-        text, confidence = ocr.recognize_region(img_tensor)
         return text, f"Confidence: {confidence*100:.1f}%"
@@ -205,8 +203,6 @@ with gr.Blocks(title="Kiri OCR - Khmer & English OCR") as demo:
         **Lightweight OCR for English and Khmer documents**
         Upload an image containing text and get the extracted text. Supports both English and Khmer languages.
-        🚀 **Powered by ZeroGPU** - Free GPU inference!
         """
     )
@@ -287,11 +283,10 @@ with gr.Blocks(title="Kiri OCR - Khmer & English OCR") as demo:
                 Kiri OCR is a lightweight OCR library designed for **English** and **Khmer** documents.
                 ### Features
-                - 🚀 **Fast**: Optimized for quick text extraction with GPU acceleration
                 - 🎯 **Accurate**: Transformer-based architecture with CTC + Attention decoder
                 - 🌏 **Multilingual**: Supports English and Khmer text
                 - 📦 **Lightweight**: Easy to deploy and use
-                - ⚡ **ZeroGPU**: Free GPU inference on Hugging Face Spaces
                 ### Technical Details
                 - **Model Architecture**: CNN backbone + Transformer encoder + CTC/Attention decoder
@@ -331,4 +326,4 @@ with gr.Blocks(title="Kiri OCR - Khmer & English OCR") as demo:
 # Launch
 if __name__ == "__main__":
-    demo.launch()

 Kiri OCR - Gradio Demo for Hugging Face Spaces
 A lightweight OCR library for English and Khmer documents.
 """
 import gradio as gr
 import numpy as np
 from PIL import Image
 import cv2
+# Initialize OCR
+def load_ocr():
     """Load the OCR model."""
     from kiri_ocr import OCR
     return OCR(
         model_path="mrrtmob/kiri-ocr",
         det_method="db",
+        device="cpu",
         verbose=False
     )
+# Global OCR instance (loaded once)
+ocr = None
+def get_ocr():
+    """Get or create OCR instance."""
+    global ocr
+    if ocr is None:
+        ocr = load_ocr()
+    return ocr
 def process_image(image, mode="lines", show_boxes=True):
     """
+    Process an image and extract text.
     Args:
         image: Input image (PIL Image or numpy array)
     Returns:
         Tuple of (annotated_image, extracted_text, detailed_results)
     """
     if image is None:
         return None, "Please upload an image.", ""
     try:
+        ocr_engine = get_ocr()
         # Convert to numpy array if needed
         if isinstance(image, Image.Image):
         # Save temp file for processing
         import tempfile
         with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
             temp_path = f.name
         cv2.imwrite(temp_path, img_display)
         # Process document
+        results = ocr_engine.process_document(temp_path, mode=mode, verbose=False)
         # Clean up temp file
+        import os
         os.unlink(temp_path)
         if not results:
         return image, error_msg, ""
 def recognize_single_line(image):
     """
+    Recognize text from a single-line image (no detection).
     Args:
         image: Input image containing a single line of text
     Returns:
         Tuple of (text, confidence)
     """
     if image is None:
         return "Please upload an image.", ""
     try:
+        ocr_engine = get_ocr()
         # Convert to numpy array
         if isinstance(image, Image.Image):
         # Preprocess and recognize
         from kiri_ocr.model import preprocess_pil
         img_pil = Image.fromarray(img_gray)
+        img_tensor = preprocess_pil(ocr_engine.cfg, img_pil)
+        text, confidence = ocr_engine.recognize_region(img_tensor)
         return text, f"Confidence: {confidence*100:.1f}%"
         **Lightweight OCR for English and Khmer documents**
         Upload an image containing text and get the extracted text. Supports both English and Khmer languages.
         """
     )
                 Kiri OCR is a lightweight OCR library designed for **English** and **Khmer** documents.
                 ### Features
+                - 🚀 **Fast**: Optimized for quick text extraction
                 - 🎯 **Accurate**: Transformer-based architecture with CTC + Attention decoder
                 - 🌏 **Multilingual**: Supports English and Khmer text
                 - 📦 **Lightweight**: Easy to deploy and use
                 ### Technical Details
                 - **Model Architecture**: CNN backbone + Transformer encoder + CTC/Attention decoder
 # Launch
 if __name__ == "__main__":
+    demo.launch()