Spaces:

mrrtmob
/

kiri-ocr

Running

App Files Community

mrrtmob committed 21 days ago

Commit

36c3ce2

verified ·

1 Parent(s): 901fda1

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -20

app.py CHANGED Viewed

@@ -10,34 +10,35 @@ import cv2
 import tempfile
 import os
-# Initialize OCR (lazy load)
-def load_ocr():
-    """Load the OCR model."""
     from kiri_ocr import OCR
-    print("Loading OCR model...")
     return OCR(
         model_path="mrrtmob/kiri-ocr",
         det_method="db",
         device="cpu",
         verbose=False
     )
-# Global OCR instance
-ocr = None
-def get_ocr():
-    """Get or create OCR instance."""
-    global ocr
-    if ocr is None:
-        ocr = load_ocr()
-    return ocr
-def process_document_stream(image):
     """
     Process document image with real-time character streaming.
     Args:
         image: Input image (PIL Image or numpy array)
     Yields:
         Tuple of (annotated_image, extracted_text)
@@ -47,7 +48,7 @@ def process_document_stream(image):
         return
     try:
-        ocr_engine = get_ocr()
         # Save temp file for processing (required by current API)
         # Convert PIL to BGR numpy array first if needed
@@ -117,16 +118,20 @@ def process_document_stream(image):
         yield image, f"Error: {str(e)}\n{traceback.format_exc()}"
-def recognize_line_stream(image):
     """
     Stream text from single line image.
     """
     if image is None:
         yield "Please upload an image."
         return
     try:
-        ocr_engine = get_ocr()
         # Save temp file
         if isinstance(image, Image.Image):
@@ -181,6 +186,14 @@ with gr.Blocks(title="Kiri OCR - Streaming Demo", css=css, theme=gr.themes.Soft(
                         sources=["upload", "clipboard", "webcam"]
                     )
                     with gr.Row():
                         doc_btn = gr.Button("⚡ Stream Text", variant="primary")
                         doc_stop = gr.Button("⏹️ Stop", variant="secondary", visible=False)
@@ -207,6 +220,15 @@ with gr.Blocks(title="Kiri OCR - Streaming Demo", css=css, theme=gr.themes.Soft(
                         type="pil",
                         sources=["upload", "clipboard"]
                     )
                     with gr.Row():
                         line_btn = gr.Button("⚡ Stream Recognize", variant="primary")
                         line_stop = gr.Button("⏹️ Stop", variant="secondary", visible=False)
@@ -231,7 +253,7 @@ with gr.Blocks(title="Kiri OCR - Streaming Demo", css=css, theme=gr.themes.Soft(
         outputs=[doc_btn, doc_stop]
     ).then(
         fn=process_document_stream,
-        inputs=[doc_input],
         outputs=[doc_output_img, doc_output_text]
     ).then(
         fn=reset_doc_buttons,
@@ -255,7 +277,7 @@ with gr.Blocks(title="Kiri OCR - Streaming Demo", css=css, theme=gr.themes.Soft(
         outputs=[line_btn, line_stop]
     ).then(
         fn=recognize_line_stream,
-        inputs=line_input,
         outputs=line_output_text
     ).then(
         fn=reset_line_buttons,
@@ -270,10 +292,17 @@ with gr.Blocks(title="Kiri OCR - Streaming Demo", css=css, theme=gr.themes.Soft(
     gr.Markdown(
         """
         [GitHub Repository](https://github.com/mrrtmob/kiri-ocr) | [Hugging Face Model](https://huggingface.co/mrrtmob/kiri-ocr)
         """
     )
 # Launch
 if __name__ == "__main__":
-    demo.queue().launch()

 import tempfile
 import os
+# Global OCR instances (one per decode method)
+ocr_instances = {}
+def load_ocr(decode_method="accurate"):
+    """Load the OCR model with specified decode method."""
     from kiri_ocr import OCR
+    print(f"Loading OCR model with decode_method={decode_method}...")
     return OCR(
         model_path="mrrtmob/kiri-ocr",
         det_method="db",
+        decode_method=decode_method,
         device="cpu",
         verbose=False
     )
+def get_ocr(decode_method="accurate"):
+    """Get or create OCR instance for the specified decode method."""
+    global ocr_instances
+    if decode_method not in ocr_instances:
+        ocr_instances[decode_method] = load_ocr(decode_method)
+    return ocr_instances[decode_method]
+def process_document_stream(image, decode_method):
     """
     Process document image with real-time character streaming.
     Args:
         image: Input image (PIL Image or numpy array)
+        decode_method: Decode method to use (fast, accurate, or beam)
     Yields:
         Tuple of (annotated_image, extracted_text)
         return
     try:
+        ocr_engine = get_ocr(decode_method)
         # Save temp file for processing (required by current API)
         # Convert PIL to BGR numpy array first if needed
         yield image, f"Error: {str(e)}\n{traceback.format_exc()}"
+def recognize_line_stream(image, decode_method):
     """
     Stream text from single line image.
+    Args:
+        image: Input image
+        decode_method: Decode method to use (fast, accurate, or beam)
     """
     if image is None:
         yield "Please upload an image."
         return
     try:
+        ocr_engine = get_ocr(decode_method)
         # Save temp file
         if isinstance(image, Image.Image):
                         sources=["upload", "clipboard", "webcam"]
                     )
+                    # Decode method selector
+                    doc_decode_method = gr.Radio(
+                        choices=["fast", "accurate", "beam"],
+                        value="accurate",
+                        label="Decode Method",
+                        info="Fast: Fastest, lower accuracy | Accurate: Balanced | Beam: Slowest, highest accuracy"
+                    )
                     with gr.Row():
                         doc_btn = gr.Button("⚡ Stream Text", variant="primary")
                         doc_stop = gr.Button("⏹️ Stop", variant="secondary", visible=False)
                         type="pil",
                         sources=["upload", "clipboard"]
                     )
+                    # Decode method selector
+                    line_decode_method = gr.Radio(
+                        choices=["fast", "accurate", "beam"],
+                        value="accurate",
+                        label="Decode Method",
+                        info="Fast: Fastest, lower accuracy | Accurate: Balanced | Beam: Slowest, highest accuracy"
+                    )
                     with gr.Row():
                         line_btn = gr.Button("⚡ Stream Recognize", variant="primary")
                         line_stop = gr.Button("⏹️ Stop", variant="secondary", visible=False)
         outputs=[doc_btn, doc_stop]
     ).then(
         fn=process_document_stream,
+        inputs=[doc_input, doc_decode_method],
         outputs=[doc_output_img, doc_output_text]
     ).then(
         fn=reset_doc_buttons,
         outputs=[line_btn, line_stop]
     ).then(
         fn=recognize_line_stream,
+        inputs=[line_input, line_decode_method],
         outputs=line_output_text
     ).then(
         fn=reset_line_buttons,
     gr.Markdown(
         """
+        ---
+        ### 🔍 Decode Methods:
+        - **Fast**: Greedy decoding - fastest speed, good for quick previews
+        - **Accurate**: Default balanced mode - good speed and accuracy
+        - **Beam**: Beam search decoding - slowest but highest accuracy
+        ---
         [GitHub Repository](https://github.com/mrrtmob/kiri-ocr) | [Hugging Face Model](https://huggingface.co/mrrtmob/kiri-ocr)
         """
     )
 # Launch
 if __name__ == "__main__":
+    demo.queue().launch()