Spaces:

ARC-NECT
/

OCR_tester

Sleeping

App Files Files Community

Daniel Jarvis committed on May 26, 2025

Commit

807fdd0

1 Parent(s): bd837a7

Add application file

Browse files

Files changed (3) hide show

app.py +207 -0
packages.txt +1 -0
requirements.txt +1 -0

app.py ADDED Viewed

	@@ -0,0 +1,207 @@

+# Method 1: EasyOCR (Recommended - Fast & Lightweight)
+import os
+os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
+#os.environ["OMP_NUM_THREADS"] = "1"  # Optional: limit threads
+import gradio as gr
+import datetime
+import easyocr
+import numpy as np
+from PIL import Image
+def ocr_easyocr(image):
+    """EasyOCR method - supports 80+ languages, very efficient"""
+    try:
+        # Initialize reader (cache it for better performance)
+        if not hasattr(ocr_easyocr, "reader"):
+            ocr_easyocr.reader = easyocr.Reader(['en'], gpu=False)
+        # Convert PIL to numpy array
+        img_array = np.array(image)
+        # Extract text
+        results = ocr_easyocr.reader.readtext(img_array)
+        # Format results
+        extracted_text = []
+        for (bbox, text, confidence) in results:
+            extracted_text.append(f"{text} (confidence: {confidence:.2f})")
+        return "\n".join(extracted_text) if extracted_text else "No text detected"
+    except Exception as e:
+        return f"Error: {str(e)}"
+# Create Gradio app for EasyOCR
+def create_easyocr_app():
+    with gr.Blocks(title="EasyOCR Text Extractor") as app:
+        gr.Markdown("# EasyOCR Text Extraction")
+        gr.Markdown("Upload an image to extract text using EasyOCR")
+        with gr.Row():
+            image_input = gr.Image(type="pil", label="Upload Image")
+            text_output = gr.Textbox(label="Extracted Text", lines=10)
+        extract_btn = gr.Button("Extract Text", variant="primary")
+        extract_btn.click(ocr_easyocr, inputs=image_input, outputs=text_output)
+        # Auto-extract on image upload
+        image_input.change(ocr_easyocr, inputs=image_input, outputs=text_output)
+    return app
+# Method 2: Tesseract OCR (Classic & Reliable)
+import pytesseract
+from PIL import Image
+def ocr_tesseract(image):
+    """Tesseract OCR method - classic and reliable"""
+    try:
+        # Basic OCR
+        text = pytesseract.image_to_string(image)
+        # Get detailed data with confidence scores
+        data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
+        # Filter out low confidence text
+        filtered_text = []
+        for i, conf in enumerate(data['conf']):
+            if int(conf) > 30:  # confidence threshold
+                word = data['text'][i].strip()
+                if word:
+                    filtered_text.append(f"{word} ({conf}% confidence)")
+        result = text.strip() if text.strip() else "No text detected"
+        detailed = "\n".join(filtered_text) if filtered_text else "No high-confidence text"
+        return f"Text:\n{result}\n\nDetailed (>30% confidence):\n{detailed}"
+    except Exception as e:
+        return f"Error: {str(e)}"
+# Method 3: TrOCR (Hugging Face Transformers)
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+import torch
+def ocr_trocr(image):
+    """TrOCR method - transformer-based OCR"""
+    try:
+        # Initialize models (cache them)
+        if not hasattr(ocr_trocr, "processor"):
+            ocr_trocr.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
+            ocr_trocr.model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
+        # Process image
+        pixel_values = ocr_trocr.processor(image, return_tensors="pt").pixel_values
+        generated_ids = ocr_trocr.model.generate(pixel_values)
+        generated_text = ocr_trocr.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        return generated_text if generated_text.strip() else "No text detected"
+    except Exception as e:
+        return f"Error: {str(e)}"
+# Method 4: PaddleOCR (Lightweight & Fast)
+from paddleocr import PaddleOCR
+import cv2
+def ocr_paddle(image):
+    """PaddleOCR method - lightweight and fast"""
+    try:
+        # Initialize PaddleOCR (cache it)
+        if not hasattr(ocr_paddle, "ocr"):
+            ocr_paddle.ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)
+        # Convert PIL to OpenCV format
+        img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+        # Extract text
+        results = ocr_paddle.ocr.ocr(img_cv, cls=True)
+        if results and results[0]:
+            extracted_text = []
+            for line in results[0]:
+                text = line[1][0]
+                confidence = line[1][1]
+                extracted_text.append(f"{text} (confidence: {confidence:.2f})")
+            return "\n".join(extracted_text)
+        else:
+            return "No text detected"
+    except Exception as e:
+        return f"Error: {str(e)}"
+### Test gradio UI
+# Complete Multi-Method Gradio App
+def create_multi_ocr_app():
+    """Complete app with multiple OCR methods"""
+    def process_with_method(image, method):
+        start_time = datetime.datetime.now()
+        if image is None:
+            return "Please upload an image","00:00:00"
+        if method == "EasyOCR":
+            results = ocr_easyocr(image)
+        elif method == "Tesseract":
+            results = ocr_tesseract(image)
+        elif method == "TrOCR":
+            results =ocr_trocr(image)
+        elif method == "PaddleOCR":
+            results = ocr_paddle(image)
+        else:
+            results = "Invalid method selected"
+        try:
+            elapsed_time = datetime.datetime.now() - start_time
+        except Exception as e:
+            elapsed_time = datetime.timedelta(seconds=0)
+            print(f"Error calculating elapsed time: {str(e)}")
+        return results, str(elapsed_time)
+    with gr.Blocks(title="Multi-OCR Comparator") as app:
+        gr.Markdown("# Multi-Method OCR Comparison")
+        gr.Markdown("Compare different OCR methods on your images")
+        with gr.Row():
+            with gr.Column():
+                image_input = gr.Image(type="pil", label="Upload Image")
+                method_dropdown = gr.Dropdown(
+                    choices=["EasyOCR", "Tesseract", "TrOCR", "PaddleOCR"],
+                    value="EasyOCR",
+                    label="OCR Method"
+                )
+                extract_btn = gr.Button("Extract Text", variant="primary")
+            with gr.Column():
+                text_output = gr.Textbox(label="Extracted Text", lines=15)
+                elapsed_time_output = gr.Textbox(label="Elapsed Time", lines=1, value="00:00:00")
+        # Process on button click
+        extract_btn.click(
+            process_with_method,
+            inputs=[image_input, method_dropdown],
+            outputs=[text_output,elapsed_time_output]
+        )
+        # Auto-process on image change
+        image_input.change(
+            process_with_method,
+            inputs=[image_input, method_dropdown],
+            outputs=[text_output,elapsed_time_output]
+        )
+    return app
+# Launch instructions
+if __name__ == "__main__":
+    app = create_multi_ocr_app()
+    app.launch()

packages.txt ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ tesseract-ocr
2	+ tesseract-ocr-eng
3	+ libtesseract-dev

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@


1	+ gradio
2	+ easyocr
3	+ Pillow
4	+ numpy