Spaces:

nit454
/

paddle_ocr_testing

Build error

App Files Files Community

nit454 committed on Oct 30, 2025

Commit

8df9245

verified ·

1 Parent(s): 064d620

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -33

app.py CHANGED Viewed

@@ -1,51 +1,39 @@
-import gradio as gr
 import numpy as np
 import random
 from paddleocr import PaddleOCR
-# Initialize PaddleOCR reader once (English, CPU)
 ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False)
-def ocr_paddle_with_random_scores(img, correct_text):
-    if img is None:
-        return "No image uploaded", "", ""
-    img_array = np.array(img)
     try:
-        results = ocr.ocr(img_array, cls=True)
         detected_text_lines = [line[1][0] for line in results]
         detected_text = "\n".join(detected_text_lines)
-        accuracy = random.uniform(0.80, 0.85)
-        pipeline_score = random.uniform(0.80, 0.85)
-        accuracy_str = f"{accuracy:.2%}"
-        pipeline_score_str = f"{pipeline_score:.2%}"
-        return detected_text, accuracy_str, pipeline_score_str
     except Exception as e:
-        error_msg = f"PaddleOCR Error: {str(e)}"
-        return error_msg, "", ""
-with gr.Blocks() as demo:
-    gr.Markdown("# PaddleOCR Demo with Lower Randomized Accuracy & Pipeline Scores")
-    with gr.Row():
-        img_input = gr.Image(type="pil", label="Upload Image")
-        correct_text_input = gr.Textbox(label="Enter Correct Text (for display only)", lines=4)
-    output_text = gr.Textbox(label="OCR Result", lines=10)
-    accuracy_output = gr.Textbox(label="Accuracy (Randomized)", interactive=False)
-    pipeline_output = gr.Textbox(label="Pipeline Integration Score (Randomized)", interactive=False)
-    run_button = gr.Button("Run OCR")
-    run_button.click(
-        ocr_paddle_with_random_scores,
-        inputs=[img_input, correct_text_input],
-        outputs=[output_text, accuracy_output, pipeline_output]
-    )
-demo.launch()

 import numpy as np
 import random
 from paddleocr import PaddleOCR
+from difflib import SequenceMatcher
+# Initialize PaddleOCR with English language (CPU)
 ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False)
+def calculate_similarity(text1, text2):
+    return SequenceMatcher(None, text1.lower().strip(), text2.lower().strip()).ratio()
+def paddleocr_with_accuracy(image_path, correct_text):
     try:
+        # Perform OCR on the image file path
+        results = ocr.ocr(image_path, cls=True)
+        # Extract recognized text lines
         detected_text_lines = [line[1][0] for line in results]
         detected_text = "\n".join(detected_text_lines)
+        # Calculate accuracy score as similarity ratio
+        accuracy = calculate_similarity(detected_text, correct_text)
+        # Simulate pipeline integration score (here same as accuracy)
+        pipeline_score = accuracy
+        print("OCR Detected Text:\n", detected_text)
+        print(f"\nAccuracy: {accuracy:.2%}")
+        print(f"Pipeline Integration Score: {pipeline_score:.2%}")
     except Exception as e:
+        print(f"PaddleOCR Error: {str(e)}")
+# Example usage
+if __name__ == "__main__":
+    image_file = "your_image.jpg"   # replace with your image path
+    ground_truth_text = """Enter the exact expected text from the image here."""
+    paddleocr_with_accuracy(image_file, ground_truth_text)