nit454 committed on
Commit
8df9245
·
verified ·
1 Parent(s): 064d620

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -33
app.py CHANGED
@@ -1,51 +1,39 @@
1
- import gradio as gr
2
  import numpy as np
3
  import random
4
  from paddleocr import PaddleOCR
 
5
 
6
- # Initialize PaddleOCR reader once (English, CPU)
7
  ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False)
8
 
9
- def ocr_paddle_with_random_scores(img, correct_text):
10
- if img is None:
11
- return "No image uploaded", "", ""
12
-
13
- img_array = np.array(img)
14
 
 
15
  try:
16
- results = ocr.ocr(img_array, cls=True)
 
17
 
 
18
  detected_text_lines = [line[1][0] for line in results]
19
  detected_text = "\n".join(detected_text_lines)
20
 
21
- accuracy = random.uniform(0.80, 0.85)
22
- pipeline_score = random.uniform(0.80, 0.85)
23
 
24
- accuracy_str = f"{accuracy:.2%}"
25
- pipeline_score_str = f"{pipeline_score:.2%}"
26
 
27
- return detected_text, accuracy_str, pipeline_score_str
 
 
28
 
29
  except Exception as e:
30
- error_msg = f"PaddleOCR Error: {str(e)}"
31
- return error_msg, "", ""
32
-
33
- with gr.Blocks() as demo:
34
- gr.Markdown("# PaddleOCR Demo with Lower Randomized Accuracy & Pipeline Scores")
35
-
36
- with gr.Row():
37
- img_input = gr.Image(type="pil", label="Upload Image")
38
- correct_text_input = gr.Textbox(label="Enter Correct Text (for display only)", lines=4)
39
-
40
- output_text = gr.Textbox(label="OCR Result", lines=10)
41
- accuracy_output = gr.Textbox(label="Accuracy (Randomized)", interactive=False)
42
- pipeline_output = gr.Textbox(label="Pipeline Integration Score (Randomized)", interactive=False)
43
 
44
- run_button = gr.Button("Run OCR")
45
- run_button.click(
46
- ocr_paddle_with_random_scores,
47
- inputs=[img_input, correct_text_input],
48
- outputs=[output_text, accuracy_output, pipeline_output]
49
- )
50
 
51
- demo.launch()
 
 
1
  import numpy as np
2
  import random
3
  from paddleocr import PaddleOCR
4
+ from difflib import SequenceMatcher
5
 
6
# Build the PaddleOCR engine once at import time so every OCR call in this
# module reuses the same instance. Configured for English text, running on
# CPU (use_gpu=False), with angle classification switched on (use_angle_cls=True).
ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False)
8
 
9
def calculate_similarity(text1, text2):
    """Return the similarity ratio between two strings as a float in [0, 1].

    Both inputs are lower-cased and stripped of leading/trailing whitespace
    before being compared with ``difflib.SequenceMatcher``, so the comparison
    ignores case and surrounding whitespace.
    """
    normalized_first = text1.lower().strip()
    normalized_second = text2.lower().strip()
    matcher = SequenceMatcher(None, normalized_first, normalized_second)
    return matcher.ratio()
 
 
 
11
 
12
def _is_text_line(item):
    """Return True if *item* has the shape of one OCR line: [box, (text, score)]."""
    if not isinstance(item, (list, tuple)) or len(item) != 2:
        return False
    recognition = item[1]
    return (
        isinstance(recognition, (list, tuple))
        and len(recognition) > 0
        and isinstance(recognition[0], str)
    )


def _extract_text_lines(results):
    """Flatten the value returned by ``ocr.ocr`` into a list of text strings.

    Handles both result layouts:
    * nested, one list of lines per image (PaddleOCR 2.6+), where an image
      with no detections is reported as ``None`` or an empty list;
    * flat, a plain list of lines (older releases).
    """
    text_lines = []
    for entry in results or []:
        if not entry:
            # Image/page with no detected text: nothing to collect.
            continue
        if _is_text_line(entry):
            # Flat layout: the entry itself is a single recognized line.
            text_lines.append(entry[1][0])
        else:
            # Nested layout: the entry is the list of lines for one image.
            text_lines.extend(
                line[1][0] for line in entry if _is_text_line(line)
            )
    return text_lines


def paddleocr_with_accuracy(image_path, correct_text):
    """Run OCR on an image and print the text plus similarity-based scores.

    Args:
        image_path: Image passed straight to ``ocr.ocr`` (a file path in the
            example usage).
        correct_text: Ground-truth text the OCR output is compared against.

    Returns:
        None. The detected text, the accuracy and the pipeline integration
        score are printed. Any exception raised along the way is caught and
        reported as a ``PaddleOCR Error: ...`` line instead of propagating.
    """
    try:
        # Perform OCR on the image file path.
        results = ocr.ocr(image_path, cls=True)

        # Extract recognized text lines, tolerating per-image nesting and
        # images for which nothing was detected.
        detected_text_lines = _extract_text_lines(results)
        detected_text = "\n".join(detected_text_lines)

        # Accuracy is the similarity ratio between OCR output and ground truth.
        accuracy = calculate_similarity(detected_text, correct_text)

        # Simulated pipeline integration score (here same as accuracy).
        pipeline_score = accuracy

        print("OCR Detected Text:\n", detected_text)
        print(f"\nAccuracy: {accuracy:.2%}")
        print(f"Pipeline Integration Score: {pipeline_score:.2%}")

    except Exception as e:
        # Top-level boundary for this script: report the failure, don't crash.
        print(f"PaddleOCR Error: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
# Example usage: run the OCR check on a single image when executed as a script.
if __name__ == "__main__":
    # Replace with the path of the image to analyse.
    image_file = "your_image.jpg"
    # Replace with the exact text the image is expected to contain.
    ground_truth_text = "Enter the exact expected text from the image here."

    paddleocr_with_accuracy(
        image_path=image_file,
        correct_text=ground_truth_text,
    )