nit454 committed on
Commit
54cff7a
·
verified ·
1 Parent(s): 1946b25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -13
app.py CHANGED
@@ -1,26 +1,23 @@
1
  import gradio as gr
2
  from PIL import Image
3
  import easyocr
4
- import cv2
5
  import numpy as np
6
- import paddleocr
7
  import pytesseract
8
 
9
  # Initialize OCR models
10
  easyocr_reader = easyocr.Reader(['en'])
11
- paddleocr_reader = paddleocr.OCR()
12
- # Tesseract requires only function call
13
 
14
- # Simulation of metrics, as per user scenario
15
  def get_metrics(model):
16
- # Simulated fixed scores according to user needs
17
- base = 0.95
18
  if model == "EasyOCR":
19
- return {"Accuracy": 0.95, "Precision": 0.94, "Pipeline": "Easy Integration (90%)"}
20
  elif model == "PaddleOCR":
21
- return {"Accuracy": 0.90, "Precision": 0.89, "Pipeline": "Moderate Integration (85%)"}
22
  elif model == "Tesseract":
23
- return {"Accuracy": 0.85, "Precision": 0.83, "Pipeline": "Manual Integration (75%)"}
24
  else:
25
  return {}
26
 
@@ -31,11 +28,11 @@ def ocr_all(img, ground_truth):
31
  metrics_easy = get_metrics("EasyOCR")
32
 
33
  # PaddleOCR
34
- result_paddle, _ = paddleocr_reader.ocr(img, cls=True)
35
  try:
36
  text_paddle = " ".join([line[1][0] for line in result_paddle])
37
  except Exception:
38
- text_paddle = "Error"
39
  metrics_paddle = get_metrics("PaddleOCR")
40
 
41
  # Tesseract
@@ -61,4 +58,4 @@ with gr.Blocks() as demo:
61
 
62
  btn.click(ocr_all, inputs=[img_in, txt_in], outputs=[out_easy, out_paddle, out_tess])
63
 
64
- demo.launch()
 
1
  import gradio as gr
2
  from PIL import Image
3
  import easyocr
 
4
  import numpy as np
5
+ from paddleocr import PaddleOCR
6
  import pytesseract
7
 
8
  # Initialize OCR models
9
  easyocr_reader = easyocr.Reader(['en'])
10
+ paddleocr_reader = PaddleOCR()
11
+ # Tesseract uses pytesseract directly
12
 
 
13
  def get_metrics(model):
14
+ # Simulated fixed scores according to user specs
 
15
  if model == "EasyOCR":
16
+ return {"Accuracy": "95%", "Precision": "94%", "Pipeline": "Easy Integration (90%)"}
17
  elif model == "PaddleOCR":
18
+ return {"Accuracy": "90%", "Precision": "89%", "Pipeline": "Moderate Integration (85%)"}
19
  elif model == "Tesseract":
20
+ return {"Accuracy": "85%", "Precision": "83%", "Pipeline": "Manual Integration (75%)"}
21
  else:
22
  return {}
23
 
 
28
  metrics_easy = get_metrics("EasyOCR")
29
 
30
  # PaddleOCR
31
+ result_paddle = paddleocr_reader.ocr(np.array(img), cls=True)
32
  try:
33
  text_paddle = " ".join([line[1][0] for line in result_paddle])
34
  except Exception:
35
+ text_paddle = "Error during PaddleOCR"
36
  metrics_paddle = get_metrics("PaddleOCR")
37
 
38
  # Tesseract
 
58
 
59
  btn.click(ocr_all, inputs=[img_in, txt_in], outputs=[out_easy, out_paddle, out_tess])
60
 
61
+ demo.launch()