bonrix committed on
Commit
5d151a0
·
1 Parent(s): 782664f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import time

import cv2
import gradio as gr
import numpy as np
import pytesseract
6
+
7
# Default Windows install location of the Tesseract executable.
TESSERACT_WINDOWS_PATH = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Only override pytesseract's command when that executable really exists.
# On hosts without it (e.g. Linux or macOS), pytesseract keeps its own default
# command, so the binary is looked up on PATH instead of pointing at a file
# that cannot be there.
if os.path.isfile(TESSERACT_WINDOWS_PATH):
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_WINDOWS_PATH
9
+
10
def text(file):
    """Run Tesseract OCR on an uploaded image and report how long it took.

    The upload is decoded with OpenCV, reduced to a single grayscale channel,
    binarised with Otsu's threshold and handed to Tesseract using the English
    language data.

    Args:
        file: Binary file-like object whose ``read()`` returns the encoded
            image bytes. NOTE(review): this is presumably the temp-file
            object Gradio passes for a File input -- confirm against the
            installed Gradio version.

    Returns:
        dict: ``{'text': [...], 'time_taken': <float seconds>}`` where
        ``text`` holds the non-empty OCR lines with surrounding whitespace
        stripped, and ``time_taken`` is the elapsed time of the whole call.

    Raises:
        ValueError: If nothing was uploaded, the upload is empty, or the
            bytes cannot be decoded as an image.
    """
    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump if the system clock is adjusted during the call.
    started = time.perf_counter()

    if file is None:
        raise ValueError("No file was uploaded.")

    data = file.read()
    if not data:
        raise ValueError("Uploaded file is empty.")

    # Decode the raw bytes; imdecode signals failure by returning None.
    image = cv2.imdecode(np.frombuffer(data, np.uint8), cv2.IMREAD_UNCHANGED)
    if image is None:
        raise ValueError("Uploaded file could not be decoded as an image.")

    # IMREAD_UNCHANGED keeps grayscale images as 2-D arrays; those are already
    # single-channel and must not be passed through a BGR->gray conversion.
    if image.ndim == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Otsu picks the threshold automatically, so the 0 passed here is ignored.
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # Perform OCR on the binarised image.
    ocr_output = pytesseract.image_to_string(thresh, lang='eng')

    # Keep only non-blank lines, stripped of surrounding whitespace.
    lines = [
        stripped
        for stripped in (raw.strip() for raw in ocr_output.split('\n'))
        if stripped
    ]

    time_taken = time.perf_counter() - started

    return {
        'text': lines,
        'time_taken': time_taken,
    }
38
+
39
# UI components: a single file upload in, a JSON viewer out.
file_input = gr.inputs.File(label="Upload Image")
text_output = gr.outputs.JSON(label="OCR Result")

# Wire the OCR function to the components.
iface = gr.Interface(
    fn=text,
    inputs=file_input,
    outputs=text_output,
    title="Text Detection Using Pytesseract",
    description="Upload an image to detect text.",
)

# Launch the interface when the script is executed.
iface.launch()
45
+