Spaces:

bonrix
/

text-detection-pytesseract

Runtime error

bonrix committed on May 25, 2023

Commit

5d151a0

1 Parent(s): 782664f

Upload app.py

Files changed (1) hide show

app.py ADDED Viewed

+import os
+import time
+
+import cv2
+import gradio as gr
+import numpy as np
+import pytesseract
+# set the path to the Tesseract executable
+pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  #Tresseract path
+def text(file):
+    start_time = time.time()
+    # load the image and convert it to grayscale
+    image = cv2.imdecode(np.frombuffer(file.read(), np.uint8), cv2.IMREAD_UNCHANGED)
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    # apply thresholding to preprocess the image
+    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
+    # perform OCR on the thresholded image
+    text = pytesseract.image_to_string(thresh, lang='eng')
+    # split the OCR output into a list of lines
+    lines = [line.strip() for line in text.split('\n') if line.strip()]
+    end_time = time.time()
+    time_taken = end_time - start_time
+    return {
+        'text': lines,
+        'time_taken': time_taken
+    }
+file_input = gr.inputs.File(label="Upload Image")
+text_output = gr.outputs.JSON(label="OCR Result")
+iface = gr.Interface(fn=text, inputs=file_input, outputs=text_output,  title="Text Detection Using Pytesseract",
+    description="Upload an image to detect text.")
+iface.launch()