gopichandra committed on
Commit
b58c686
·
verified ·
1 Parent(s): 7ee62b8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import os

import cv2
import gradio as gr
import numpy as np
import pytesseract
from PIL import Image
7
+
8
# Path to Tesseract executable.
# Resolution order:
#   1. the TESSERACT_CMD environment variable, when set;
#   2. the default Windows install location, but only if that file exists;
#   3. otherwise pytesseract's own default is left untouched, so the
#      `tesseract` binary is looked up on PATH (needed on non-Windows hosts,
#      where forcing the Windows path makes every OCR call fail).
_WINDOWS_TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
_tesseract_cmd = os.environ.get("TESSERACT_CMD")
if not _tesseract_cmd and os.path.isfile(_WINDOWS_TESSERACT_CMD):
    _tesseract_cmd = _WINDOWS_TESSERACT_CMD
if _tesseract_cmd:
    pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd
10
+
11
+ # Function to decode image
12
def decode_image(image):
    """Convert a PIL image into an OpenCV-style BGR array.

    Args:
        image: The image to convert; it is turned into a NumPy array and
            treated as RGB-ordered by the colour conversion below.

    Returns:
        The array produced by ``cv2.cvtColor`` with channels reordered from
        RGB to BGR.
    """
    rgb_pixels = np.array(image)
    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    return bgr_pixels
16
+
17
+ # Function to process and extract text
18
def extract_text(image):
    """Run OCR on an image and return the recognised text.

    Args:
        image: The PIL image handed over by the Gradio image input, or
            ``None`` (e.g. when the form is submitted without an upload).

    Returns:
        The text returned by Tesseract, or a string starting with
        ``"Error: "`` describing the failure. Failures are returned rather
        than raised so they are shown in the output textbox.
    """
    # Guard against a missing upload up front: otherwise None reaches the
    # OpenCV conversion and the user only sees an opaque library error.
    if image is None:
        return "Error: no image provided. Please upload an image."

    try:
        # Convert the PIL image to an OpenCV BGR array
        decoded_image = decode_image(image)

        # Convert to grayscale
        gray_image = cv2.cvtColor(decoded_image, cv2.COLOR_BGR2GRAY)

        # Binarise with a fixed threshold of 127 (max value 255) before OCR
        _, processed_image = cv2.threshold(gray_image, 127, 255, cv2.THRESH_BINARY)

        # Extract text using Tesseract
        return pytesseract.image_to_string(processed_image, config="--oem 3 --psm 6")
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app
        return f"Error: {e}"
34
+
35
+ # Gradio Interface
36
# Build the input/output components first, then wire them to extract_text.
_image_input = gr.Image(type="pil", label="Upload Image")
_text_output = gr.Textbox(label="Extracted Text")

interface = gr.Interface(
    fn=extract_text,
    inputs=_image_input,
    outputs=_text_output,
    title="Text Extraction App",
    description="Upload an image of an invoice or document to extract text.",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()