Spaces:

gopichandra
/

Text_extraction

Runtime error

gopichandra committed on Dec 16, 2024

Commit

b58c686

verified ·

1 Parent(s): 7ee62b8

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import base64
+import os
+import cv2
+import gradio as gr
+import numpy as np
+import pytesseract
+from PIL import Image
+# Path to Tesseract executable
+pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
+# Function to decode image
+def decode_image(image):
+    # Convert PIL Image to OpenCV format
+    image = np.array(image)
+    return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+# Function to process and extract text
+def extract_text(image):
+    try:
+        # Decode the image
+        decoded_image = decode_image(image)
+        # Convert to grayscale
+        gray_image = cv2.cvtColor(decoded_image, cv2.COLOR_BGR2GRAY)
+        # Preprocess the image (optional)
+        _, processed_image = cv2.threshold(gray_image, 127, 255, cv2.THRESH_BINARY)
+        # Extract text using Tesseract
+        text = pytesseract.image_to_string(processed_image, config="--oem 3 --psm 6")
+        return text
+    except Exception as e:
+        return f"Error: {str(e)}"
+# Gradio Interface
+interface = gr.Interface(
+    fn=extract_text,
+    inputs=gr.Image(type="pil", label="Upload Image"),
+    outputs=gr.Textbox(label="Extracted Text"),
+    title="Text Extraction App",
+    description="Upload an image of an invoice or document to extract text."
+)
+# Run the app
+if __name__ == "__main__":
+    interface.launch()