File size: 1,685 Bytes
fc94d64
 
 
 
 
cb82368
 
 
 
fc94d64
cb82368
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc94d64
ce8aeda
cb82368
ce8aeda
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pytesseract
from PIL import Image
import re
import gradio as gr

# pytesseract must be able to find the Tesseract executable. If it is not on
# PATH, uncomment the line below and adjust the path. This may be unnecessary
# in the Hugging Face environment, but it is a common fix elsewhere.
# pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

# Value shape expected after each label; dict order is the result's key order.
_FIELD_VALUE_SHAPES = {
    "Tracking ID": r"\w+",
    "Address": r".*",
    "Customer Name": r".*",
    "Product Name": r".*",
}

# Compiled once at import time instead of on every request.
_FIELD_PATTERNS = {
    label: re.compile(label + r":\s*(" + shape + r")")
    for label, shape in _FIELD_VALUE_SHAPES.items()
}

# "<label>:" prefixes, used to notice when a capture has run into the next field.
_LABEL_PREFIXES = tuple(label + ":" for label in _FIELD_VALUE_SHAPES)


def _parse_label_fields(text):
    """Return the labelled fields found in OCR *text* as a ``{label: value}`` dict."""
    fields = {}
    for label, pattern in _FIELD_PATTERNS.items():
        match = pattern.search(text)
        if match is None:
            continue
        # `\s*` also crosses line breaks, so when a field is blank the capture
        # would begin at the *next* label ("Address:\nCustomer Name: ...").
        # Treat that as "no value" instead of stealing the following line.
        if text.startswith(_LABEL_PREFIXES, match.start(1)):
            continue
        # `.` matches "\r" and trailing spaces; neither belongs in the value.
        value = match.group(1).strip()
        if value:
            fields[label] = value
    return fields


def extract_data(image):
    """Run OCR on an image and extract the labelled shipping fields.

    The recognised text is searched for the labels ``Tracking ID:``,
    ``Address:``, ``Customer Name:`` and ``Product Name:``; the text that
    follows each label becomes that field's value.

    Args:
        image: The image to read, passed straight to
            ``pytesseract.image_to_string``. ``None`` (no image supplied)
            is reported as an error instead of being sent to the OCR engine.

    Returns:
        A dict containing only the fields that were found with a non-blank
        value, keyed by label. On failure, a dict with the single key
        ``"error"`` holding the message; exceptions are never propagated,
        so the result can always be rendered as JSON.
    """
    if image is None:
        return {"error": "No image provided"}

    try:
        # Only the OCR call can fail; the parsing below is pure string work.
        extracted_text = pytesseract.image_to_string(image)
    except Exception as e:
        return {"error": str(e)}

    return _parse_label_fields(extracted_text)

# Create the Gradio interface.
# `gr.inputs.Image` is the legacy spelling (deprecated in Gradio 3, removed in
# Gradio 4). Prefer the top-level `gr.Image` component and fall back to the
# legacy namespace only on installs that predate it.
_image_component = gr.Image if hasattr(gr, "Image") else gr.inputs.Image
interface = gr.Interface(
    fn=extract_data,
    inputs=_image_component(type="pil"),  # hand extract_data a PIL image
    outputs="json",
)

# Launch the app
interface.launch()