Spaces:
Runtime error
Runtime error
File size: 1,685 Bytes
fc94d64 cb82368 fc94d64 cb82368 fc94d64 ce8aeda cb82368 ce8aeda | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 | import pytesseract
from PIL import Image
import re
import gradio as gr
# Ensure the Tesseract executable is in the right path (update path if necessary)
# This might not be needed in Hugging Face environment but is a common fix
# pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'
# Labels searched for in the OCR text, mapped to the pattern capturing each value.
# The ``\s*`` after the colon also matches line breaks, so a value that sits on
# the line below its label is still captured.
_FIELD_PATTERNS = {
    "Tracking ID": re.compile(r"Tracking ID:\s*(\w+)"),
    "Address": re.compile(r"Address:\s*(.*)"),
    "Customer Name": re.compile(r"Customer Name:\s*(.*)"),
    "Product Name": re.compile(r"Product Name:\s*(.*)"),
}


def extract_data(image):
    """Run OCR on *image* and extract the labelled fields from the text.

    Args:
        image: The image handed to ``pytesseract.image_to_string``. ``None``
            (no image supplied) is reported as an error instead of being
            passed to the OCR engine.

    Returns:
        A dict containing whichever of the keys ``"Tracking ID"``,
        ``"Address"``, ``"Customer Name"`` and ``"Product Name"`` were found
        in the OCR text, each mapped to its captured value with surrounding
        whitespace removed. If no image was given or OCR fails, a dict with a
        single ``"error"`` key holding the message is returned instead; the
        function does not raise for OCR failures.
    """
    if image is None:
        return {"error": "No image provided"}

    # Only the OCR call can fail here; keep the error boundary around it alone.
    try:
        extracted_text = pytesseract.image_to_string(image)
    except Exception as e:
        return {"error": str(e)}

    result = {}
    for label, pattern in _FIELD_PATTERNS.items():
        match = pattern.search(extracted_text)
        if match:
            # ``.`` matches ``\r`` and spaces, so trim what the greedy capture
            # drags along at the end of the line.
            result[label] = match.group(1).strip()
    return result
# Create the Gradio interface.
# Use the top-level ``gr.Image`` component: the legacy ``gr.inputs`` namespace
# was deprecated in Gradio 3 and is no longer available in Gradio 4+, where
# referencing it fails at startup.
interface = gr.Interface(
    fn=extract_data,
    inputs=gr.Image(type="pil"),  # deliver the upload to the callback as a PIL image
    outputs="json",
)

# Launch the app
interface.launch()
|