import pytesseract
from PIL import Image
import re
import gradio as gr

# Ensure the Tesseract executable is on the right path (update if necessary).
# This is usually not needed in a Hugging Face environment but is a common fix:
# pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

# Output key -> compiled pattern whose first group captures the field value.
# Insertion order determines the key order of the returned dictionary.
# `.` does not match a newline, so `(.*)` captures up to the end of the line.
_FIELD_PATTERNS = {
    "Tracking ID": re.compile(r"Tracking ID:\s*(\w+)"),
    "Address": re.compile(r"Address:\s*(.*)"),
    "Customer Name": re.compile(r"Customer Name:\s*(.*)"),
    "Product Name": re.compile(r"Product Name:\s*(.*)"),
}


def extract_data(image) -> dict:
    """Run OCR on *image* and pull labelled shipping fields out of the text.

    The OCR text is searched for the labels ``Tracking ID:``, ``Address:``,
    ``Customer Name:`` and ``Product Name:``; for each label the first
    occurrence is used.

    Args:
        image: The image to read, as handed over by the Gradio image input
            (configured below with ``type="pil"``). May be ``None`` when no
            image was supplied.

    Returns:
        A dictionary mapping each label that was found to its captured value,
        with surrounding whitespace removed. Labels that do not occur in the
        OCR text are omitted. If no image was supplied or OCR/parsing raises,
        a dictionary of the form ``{"error": "<message>"}`` is returned
        instead; this function does not propagate exceptions.
    """
    # Give a clear message for an empty submission instead of relying on
    # whatever error the OCR library raises for a missing image.
    if image is None:
        return {"error": "No image provided"}

    try:
        # Perform OCR to extract text from the image.
        extracted_text = pytesseract.image_to_string(image)

        result = {}
        for field_name, pattern in _FIELD_PATTERNS.items():
            match = pattern.search(extracted_text)
            if match:
                # OCR output may leave trailing spaces or '\r' on the line.
                result[field_name] = match.group(1).strip()
        return result
    except Exception as e:
        # UI boundary: report the failure in the JSON output rather than
        # letting the exception escape to the web interface.
        return {"error": str(e)}


# `gr.inputs.Image` is the legacy (Gradio 2.x) namespace and is absent from
# current releases; prefer the top-level component and fall back only when the
# installed Gradio does not provide it.
if hasattr(gr, "Image"):
    _image_input = gr.Image(type="pil")
else:
    _image_input = gr.inputs.Image(type="pil")

# Create the Gradio interface.
interface = gr.Interface(fn=extract_data, inputs=_image_input, outputs="json")

# Launch the app.
interface.launch()