Spaces:

sairamtelagamsetti
/

OCR-Extraction

Runtime error

OCR-Extraction / app.py

Update app.py

cb82368 verified about 1 year ago

1.69 kB

	import pytesseract
	from PIL import Image
	import re
	import gradio as gr

	# If pytesseract cannot locate the Tesseract executable, uncomment the line
	# below and adjust the path. This may not be needed in the Hugging Face
	# environment, but it is a common fix elsewhere.
	# pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

	# Output key -> compiled pattern whose group 1 captures that field's value.
	# The free-text fields capture the rest of the line (`.+` stops at a
	# newline); the tracking ID is a single alphanumeric token.
	_FIELD_PATTERNS = {
	    "Tracking ID": re.compile(r"Tracking ID:\s*(\w+)"),
	    "Address": re.compile(r"Address:\s*(.+)"),
	    "Customer Name": re.compile(r"Customer Name:\s*(.+)"),
	    "Product Name": re.compile(r"Product Name:\s*(.+)"),
	}


	def _parse_fields(text):
	    """Return the labelled fields found in OCR *text* as a ``{label: value}`` dict."""
	    fields = {}
	    for label, pattern in _FIELD_PATTERNS.items():
	        match = pattern.search(text)
	        if match is None:
	            continue
	        # OCR output often carries trailing spaces or a stray "\r".
	        value = match.group(1).strip()
	        if value:
	            fields[label] = value
	    return fields


	def extract_data(image):
	    """Run OCR on *image* and pull out the labelled shipping fields.

	    Args:
	        image: The image to read, passed straight to
	            ``pytesseract.image_to_string``. ``None`` means no image was
	            supplied.

	    Returns:
	        A dict containing whichever of ``"Tracking ID"``, ``"Address"``,
	        ``"Customer Name"`` and ``"Product Name"`` were found in the OCR
	        text (fields that are not found are omitted). If no image was
	        given or OCR fails, a dict of the form ``{"error": message}``.
	    """
	    if image is None:
	        return {"error": "No image provided."}

	    try:
	        # Perform OCR to extract text from the image
	        extracted_text = pytesseract.image_to_string(image)
	    except Exception as e:
	        # This function is the UI boundary: report the failure in the JSON
	        # output instead of letting the request crash.
	        return {"error": str(e)}

	    return _parse_fields(extracted_text)

	# Build the Gradio UI: one image upload whose extracted fields are shown as
	# JSON. `gr.Image` is used instead of the legacy `gr.inputs.Image`, because
	# the `gr.inputs` namespace was deprecated in Gradio 3.x and removed in 4.x.
	interface = gr.Interface(
	    fn=extract_data,
	    inputs=gr.Image(type="pil"),
	    outputs="json",
	)

	# Launch the app
	interface.launch()