Spaces:

Zoe911
/

OCR-C

Sleeping

App Files Files Community

OCR-C / app.py

Zoe911

Update app.py

916034b verified about 1 year ago

raw

history blame contribute delete

3.84 kB

	import gradio as gr
	from paddleocr import PaddleOCR
	from PIL import Image
	import numpy as np
	import re

	# Build the PaddleOCR engine once, at import time; ocr_recognition() reuses
	# this module-level instance for every call instead of constructing its own.
	ocr = PaddleOCR(
	    lang="en",
	    use_angle_cls=True,
	)

	def ocr_recognition(image):
	    """Run OCR on an uploaded image and return the extracted text.

	    Args:
	        image: A file-like object (anything exposing ``read``), a NumPy
	            array, or a PIL image. Any other value is rejected.

	    Returns:
	        A string: the output of ``parse_text_to_structure`` for the
	        recognized lines, ``"No text recognized"`` when OCR yields nothing,
	        ``"Invalid image format"`` for unsupported input, or an error
	        message containing the formatted traceback if anything raises.
	    """
	    no_text_message = "No text recognized"

	    try:
	        # File-like uploads are opened with PIL first.
	        if hasattr(image, "read"):
	            image = Image.open(image)

	        # Raw arrays are normalised to a PIL image as well.
	        if isinstance(image, np.ndarray):
	            image = Image.fromarray(image)

	        # Whatever is left must be a PIL image by now.
	        if not isinstance(image, Image.Image):
	            return "Invalid image format"

	        pages = ocr.ocr(np.array(image), cls=True)
	        if not pages or len(pages) == 0 or pages[0] is None:
	            return no_text_message

	        # NOTE(review): presumably each entry is [box, (text, confidence)],
	        # so entry[1][0] is the recognized string -- confirm against the
	        # installed PaddleOCR version.
	        recognized = []
	        for entry in pages[0]:
	            if entry:
	                recognized.append(entry[1][0])

	        if recognized:
	            return parse_text_to_structure(recognized)
	        return no_text_message

	    except Exception:
	        # Imported lazily: only needed on the failure path.
	        import traceback

	        return f"Error processing image:\n{traceback.format_exc()}"


	def parse_text_to_structure(texts):
	    """Extract labelled event fields from OCR text lines.

	    The lines are joined with single spaces and searched (case-insensitively)
	    for ``Label: value`` pairs. Recognized labels:

	    * Event: ``Event`` / ``Title``
	    * Location: ``Location`` / ``Venue`` / ``Place``
	    * Start Time: ``Start`` / ``Begin`` / ``Starting`` (optionally ``... Time``)
	    * End Time: ``End`` / ``Finish`` / ``Ending`` (optionally ``... Time``)
	    * Duration: ``Duration`` / ``Length``
	    * Date: ``Date``

	    Args:
	        texts: Sequence of recognized text strings, one per OCR line.

	    Returns:
	        A newline-separated ``"Field: value"`` string containing only the
	        fields that were found, in the order listed above. If no field is
	        found, the original lines joined with newlines are returned instead.
	    """
	    # Each pattern captures the field value in group 1.
	    # The alternations use a plain "|": an escaped pipe would match a
	    # literal "|" character in the text, so no real label could ever match.
	    # These patterns should be adjusted according to the actual document format.
	    field_patterns = (
	        (
	            "Event",
	            r"(?:Event|Title):\s(.+?)"
	            r"(?=\s(?:Location|Venue|Date|Start|End|Duration)|$)",
	        ),
	        (
	            "Location",
	            r"(?:Location|Venue|Place):\s(.+?)"
	            r"(?=\s(?:Event|Title|Date|Start|End|Duration)|$)",
	        ),
	        (
	            "Start Time",
	            r"(?:Start|Begin|Starting)(?:\sTime)?:\s"
	            r"(\d{1,2}:\d{2}(?:\s*[AP]M)?)",
	        ),
	        (
	            "End Time",
	            r"(?:End|Finish|Ending)(?:\sTime)?:\s"
	            r"(\d{1,2}:\d{2}(?:\s*[AP]M)?)",
	        ),
	        (
	            "Duration",
	            r"(?:Duration|Length):\s"
	            r"(\d+\s(?:hours|hrs|h|minutes|mins|m))",
	        ),
	        (
	            "Date",
	            r"(?:Date):\s*"
	            r"(\d{1,2}[/\-.]\d{1,2}[/\-.]\d{2,4}|\w+ \d{1,2},? \d{4})",
	        ),
	    )

	    # Merge all text so a value can be delimited by the next label.
	    full_text = " ".join(texts)

	    # Tuple order above is also the output order.
	    structured_data = {}
	    for field, pattern in field_patterns:
	        match = re.search(pattern, full_text, re.I)
	        structured_data[field] = match.group(1).strip() if match else ""

	    # TODO: when no explicit duration is present but both start and end
	    # times were found, the duration could be derived from them. This needs
	    # time-parsing logic that is not implemented yet.

	    # Format output, skipping fields that were not found.
	    output = "\n".join(
	        f"{key}: {value}" for key, value in structured_data.items() if value
	    )

	    # If no structured information was extracted, return the original text.
	    if not output:
	        return "\n".join(texts)

	    return output

	# Gradio UI: one image input wired to ocr_recognition, plain-text output.
	interface = gr.Interface(
	    title="Structured OCR Recognition",
	    description="Upload an image for text recognition and extraction of structured information",
	    fn=ocr_recognition,
	    inputs=gr.Image(type="pil"),  # request the upload as a PIL image
	    outputs="text",
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    interface.launch()