"""Gradio app: run OCR on an uploaded image and extract event-style fields."""

import re
import traceback

import gradio as gr
import numpy as np
from paddleocr import PaddleOCR
from PIL import Image

# Initialize OCR model
ocr = PaddleOCR(use_angle_cls=True, lang="en")

# Every label any extractor below recognises. A free-text field (Event,
# Location) ends where the next "<label>:" begins, so this list must stay in
# sync with the labels used in the patterns.
_FIELD_LABELS = (
    r"Event|Title|Location|Venue|Place|Date|Starting|Start|Begin|"
    r"Ending|End|Finish|Duration|Length"
)

# Lookahead that terminates a free-text value. The label must start on a word
# boundary and be followed by a colon, so words that merely contain a keyword
# ("Weekend", "Update", "Restart") or a keyword used in prose ("The End of
# Days") no longer cut the value short.
_FIELD_END = rf"(?=\s*\b(?:{_FIELD_LABELS})\b(?:\s*Time)?\s*:|\s*$)"

# These patterns should be adjusted according to your actual document format.
_EVENT_RE = re.compile(rf"\b(?:Event|Title)\s*:\s*(.+?){_FIELD_END}", re.I)
_LOCATION_RE = re.compile(
    rf"\b(?:Location|Venue|Place)\s*:\s*(.+?){_FIELD_END}", re.I
)
_DATE_RE = re.compile(
    r"\bDate\s*:\s*("
    r"\d{4}[/\-.]\d{1,2}[/\-.]\d{1,2}"      # 2024-05-12
    r"|\d{1,2}[/\-.]\d{1,2}[/\-.]\d{2,4}"   # 12/05/2024, 12.05.24
    r"|\w+ \d{1,2},? \d{4}"                 # May 12, 2024
    r")",
    re.I,
)
_CLOCK = r"\d{1,2}:\d{2}(?:\s*[AP]M)?"
_START_RE = re.compile(
    rf"\b(?:Starting|Start|Begin)(?:\s*Time)?\s*:\s*({_CLOCK})", re.I
)
_END_RE = re.compile(
    rf"\b(?:Ending|End|Finish)(?:\s*Time)?\s*:\s*({_CLOCK})", re.I
)
# Longer unit spellings come first so "2 hour" / "90 min" are captured whole
# instead of being truncated to "2 h" / "90 m".
_DURATION_RE = re.compile(
    r"\b(?:Duration|Length)\s*:\s*"
    r"(\d+(?:\.\d+)?\s*(?:hours?|hrs?|h|minutes?|mins?|m))",
    re.I,
)
_CLOCK_PARTS_RE = re.compile(r"(\d{1,2}):(\d{2})\s*([AP]M)?", re.I)


def ocr_recognition(image):
    """Run OCR on *image* and return the extracted information as text.

    Args:
        image: A PIL image, a NumPy array, or a file-like object with a
            ``read`` method.

    Returns:
        The structured fields produced by ``parse_text_to_structure``, or a
        human-readable message when the input is not an image, when no text
        is recognised, or when processing raises an exception (in which case
        the message contains the traceback).
    """
    try:
        # Handle Gradio file upload from client (file-like object).
        if hasattr(image, "read"):
            image = Image.open(image)

        # If it's an np.ndarray (browser upload), convert to PIL.
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        if not isinstance(image, Image.Image):
            return "Invalid image format"

        # RGBA, palette and greyscale images would otherwise become arrays
        # that are not H x W x 3; normalise so the model always receives a
        # 3-channel array.
        image_np = np.array(image.convert("RGB"))
        result = ocr.ocr(image_np, cls=True)

        if not result or result[0] is None:
            return "No text recognized"

        # The recognised string of each detection is read from line[1][0].
        texts = [line[1][0] for line in result[0] if line]
        if not texts:
            return "No text recognized"

        return parse_text_to_structure(texts)
    except Exception:
        # UI boundary: report the failure to the user instead of crashing.
        return f"Error processing image:\n{traceback.format_exc()}"


def _parse_clock(value):
    """Parse ``H:MM`` with optional AM/PM.

    Returns:
        ``(minutes_since_midnight, has_meridiem)``, or ``None`` when *value*
        is not a valid clock time.
    """
    match = _CLOCK_PARTS_RE.fullmatch(value.strip())
    if match is None:
        return None
    hour, minute = int(match.group(1)), int(match.group(2))
    meridiem = (match.group(3) or "").upper()
    if minute > 59:
        return None
    if meridiem:
        if not 1 <= hour <= 12:
            return None
        # 12 AM is midnight and 12 PM is noon.
        hour = hour % 12 + (12 if meridiem == "PM" else 0)
    elif hour > 23:
        return None
    return hour * 60 + minute, bool(meridiem)


def _derive_duration(start, end):
    """Return the time between *start* and *end* as text, or "" if unknown.

    Nothing is derived when either time is invalid, when one time carries
    AM/PM and the other does not, or when the end is not after the start.
    The only exception to the last rule is a pair of AM/PM times, where an
    earlier end time is read as falling on the next day.
    """
    begin, finish = _parse_clock(start), _parse_clock(end)
    if begin is None or finish is None:
        return ""
    (begin_min, begin_12h), (finish_min, finish_12h) = begin, finish
    if begin_12h != finish_12h:
        return ""

    elapsed = finish_min - begin_min
    if elapsed < 0 and begin_12h:
        elapsed += 24 * 60  # e.g. 10:00 PM -> 2:00 AM
    if elapsed <= 0:
        return ""

    hours, minutes = divmod(elapsed, 60)
    parts = []
    if hours:
        parts.append(f"{hours} hour{'' if hours == 1 else 's'}")
    if minutes:
        parts.append(f"{minutes} minute{'' if minutes == 1 else 's'}")
    return " ".join(parts)


def parse_text_to_structure(texts):
    """Extract labelled event fields from OCR text lines.

    The lines are joined with spaces and searched for ``Label: value`` pairs
    (case-insensitive) for Event, Location, Start Time, End Time, Duration
    and Date. When no explicit duration is present but both start and end
    times are, the duration is derived from them where that is unambiguous.

    Args:
        texts: Iterable of recognised text strings.

    Returns:
        One ``"Field: value"`` line per non-empty field, or the original
        lines joined by newlines when no field could be extracted.
    """
    # Merge all text
    full_text = " ".join(texts)

    # Initialize structured fields; insertion order is the output order.
    structured_data = {
        "Event": "",
        "Location": "",
        "Start Time": "",
        "End Time": "",
        "Duration": "",
        "Date": "",
    }

    extractors = (
        ("Event", _EVENT_RE),
        ("Location", _LOCATION_RE),
        ("Date", _DATE_RE),
        ("Start Time", _START_RE),
        ("End Time", _END_RE),
        ("Duration", _DURATION_RE),
    )
    for field, pattern in extractors:
        match = pattern.search(full_text)
        if match:
            structured_data[field] = match.group(1).strip()

    # If no explicit duration is found but we have start and end times,
    # calculate it.
    if (
        not structured_data["Duration"]
        and structured_data["Start Time"]
        and structured_data["End Time"]
    ):
        structured_data["Duration"] = _derive_duration(
            structured_data["Start Time"], structured_data["End Time"]
        )

    # Format output
    output = "\n".join(
        f"{key}: {value}" for key, value in structured_data.items() if value
    )

    # If no structured information was extracted, return the original text
    if not output:
        return "\n".join(texts)
    return output


# Create Gradio interface
interface = gr.Interface(
    fn=ocr_recognition,
    inputs=gr.Image(type="pil"),  # Explicitly specify using PIL
    outputs="text",
    title="Structured OCR Recognition",
    description=(
        "Upload an image for text recognition and extraction of structured "
        "information"
    ),
)

# Launch the service
if __name__ == "__main__":
    interface.launch()