# OCR-C / app.py
# Zoe911's picture
# Update app.py
# 916034b verified
import gradio as gr
from paddleocr import PaddleOCR
from PIL import Image
import numpy as np
import re
# Create the PaddleOCR instance once, at import time. ocr_recognition() reuses
# this single module-level object for every request instead of rebuilding it.
# It is configured with use_angle_cls=True and the "en" language.
ocr = PaddleOCR(use_angle_cls=True, lang="en")
def ocr_recognition(image):
    """Run OCR on an uploaded image and return the extracted text.

    Args:
        image: A file-like object (anything with a ``read`` attribute), a
            NumPy array, or a PIL image.

    Returns:
        The structured text produced by ``parse_text_to_structure``, or one
        of the messages "Invalid image format" / "No text recognized", or an
        "Error processing image:" message followed by the traceback when any
        exception is raised while processing.
    """
    try:
        # File-like uploads are opened with PIL first.
        if hasattr(image, "read"):
            image = Image.open(image)

        # Raw arrays are wrapped in a PIL image as well.
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        # Anything that is still not a PIL image is rejected.
        if not isinstance(image, Image.Image):
            return "Invalid image format"

        detections = ocr.ocr(np.array(image), cls=True)

        has_lines = (
            bool(detections)
            and len(detections) > 0
            and detections[0] is not None
        )
        if not has_lines:
            return "No text recognized"

        # Each non-empty entry carries its recognized string at [1][0].
        recognized = []
        for entry in detections[0]:
            if entry:
                recognized.append(entry[1][0])

        if recognized:
            return parse_text_to_structure(recognized)
        return "No text recognized"
    except Exception:
        import traceback

        return "Error processing image:\n" + traceback.format_exc()
# Every label (matched case-insensitively) that may introduce a field.
_FIELD_LABELS = (
    r"(?:Event|Title|Location|Venue|Place|Date|Start|Begin|Starting"
    r"|End|Finish|Ending|Duration|Length)"
)

# Lookahead that ends a free-text value: either the next "Label:" (as a whole
# word, optionally followed by "Time") or the end of the text. Requiring the
# word boundary and the colon keeps ordinary words such as "Weekend" or
# "West End Theatre" from being mistaken for the start of another field.
_NEXT_FIELD = rf"(?=\s*\b{_FIELD_LABELS}(?:\s*Time)?\s*:|$)"

# A clock time such as "9:30" or "10:00 AM".
_CLOCK_TIME = r"(\d{1,2}:\d{2}(?:\s*[AP]M)?)"

# Output field name -> compiled pattern whose group(1) is the field value.
# The insertion order here is the order of the lines in the returned text.
# These patterns should be adjusted according to the actual document format.
_FIELD_PATTERNS = {
    "Event": re.compile(
        rf"\b(?:Event|Title)\s*:\s*(.+?){_NEXT_FIELD}", re.I
    ),
    "Location": re.compile(
        rf"\b(?:Location|Venue|Place)\s*:\s*(.+?){_NEXT_FIELD}", re.I
    ),
    "Start Time": re.compile(
        r"\b(?:Start|Begin|Starting)(?:\s*Time)?\s*:\s*" + _CLOCK_TIME, re.I
    ),
    "End Time": re.compile(
        r"\b(?:End|Finish|Ending)(?:\s*Time)?\s*:\s*" + _CLOCK_TIME, re.I
    ),
    "Duration": re.compile(
        r"\b(?:Duration|Length)\s*:\s*"
        r"(\d+(?:\.\d+)?\s*(?:hours?|hrs?|h|minutes?|mins?|m))",
        re.I,
    ),
    "Date": re.compile(
        r"\bDate\s*:\s*"
        r"(\d{1,2}[/\-.]\d{1,2}[/\-.]\d{2,4}|\w+ \d{1,2},? \d{4})",
        re.I,
    ),
}


def parse_text_to_structure(texts):
    """Extract labelled event fields from OCR text fragments.

    The fragments are joined with single spaces and searched for
    "Label: value" pairs (Event/Title, Location/Venue/Place, Start, End,
    Duration/Length, Date). Labels are matched case-insensitively, only as
    whole words, and may have whitespace before the colon.

    Args:
        texts: Sequence of recognized text strings, in reading order.

    Returns:
        A newline-separated string of "Field: value" lines for every field
        that was found, in the order Event, Location, Start Time, End Time,
        Duration, Date. If no field was found, the original fragments joined
        with newlines are returned instead.
    """
    full_text = " ".join(texts)

    lines = []
    for field, pattern in _FIELD_PATTERNS.items():
        match = pattern.search(full_text)
        if not match:
            continue
        value = match.group(1).strip()
        if value:
            lines.append(f"{field}: {value}")

    # NOTE: when Duration is missing but Start Time and End Time are present,
    # it could be derived from them; that calculation is not implemented.

    # Nothing structured was recognized: fall back to the raw text.
    if not lines:
        return "\n".join(texts)
    return "\n".join(lines)
# Gradio UI definition: a single image input is passed to ocr_recognition
# and whatever string it returns is shown as plain text.
interface = gr.Interface(
    title="Structured OCR Recognition",
    description="Upload an image for text recognition and extraction of structured information",
    fn=ocr_recognition,
    inputs=gr.Image(type="pil"),  # request uploads as PIL images
    outputs="text",
)

# Launch the interface only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()