import gradio as gr
from paddleocr import PaddleOCR
from PIL import Image
import numpy as np
import re

# Shared PaddleOCR engine, constructed once at module level and reused by
# ocr_recognition. Configured for English with the angle classifier enabled.
ocr = PaddleOCR(
    lang="en",
    use_angle_cls=True,
)

def _flatten_to_rgb(image):
    """Return *image* as an opaque RGB PIL image.

    Transparent pixels are composited onto a white background so that dark
    text on a transparent canvas stays readable; every other non-RGB mode
    (palette, grayscale, bilevel, CMYK, ...) is converted to RGB so the pixel
    array holds real colour values rather than palette indices or booleans.
    """
    has_alpha = image.mode in ("RGBA", "LA") or "transparency" in image.info
    if has_alpha:
        rgba = image.convert("RGBA")
        canvas = Image.new("RGB", rgba.size, (255, 255, 255))
        # The alpha band is used as the paste mask.
        canvas.paste(rgba, mask=rgba.split()[3])
        return canvas
    if image.mode != "RGB":
        return image.convert("RGB")
    return image


def ocr_recognition(image):
    """Run OCR on an image and return the extracted text as a string.

    Args:
        image: A PIL image, a NumPy array, or a file-like object (anything
            with a ``read`` attribute) that PIL can open.

    Returns:
        The structured summary produced by ``parse_text_to_structure`` when
        text is found; otherwise one of the messages
        ``"Invalid image format"``, ``"No text recognized"`` or
        ``"Error processing image:..."`` followed by the traceback.
        This function never raises: every exception is reported in the
        returned string.
    """
    # Nothing to process (e.g. the form was submitted without an image).
    if image is None:
        return "Invalid image format"

    try:
        # File-like upload: let PIL decode it.
        if hasattr(image, "read"):
            image = Image.open(image)

        # Raw array input: wrap it so the mode handling below applies.
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        if not isinstance(image, Image.Image):
            return "Invalid image format"

        # Normalise to 3-channel RGB before handing pixels to the OCR engine.
        image_np = np.array(_flatten_to_rgb(image))
        result = ocr.ocr(image_np, cls=True)

        # The first element holds the detections for this image; it is None
        # (or the list is empty) when nothing was detected.
        if not result or result[0] is None:
            return "No text recognized"

        # Each detection is indexed as line[1][0] to obtain its text.
        texts = [line[1][0] for line in result[0] if line]
        if not texts:
            return "No text recognized"

        return parse_text_to_structure(texts)

    except Exception:
        # NOTE(review): the full traceback is returned to the caller, which is
        # handy for debugging but exposes internals; consider logging it
        # instead if this app is served publicly.
        import traceback
        return f"Error processing image:\n{traceback.format_exc()}"


# Every label that may introduce a field. Shared by all patterns so a
# free-text value is always cut at whichever labelled field comes next.
_FIELD_LABELS = (
    r"(?:Event|Title|Location|Venue|Place|Date|Start(?:ing)?|Begin|"
    r"End(?:ing)?|Finish|Duration|Length)"
)

# A free-text value ends where the next "<label>[ Time]:" starts, or at the
# end of the text. Requiring a word boundary and the colon keeps ordinary
# words such as "Weekend" or "Startup" from being mistaken for labels.
_NEXT_FIELD = r"(?=\s*\b" + _FIELD_LABELS + r"(?:\s*Time)?\s*:|$)"

_CLOCK_TIME = r"\d{1,2}:\d{2}(?:\s*[AP]M)?"

# One "<number> <unit>" piece. The negative lookahead stops a unit
# abbreviation from matching the start of a longer word ("3 months").
_DURATION_PART = (
    r"\d+(?:\.\d+)?\s*(?:hours?|hrs?|h|minutes?|mins?|m)(?![a-z])"
)

# (field name, compiled pattern) in the order the fields are printed.
_FIELD_PATTERNS = tuple(
    (name, re.compile(pattern, re.I))
    for name, pattern in (
        ("Event", r"\b(?:Event|Title)\s*:\s*(.+?)" + _NEXT_FIELD),
        ("Location", r"\b(?:Location|Venue|Place)\s*:\s*(.+?)" + _NEXT_FIELD),
        (
            "Start Time",
            r"\b(?:Start(?:ing)?|Begin)(?:\s*Time)?\s*:\s*(" + _CLOCK_TIME + r")",
        ),
        (
            "End Time",
            r"\b(?:End(?:ing)?|Finish)(?:\s*Time)?\s*:\s*(" + _CLOCK_TIME + r")",
        ),
        (
            "Duration",
            r"\b(?:Duration|Length)\s*:\s*((?:" + _DURATION_PART + r"\s*)+)",
        ),
        (
            "Date",
            r"\bDate\s*:\s*"
            r"(\d{1,2}[\/\-\.]\d{1,2}[\/\-\.]\d{2,4}|\w+ \d{1,2},? \d{4})",
        ),
    )
)


def parse_text_to_structure(texts):
    """Extract labelled event fields from OCR text fragments.

    The fragments are joined with single spaces and searched,
    case-insensitively, for ``Label: value`` pairs covering Event, Location,
    Start Time, End Time, Duration and Date.

    Args:
        texts: Sequence of recognised text strings, in reading order.

    Returns:
        A newline-separated ``"Field: value"`` listing containing only the
        fields that were found. If no field matched, the original fragments
        joined with newlines are returned instead (an empty string for an
        empty input).
    """
    full_text = " ".join(texts)

    structured_data = {}
    for field, pattern in _FIELD_PATTERNS:
        match = pattern.search(full_text)
        structured_data[field] = match.group(1).strip() if match else ""

    # NOTE: when Duration is missing but both Start Time and End Time were
    # found, it could be derived from them; that calculation is not
    # implemented, so Duration is simply left out in that case.

    output = "\n".join(
        f"{key}: {value}" for key, value in structured_data.items() if value
    )

    # Nothing structured was recognised: fall back to the raw text.
    if not output:
        return "\n".join(texts)

    return output

# Gradio UI: a single image input, requested as a PIL image, is passed to
# ocr_recognition and the returned string is displayed as plain text.
interface = gr.Interface(
    title="Structured OCR Recognition",
    description="Upload an image for text recognition and extraction of structured information",
    fn=ocr_recognition,
    inputs=gr.Image(type="pil"),
    outputs="text",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()