import gradio as gr
from pix2text import Pix2Text
import logging
from PIL import Image

# Keep third-party model libraries quiet: only warnings and errors are emitted.
logging.basicConfig(level=logging.WARNING)

# Build the Pix2Text model once at import time (construction is expensive).
# `p2t` stays None when construction fails so the request handler can detect
# the condition and answer with an explanatory message instead of crashing.
p2t = None
try:
    p2t = Pix2Text()
except Exception:
    # Startup boundary: record the full traceback in the logs (users are
    # pointed at the logs when the model is unavailable) and keep the app
    # importable rather than aborting.
    logging.exception(
        "Error initializing Pix2Text model; recognition requests will "
        "report the failure instead of running OCR."
    )

# --- Recognition handler and its private helpers ---

# Reply used whenever OCR ran but produced nothing usable.
_NO_TEXT_MESSAGE = "No recognizable text or formulas found in the image."

# Keys probed, in order, when the model returns a single dictionary.
_RESULT_TEXT_KEYS = ("text", "content", "result", "output")

_logger = logging.getLogger(__name__)


def _text_or_placeholder(text: str) -> str:
    """Return *text* unchanged, or the "nothing found" message if it is blank."""
    return text if text.strip() else _NO_TEXT_MESSAGE


def _extract_item_text(item: object) -> object:
    """Return the text payload of one element of a list result.

    Strings are returned as-is; dictionaries yield their ``'text'`` or
    ``'content'`` value (or their ``str()`` form when neither key exists);
    objects exposing a ``text`` attribute yield that attribute; anything else
    is returned unchanged. The value may be ``None`` or a non-string, so the
    caller is responsible for filtering and converting it.
    """
    if isinstance(item, str):
        return item
    if isinstance(item, dict):
        for key in ("text", "content"):
            if key in item:
                return item[key]
        return str(item)
    if hasattr(item, "text"):
        return item.text
    return item


def recognize_text(image_path: str) -> str:
    """Run OCR on the image at *image_path* and return the recognized text.

    The model result may be a string, a dictionary, a list of fragments, or
    some other object; each shape is reduced to a single display string.

    Args:
        image_path: Path of the image file to recognize. A missing value
            (``None`` or an empty string) is answered with a prompt to
            provide an image instead of being passed to the model.

    Returns:
        The recognized text, or a human-readable message when the model is
        unavailable, no image was supplied, nothing was recognized, or
        recognition failed. This function never raises; failures are logged
        with their traceback and summarized in the returned string.
    """
    if p2t is None:
        return (
            "Model initialization failed at startup. Please check the logs "
            "to ensure all dependencies (like ONNX runtime) loaded correctly."
        )

    # Nothing to recognize: answer directly rather than letting the model
    # fail on a missing path.
    if not image_path:
        return "No image provided. Please upload an image and try again."

    try:
        # Recognize text and formulas.
        result = p2t.recognize(image_path, save_formula_images=False, use_analyzer=True)

        # Diagnostics go through logging so they respect the configured level
        # instead of being printed on every request.
        _logger.debug("Result type: %s", type(result))
        _logger.debug("Result content: %s", result)

        if isinstance(result, str):
            return _text_or_placeholder(result)

        if isinstance(result, dict):
            for key in _RESULT_TEXT_KEYS:
                if key in result:
                    value = result[key]
                    # Treat a missing/blank value like any other empty result.
                    return _text_or_placeholder("" if value is None else str(value))
            return f"Result is a dict but couldn't find text. Keys: {list(result.keys())}"

        if isinstance(result, list):
            fragments = []
            for item in result:
                text = _extract_item_text(item)
                # Skip absent text and coerce the rest to str so that join()
                # cannot fail on None or non-string payloads.
                if text is not None:
                    fragments.append(str(text))
            return _text_or_placeholder("\n\n".join(fragments))

        # Unknown result type: fall back to its string form.
        return str(result) if result else _NO_TEXT_MESSAGE

    except Exception as e:
        # Request boundary: keep the traceback in the server logs and show the
        # user only the short error message, not internal paths and frames.
        _logger.exception("Recognition failed for %r", image_path)
        return f"An unexpected error occurred during recognition: {e}"


# --- Gradio Interface Setup ---

# Input widget: the handler receives the uploaded file's path, not pixel data.
_image_input = gr.Image(type="filepath", label="Upload Image (Text/Formula/Math)")

# Output widget: a plain multi-line text box for the recognized content.
_text_output = gr.Textbox(label="Extracted Text (LaTeX/Plain Text)", lines=10)

# Blurb shown under the page title.
_app_description = (
    "Upload an image containing text, mathematical formulas, or scientific notation. "
    "The app converts the image content into editable text, using LaTeX for formulas."
)

# Wire the recognition handler to the widgets; flagging is switched off.
iface = gr.Interface(
    fn=recognize_text,
    inputs=_image_input,
    outputs=_text_output,
    title="🔬 Pix2Text OCR Formula and Text Recognition",
    description=_app_description,
    theme=gr.themes.Soft(),
    allow_flagging="never",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch(show_api=False)