import logging
import os

import gradio as gr
import torch
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load the TrOCR processor and model once, at import time. Both are created
# from the same checkpoint; the model is moved to the GPU when CUDA is available.
try:
    _checkpoint = 'microsoft/trocr-base-handwritten'
    processor = TrOCRProcessor.from_pretrained(_checkpoint)
    model = VisionEncoderDecoderModel.from_pretrained(_checkpoint)
    if torch.cuda.is_available():
        model.to('cuda')
except Exception as e:
    # Record the failure, then let it propagate to whoever imported the module
    logger.error(f"Error loading model: {e}")
    raise

def process_image(image):
    """Extract text from an image using the module-level TrOCR model.

    Args:
        image: A PIL image. It is converted to RGB when its mode differs.

    Returns:
        The decoded text with surrounding whitespace stripped. If any step
        fails, the error is logged (with traceback) and a string of the form
        ``"Error processing image: <reason>"`` is returned instead of raising.
    """
    try:
        # Convert to RGB if needed
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # Prepare image for model. The tensor is placed on the device the
        # model actually lives on, rather than re-deriving it from CUDA
        # availability, so inputs and weights cannot end up on different
        # devices.
        pixel_values = processor(image, return_tensors="pt").pixel_values
        pixel_values = pixel_values.to(model.device)

        # Generate token ids (max_length=128 caps the sequence length) and
        # decode the first sequence of the batch.
        generated_ids = model.generate(pixel_values, max_length=128)
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return generated_text.strip()
    except Exception as e:
        # logger.exception keeps the traceback alongside the message
        logger.exception("Error processing image: %s", e)
        return f"Error processing image: {str(e)}"

def analyze_image(input_image):
    """Run OCR on an image file and return a formatted text report.

    Args:
        input_image: Path (or file object) accepted by ``Image.open``, or
            ``None`` when no image was supplied.

    Returns:
        A report string containing the extracted text followed by character
        and word counts; ``"Please upload an image."`` when ``input_image``
        is ``None``; or ``"Error analyzing image: <reason>"`` when opening or
        processing the image raises.
    """
    if input_image is None:
        return "Please upload an image."

    try:
        # The context manager guarantees the underlying file handle is
        # released even if processing fails before the image data is loaded.
        with Image.open(input_image) as image:
            # Extract text
            extracted_text = process_image(image)

        # Format response
        response = f"""📝 Extracted Text:
{'-' * 40}
{extracted_text}
{'-' * 40}

📊 Statistics:
• Characters: {len(extracted_text)}
• Words: {len(extracted_text.split())}
"""
        return response
    except Exception as e:
        # logger.exception keeps the traceback alongside the message
        logger.exception("Error in analysis: %s", e)
        return f"Error analyzing image: {str(e)}"

# Create Gradio interface
# Only offer example images that are actually present in the working
# directory, so a missing example file cannot break interface construction.
# When none are found, no examples section is requested at all.
_example_files = [
    path for path in ("example1.jpg", "example2.png") if os.path.exists(path)
]

demo = gr.Interface(
    fn=analyze_image,
    inputs=gr.Image(type="filepath", label="Upload Image"),
    outputs=gr.Textbox(label="Extracted Text", lines=10),
    title="📷 Smart OCR Text Extractor",
    description="""
    Extract text from images using Microsoft's TrOCR model.
    Supports handwritten and printed text.
    """,
    theme=gr.themes.Soft(),
    # One row per example, each row holding the single image input
    examples=[[path] for path in _example_files] or None,
)

if __name__ == "__main__":
    demo.launch()