"""Fanoni Document AI - HuggingFace Space with GOT-OCR2.0 Model."""

import os
import tempfile

import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Hub identifier of the GOT-OCR2.0 checkpoint; shared by the tokenizer and the
# model so both are always loaded from the same repository.
MODEL_NAME = "ucaslcl/GOT-OCR2_0"

# Both objects are created once at import time and reused by every request.
# NOTE(review): trust_remote_code=True lets transformers run Python code that
# ships with the model repository — confirm the repository is trusted.
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
)

print("Loading model...")
model = AutoModel.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,  # load the weights in half precision
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
print("Model loaded!")


@spaces.GPU
def extract_text(image, output_format):
    """Extract text from an uploaded image using GOT-OCR2.0.

    Args:
        image: Either a filesystem path (``str``) that is handed to the model
            as-is, or an array accepted by ``PIL.Image.fromarray``. ``None``
            means nothing was uploaded.
        output_format: Label chosen in the UI. The value
            ``"Formatted (Tables/Structure)"`` selects the model's ``'format'``
            mode; any other value selects plain ``'ocr'``.

    Returns:
        The text returned by the model's ``chat`` call, the prompt
        ``"Please upload an image."`` when ``image`` is ``None``, or a string
        starting with ``"Error: "`` when anything raises during processing.
    """
    if image is None:
        return "Please upload an image."

    # Only the formatted option maps to 'format'; every other label (including
    # unknown ones) falls back to plain OCR, as before.
    if output_format == "Formatted (Tables/Structure)":
        ocr_type = "format"
    else:
        ocr_type = "ocr"

    temp_path = None  # set only when this call creates a file it must remove
    try:
        # Move model to GPU for this call, falling back to CPU when no CUDA
        # device is visible.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model_gpu = model.to(device).eval()

        if isinstance(image, str):
            # Already a path on disk; use it directly and never delete it.
            image_path = image
        else:
            # A unique file per request: a shared fixed path would let
            # concurrent requests overwrite each other's image before the
            # model has read it.
            fd, temp_path = tempfile.mkstemp(suffix=".png")
            os.close(fd)
            Image.fromarray(image).save(temp_path)
            image_path = temp_path

        return model_gpu.chat(tokenizer, image_path, ocr_type=ocr_type)

    except Exception as e:
        # UI boundary: surface the failure as text in the output box instead
        # of raising into the caller.
        return f"Error: {str(e)}"

    finally:
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask the
                # OCR result or the original error.
                pass


# Gradio Interface
# Builds the page as a Blocks app bound to `demo`: a header, a two-column row
# (inputs on the left, result on the right), the button wiring, and a footer.
# Components appear in the order they are created inside the `with` blocks.
with gr.Blocks(title="Fanoni Document AI", theme=gr.themes.Soft()) as demo:
    # Page header and short usage hint.
    gr.Markdown("""
    # 📄 Fanoni Document AI
    ### Extract text from documents using GOT-OCR2.0

    Upload an image of a document (invoice, receipt, form, etc.) to extract text.
    """)

    with gr.Row():
        # Left column: document upload, output-format choice and the trigger button.
        with gr.Column(scale=1):
            # type="numpy" is what extract_text's Image.fromarray branch consumes.
            image_input = gr.Image(
                label="Upload Document",
                type="numpy",
                height=400
            )
            # The choice strings are compared verbatim inside extract_text, so
            # they must stay in sync with that function.
            format_dropdown = gr.Dropdown(
                choices=["Plain Text", "Formatted (Tables/Structure)"],
                value="Plain Text",
                label="Output Format"
            )
            extract_btn = gr.Button("Extract Text", variant="primary", size="lg")

        # Right column: text box that receives whatever extract_text returns
        # (OCR result, upload prompt, or error message).
        with gr.Column(scale=1):
            output_text = gr.Textbox(
                label="Extracted Text",
                lines=20,
                max_lines=50,
                show_copy_button=True
            )

    # Clicking the button calls extract_text(image, output_format) and writes
    # its return value into the output text box.
    extract_btn.click(
        fn=extract_text,
        inputs=[image_input, format_dropdown],
        outputs=output_text
    )

    # Footer with supported formats and model attribution.
    gr.Markdown("""
    ---
    **Supported formats:** JPG, PNG, WEBP, BMP
    **Model:** [GOT-OCR2.0](https://huggingface.co/ucaslcl/GOT-OCR2_0) - General OCR Theory
    **Powered by:** Fanoni AI
    """)

# Start the web server only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()