import subprocess, sys
# Install the runtime dependencies with the interpreter that is running this
# script, before the third-party imports below are executed. A non-zero pip
# exit status raises CalledProcessError and aborts start-up.
subprocess.check_call(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "chandra-ocr[hf]",
        "gradio",
        "-q",
    ]
)

import torch, gradio as gr, spaces
from io import BytesIO
from PIL import Image
from transformers import AutoModelForImageTextToText, AutoProcessor
from chandra.model.hf import generate_hf
from chandra.model.schema import BatchInputItem
from chandra.output import parse_markdown

MODEL_ID = "datalab-to/chandra-ocr-2"

# Load the checkpoint once at import time, in bfloat16, and switch it to
# evaluation mode (nn.Module.eval() returns the module itself, so the call
# can be chained onto the load).
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
).eval()

# The processor is attached to the model object because `ocr` hands only
# `model` to generate_hf; presumably generate_hf reads `model.processor`.
model.processor = AutoProcessor.from_pretrained(MODEL_ID)
# NOTE(review): left padding looks intended for batched generation - confirm.
model.processor.tokenizer.padding_side = "left"

@spaces.GPU
def ocr(image, prompt_type="ocr_layout"):
    """Run the Chandra model on one image and return the parsed output.

    Args:
        image: PIL image coming from the Gradio image input, or ``None`` when
            the form is submitted without an upload.
        prompt_type: Prompt type forwarded to ``BatchInputItem``; the UI offers
            ``"ocr_layout"``, ``"ocr"`` and ``"caption"``.

    Returns:
        Whatever ``parse_markdown`` produces from the raw model output.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of an ``AttributeError`` traceback.
    """
    if image is None:
        raise gr.Error("Please upload an image before running OCR.")

    # The image is converted to RGB before being wrapped as a batch item.
    batch = [BatchInputItem(image=image.convert("RGB"), prompt_type=prompt_type)]
    # generate_hf takes a batch; only one item is submitted, so take result 0.
    result = generate_hf(batch, model)[0]
    return parse_markdown(result.raw)

# Build the single-page UI around `ocr` and start serving it immediately.
gr.Interface(
    title="Chandra OCR 2",
    fn=ocr,
    # Inputs map positionally onto ocr(image, prompt_type).
    inputs=[
        gr.Image(label="Upload image", type="pil"),
        gr.Radio(
            choices=["ocr_layout", "ocr", "caption"],
            value="ocr_layout",
        ),
    ],
    outputs=gr.Textbox(lines=20, label="Markdown output"),
).launch()