"""Gradio demo that runs the Chandra OCR 2 model on an uploaded image."""

import subprocess
import sys

# Dependencies are installed at start-up, so this call has to run before the
# third-party imports below (hence the E402 suppressions).
subprocess.run(
    [sys.executable, "-m", "pip", "install", "chandra-ocr[hf]", "gradio", "-q"],
    check=True,
)

import torch  # noqa: E402
import gradio as gr  # noqa: E402
import spaces  # noqa: E402
from io import BytesIO  # noqa: E402
from PIL import Image  # noqa: E402
from transformers import AutoModelForImageTextToText, AutoProcessor  # noqa: E402
from chandra.model.hf import generate_hf  # noqa: E402
from chandra.model.schema import BatchInputItem  # noqa: E402
from chandra.output import parse_markdown  # noqa: E402

MODEL_ID = "datalab-to/chandra-ocr-2"

# Load the weights once at import time so every request reuses the same model.
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model.eval()

# The processor is attached to the model object and the model is then handed
# to generate_hf, which presumably reads it from there -- confirm against
# chandra.model.hf.
model.processor = AutoProcessor.from_pretrained(MODEL_ID)
# NOTE(review): left padding is the usual choice for batched generation with
# decoder-only models -- confirm it is required here.
model.processor.tokenizer.padding_side = "left"


@spaces.GPU
def ocr(image, prompt_type="ocr_layout"):
    """Run Chandra OCR on one image and return the parsed markdown.

    Args:
        image: PIL image supplied by the Gradio image input, or ``None`` when
            the user submitted the form without uploading anything.
        prompt_type: Prompt name forwarded to ``BatchInputItem``; the UI
            offers "ocr_layout", "ocr" and "caption".

    Returns:
        The result of ``parse_markdown`` applied to the raw model output.

    Raises:
        gr.Error: If no image was provided.
    """
    # Without this guard a missing upload crashes with an opaque
    # AttributeError on ``None.convert``.
    if image is None:
        raise gr.Error("Please upload an image before running OCR.")

    batch = [BatchInputItem(image=image.convert("RGB"), prompt_type=prompt_type)]
    result = generate_hf(batch, model)[0]
    return parse_markdown(result.raw)


demo = gr.Interface(
    fn=ocr,
    inputs=[
        gr.Image(type="pil", label="Upload image"),
        gr.Radio(["ocr_layout", "ocr", "caption"], value="ocr_layout"),
    ],
    outputs=gr.Textbox(label="Markdown output", lines=20),
    title="Chandra OCR 2",
)
demo.launch()