from pathlib import Path

import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer

# Load the tokenizer and model once at start-up so every request reuses them.
model_name = "deepseek-ai/DeepSeek-OCR"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Load the weights with the FlashAttention-2 backend requested, then switch to
# inference mode, move to the CUDA device and cast to bfloat16 -- in that order.
model = (
    AutoModel.from_pretrained(
        model_name,
        _attn_implementation="flash_attention_2",
        trust_remote_code=True,
        use_safetensors=True,
    )
    .eval()
    .cuda()
    .to(torch.bfloat16)
)

# OCR function
def ocr_app(image):
    """Run DeepSeek-OCR on an uploaded image and return the markdown text.

    Args:
        image: The upload as delivered by Gradio. A filesystem path (``str``
            or ``Path``) is expected; a tempfile-like object exposing its path
            through ``.name`` is accepted as well. ``None`` means that nothing
            was uploaded.

    Returns:
        The text produced by the model, or a short notice when no image was
        supplied.
    """
    # Gradio calls the function with None when the user submits an empty form.
    if image is None:
        return "Please upload an image first."

    # Path objects also have a ``.name`` (the basename), so test for path-like
    # inputs before falling back to the tempfile-style ``.name`` attribute.
    if isinstance(image, (str, Path)):
        image_path = str(image)
    else:
        image_path = image.name

    output_path = "outputs/"
    output_dir = Path(output_path)
    output_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): the upstream ``infer`` appears to return the text only in
    # eval mode and otherwise to write ``result.mmd`` under ``output_path``
    # when ``save_results=True`` -- confirm against the model's remote code.
    # Drop any leftover file so an earlier request's text is never returned.
    result_file = output_dir / "result.mmd"
    result_file.unlink(missing_ok=True)

    prompt = "<image>\n<|grounding|>Convert the document to markdown."
    res = model.infer(
        tokenizer,
        prompt=prompt,
        image_file=image_path,
        output_path=output_path,
        base_size=1024,
        image_size=640,
        crop_mode=True,
        save_results=True,
        test_compress=True,
    )
    if res:
        return res
    # Nothing was returned directly: fall back to the saved markdown, if any.
    if result_file.is_file():
        return result_file.read_text(encoding="utf-8")
    return res


# Gradio UI
gr.Interface(
    fn=ocr_app,
    # "filepath" passes the callback a str path to a temporary copy of the
    # upload; the older "file" value is not accepted by current Gradio.
    inputs=gr.Image(type="filepath"),
    outputs=gr.Textbox(),
    title="DeepSeek-OCR",
    description="Upload an image to convert it to markdown using DeepSeek-OCR",
).launch()