"""Gradio demo: upload an image and convert it to markdown with DeepSeek-OCR."""

import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer

# Load model
model_name = "deepseek-ai/DeepSeek-OCR"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(
    model_name,
    _attn_implementation="flash_attention_2",
    trust_remote_code=True,
    use_safetensors=True,
)
# Switch to eval mode, move to the CUDA device, and cast weights to bfloat16.
model = model.eval().cuda().to(torch.bfloat16)


# OCR function
def ocr_app(image):
    """Run DeepSeek-OCR on one uploaded image.

    Args:
        image: The uploaded image, either a filesystem path (``str``, as
            delivered by ``gr.Image(type="filepath")``) or a file-like
            object exposing the path as ``.name`` (legacy ``type="file"``).

    Returns:
        Whatever ``model.infer`` returns for the image.

    Raises:
        gr.Error: If no image was uploaded.
    """
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    # Accept both a plain path and a temp-file wrapper with a ``.name`` path.
    image_path = getattr(image, "name", image)

    output_path = "outputs/"
    # NOTE(review): upstream usage examples appear to start this prompt with
    # an "<image>" placeholder; confirm it was not stripped from this file.
    prompt = "\n<|grounding|>Convert the document to markdown."

    # NOTE(review): confirm that ``infer`` returns the recognized text when
    # ``save_results=True`` rather than only writing it under ``output_path``.
    res = model.infer(
        tokenizer,
        prompt=prompt,
        image_file=image_path,
        output_path=output_path,
        base_size=1024,
        image_size=640,
        crop_mode=True,
        save_results=True,
        test_compress=True,
    )
    return res


# Gradio UI
demo = gr.Interface(
    fn=ocr_app,
    # "filepath" hands the callback a path string, which ``infer`` takes as
    # ``image_file``.
    inputs=gr.Image(type="filepath"),
    outputs=gr.Textbox(),
    title="DeepSeek-OCR",
    description="Upload an image to convert it to markdown using DeepSeek-OCR",
)
demo.launch()