| | import gradio as gr |
| | from transformers import AutoModel, AutoTokenizer |
| | import torch |
| |
|
| | |
# Checkpoint identifier handed to both `from_pretrained` calls below.
model_name = "deepseek-ai/DeepSeek-OCR"

# trust_remote_code=True allows transformers to run the Python code shipped
# with the checkpoint, so only use it with a repository you trust.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# NOTE(review): "flash_attention_2" presumably requires the flash-attn
# package and a compatible GPU -- confirm for the deployment target.
model = AutoModel.from_pretrained(
    model_name,
    use_safetensors=True,
    trust_remote_code=True,
    _attn_implementation="flash_attention_2",
)

# Inference-only setup, applied in the same order as before:
# eval mode -> move to the CUDA device -> cast to bfloat16.
model = model.eval()
model = model.cuda()
model = model.to(torch.bfloat16)
| |
|
| | |
def ocr_app(image):
    """Convert an uploaded document image to markdown with DeepSeek-OCR.

    Args:
        image: The value Gradio passes for the image input. Either a
            filesystem path (``str``) or an object whose ``name`` attribute
            holds the path of the uploaded file. ``None`` when the form is
            submitted without an image.

    Returns:
        Whatever ``model.infer`` returns for the image, or a short message
        asking for an upload when ``image`` is ``None``.
    """
    if image is None:
        # Submitting an empty form previously crashed with AttributeError
        # on `image.name`; tell the user what is missing instead.
        return "Please upload an image first."

    # Accept both a plain path string and a temp-file style object exposing
    # the path as `.name`, so the function works with either input mode.
    image_path = image if isinstance(image, str) else image.name

    # NOTE(review): confirm that `infer` actually returns the recognised
    # text with these flags; if it only prints or saves the result under
    # `output_path`, the output textbox will stay empty.
    return model.infer(
        tokenizer,
        prompt="<image>\n<|grounding|>Convert the document to markdown.",
        image_file=image_path,
        output_path="outputs/",
        base_size=1024,
        image_size=640,
        crop_mode=True,
        save_results=True,
        test_compress=True,
    )
| |
|
| | |
# Single-input, single-output web UI: one image in, the text returned by
# `ocr_app` out.
# NOTE(review): `type="file"` is only accepted by older Gradio releases;
# newer ones appear to allow just "numpy", "pil" or "filepath" -- confirm
# against the Gradio version this app is pinned to.
demo = gr.Interface(
    fn=ocr_app,
    inputs=gr.Image(type="file"),
    outputs=gr.Textbox(),
    title="DeepSeek-OCR",
    description="Upload an image to convert it to markdown using DeepSeek-OCR",
)

# Start the web server; this runs as soon as the script is executed.
demo.launch()
| |
|