deepseek / app.py
Sereinia's picture
Update app.py
5a84c7b verified
import gradio as gr
from transformers import AutoModel, AutoTokenizer
import torch
# Load the DeepSeek-OCR tokenizer and model once, at import time.
# trust_remote_code=True is passed to both loaders, so the model repository's
# own Python code is executed while loading.
model_name = "deepseek-ai/DeepSeek-OCR"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

model = AutoModel.from_pretrained(
    model_name,
    trust_remote_code=True,
    use_safetensors=True,
    _attn_implementation="flash_attention_2",
)

# Same sequence as a single chained call: evaluation mode first, then move the
# weights to the CUDA device, then cast them to bfloat16.
model = model.eval()
model = model.cuda()
model = model.to(torch.bfloat16)
# OCR function
def ocr_app(image):
    """Run DeepSeek-OCR on an uploaded image and return the markdown text.

    Args:
        image: Path of the uploaded image as a string (what
            ``gr.Image(type="filepath")`` passes to the handler). An object
            exposing the path as ``.name`` is still accepted for backward
            compatibility.

    Returns:
        The value returned by ``model.infer`` when it is not ``None``;
        otherwise the contents of ``outputs/result.mmd`` if that file was
        written by this call, or an empty string.

    Raises:
        gr.Error: If no image was provided.
    """
    from pathlib import Path  # local import keeps this block self-contained

    if image is None:
        # Submitting without an upload used to fail with AttributeError.
        raise gr.Error("Please upload an image first.")

    # Accept both a plain path string and a file-like wrapper with `.name`.
    image_path = getattr(image, "name", image)

    output_path = "outputs/"
    prompt = "<image>\n<|grounding|>Convert the document to markdown."

    # Drop any result left over from a previous request so the fallback below
    # can only ever return text produced by this call.
    saved_result = Path(output_path) / "result.mmd"
    saved_result.unlink(missing_ok=True)

    res = model.infer(
        tokenizer,
        prompt=prompt,
        image_file=image_path,
        output_path=output_path,
        base_size=1024,
        image_size=640,
        crop_mode=True,
        save_results=True,
        test_compress=True,
    )
    if res is not None:
        return res

    # NOTE(review): the upstream infer() appears to return None unless
    # eval_mode=True and, with save_results=True, to write the markdown to
    # <output_path>/result.mmd instead -- confirm against the model's remote
    # code. Requests share this directory, so this assumes they run one at a
    # time.
    if saved_result.is_file():
        return saved_result.read_text(encoding="utf-8")
    return ""


# Gradio UI
# type="filepath" hands the handler a path string; "file" is not an accepted
# value for gr.Image in current Gradio releases.
demo = gr.Interface(
    fn=ocr_app,
    inputs=gr.Image(type="filepath"),
    outputs=gr.Textbox(),
    title="DeepSeek-OCR",
    description="Upload an image to convert it to markdown using DeepSeek-OCR",
)
demo.launch()