|
|
import os
import tempfile

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
|
|
|
|
|
|
|
|
# Prefer GPU; fall back to CPU so the demo still starts on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = 'deepseek-ai/DeepSeek-OCR-2'

# FlashAttention 2 requires a CUDA device (and the flash-attn package); the
# original hard-coded value made from_pretrained crash on CPU-only machines,
# defeating the CPU fallback above. Use the default eager kernels off-GPU.
attn_implementation = 'flash_attention_2' if device == "cuda" else 'eager'

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# trust_remote_code is required: DeepSeek-OCR ships custom modeling code
# (including its `infer` helper) on the Hub.
model = AutoModel.from_pretrained(
    model_name,
    _attn_implementation=attn_implementation,
    trust_remote_code=True,
    use_safetensors=True,
)

# Inference only: eval mode, move to the chosen device, and run in bfloat16
# to halve the memory footprint.
model = model.eval().to(device).to(torch.bfloat16)
|
|
|
|
|
def image_to_html(image):
    """OCR a document image into HTML/JS/CSS code with DeepSeek-OCR.

    Args:
        image: ``PIL.Image.Image`` uploaded through the Gradio UI, or ``None``
            when the user submits without an image.

    Returns:
        str: the model's generated markup (empty string when no image given).
    """
    if image is None:
        return ""

    prompt = "<image>\n<|grounding|>Convert the document to full stack HTML code. "

    # Write the upload to a unique temp file instead of a hard-coded "temp.jpg"
    # in the CWD (race-prone, never cleaned up). JPEG cannot store an alpha
    # channel, so convert first — saving an RGBA/P image as .jpg raises OSError.
    fd, image_path = tempfile.mkstemp(suffix=".jpg")
    os.close(fd)
    try:
        image.convert("RGB").save(image_path)

        with torch.no_grad():
            # BUG FIX: the original tokenized only the text prompt and called
            # model.generate(), so the image pixels never reached the model.
            # DeepSeek-OCR's remote code provides `infer`, which loads the
            # image file, preprocesses it, and decodes the answer.
            result = model.infer(
                tokenizer,
                prompt=prompt,
                image_file=image_path,
                output_path="",          # no artifacts written to disk
                base_size=1024,          # model-card recommended defaults
                image_size=640,
                crop_mode=True,
                save_results=False,
            )
    finally:
        # Always remove the temp file, even if inference fails.
        os.unlink(image_path)

    return result
|
|
|
|
|
|
|
|
# Wire the OCR function into a simple Gradio UI: one image in, one large
# text box out for the generated markup.
iface = gr.Interface(
    image_to_html,
    gr.Image(type="pil"),
    gr.Textbox(lines=20),
    title="AI Full Stack HTML Generator",
    description="ارفع صورة، وسيقوم النموذج بتحويلها إلى كود HTML/JS/CSS كامل.",
)

# Start the local web server (blocks until interrupted).
iface.launch()