OCR / app.py
saiful-ai-dev's picture
Update app.py
8e0002b verified
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import torch
# Load the vision-language model and its tokenizer once at import time.
# The first start is slow (weights are downloaded); later starts reuse the cache.
model_id = "vikhyatk/moondream2"  # alternatively, try "moondream/moondream3-preview"
revision = "2025-06-21"  # check the Hugging Face model page for the latest stable revision

# Half precision when CUDA is available, full precision otherwise.
_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    revision=revision,
    trust_remote_code=True,
    torch_dtype=_dtype,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    revision=revision,
    trust_remote_code=True,
)
_DEFAULT_OCR_PROMPT = "Extract all text from this image accurately."


def ocr_image(image, prompt=_DEFAULT_OCR_PROMPT):
    """Extract text from an uploaded image using the module-level Moondream model.

    Args:
        image: The uploaded picture (the UI supplies a PIL image), or ``None``
            when nothing was uploaded.
        prompt: Instruction sent to the model together with the image. A blank
            or whitespace-only prompt falls back to the default OCR instruction.

    Returns:
        The model's answer as a string, or a Bengali user-facing message when
        no image was given or the model produced no text.
    """
    if image is None:
        return "দয়া করে ছবি আপলোড করুন।"

    # The textbox can be cleared by the user; never send an empty question.
    question = prompt.strip() if isinstance(prompt, str) else ""
    if not question:
        question = _DEFAULT_OCR_PROMPT

    # Use the model's documented visual-query entry point, which takes the
    # image and the question directly and returns a dict with an "answer" key.
    # The earlier approach (moving the encode_image() result with .to() and
    # feeding tokenizer output plus image_embeds into generate()) is not the
    # interface documented for the pinned moondream2 revision.
    # Generation/sampling settings are left at the model's defaults.
    result = model.query(image, question)
    generated_text = str(result["answer"]).strip()

    return generated_text if generated_text else "কোনো টেক্সট পাওয়া যায়নি।"
# Assemble the web UI: an image upload plus an editable prompt go in,
# the extracted text comes out as plain text.
_image_input = gr.Image(type="pil")
_prompt_input = gr.Textbox(
    label="Custom Prompt (optional)",
    value="Extract all text from this image accurately.",
)

demo = gr.Interface(
    fn=ocr_image,
    inputs=[_image_input, _prompt_input],
    outputs="text",
    title="Moondream OCR - Any Language Try",
    description="Moondream দিয়ে ছবি থেকে টেক্সট extract করুন। Prompt customize করতে পারেন (e.g., Bangla text চাইলে 'Extract Bangla text' বলুন)।",
)

# Start the Gradio server.
demo.launch()