saiful-ai-dev committed on
Commit
8e0002b
·
verified ·
1 Parent(s): 7b046a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -18
app.py CHANGED
@@ -1,28 +1,40 @@
1
  import gradio as gr
2
- import easyocr
3
- import numpy as np
 
4
 
5
# Languages handed to the shared EasyOCR reader (Bangla, English, Hindi,
# Arabic, French, Spanish); extend the list to support more.
# NOTE(review): EasyOCR restricts which scripts may share a single Reader --
# confirm that this particular combination actually loads.
_OCR_LANGUAGES = ['bn', 'en', 'hi', 'ar', 'fr', 'es']

# Built once at import time: the first load is slow, later ones are fast.
# Pass gpu=True instead if the Space has a (paid) Hugging Face GPU.
reader = easyocr.Reader(_OCR_LANGUAGES, gpu=False)
 
 
 
 
 
 
8
 
9
def text_extraction(image):
    """Run the shared EasyOCR reader on an image and return its text.

    Args:
        image: The uploaded picture (the Gradio input is declared with
            ``type="numpy"``), or ``None`` when nothing was uploaded.

    Returns:
        The recognised paragraphs separated by blank lines, or a Bengali
        notice when no image was given or no text was found.
    """
    if image is None:
        return "দয়া করে একটি ছবি আপলোড করুন।"

    # detail=0 yields plain strings only; paragraph=True groups nearby lines
    # into paragraphs, which reads better for book pages.
    paragraphs = reader.readtext(
        image,
        detail=0,
        paragraph=True,
        min_size=10,
        contrast_ths=0.1,
        adjust_contrast=0.5,
    )

    # One blank line between paragraphs keeps them visually separate.
    joined = "\n\n".join(paragraphs)
    if joined.strip():
        return joined
    return "ছবিতে কোনো লেখা খুঁজে পাওয়া যায়নি।"
 
 
 
 
 
19
 
# Gradio UI for the EasyOCR pipeline: one image in, extracted text out.
_image_input = gr.Image(type="numpy")

demo = gr.Interface(
    fn=text_extraction,
    inputs=_image_input,
    outputs="text",
    title="Education AI OCR - Any Language",
    description=(
        "যেকোনো ভাষার (Bangla, English, Hindi, Arabic ইত্যাদি) বই/পাতার ছবি আপলোড করুন। "
        "Mixed language-ও detect করবে।"
    ),
)

# Start serving the interface.
demo.launch()
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ from PIL import Image
4
+ import torch
5
 
6
# Load the vision-language model once at import time (slow the first time;
# the downloaded weights are cached afterwards).
model_id = "vikhyatk/moondream2"  # alternatively try "moondream/moondream3-preview"
revision = "2025-06-21"  # check the Hugging Face model page for the latest stable revision

# Half precision only when a CUDA device is present; full precision otherwise.
if torch.cuda.is_available():
    _model_dtype = torch.float16
else:
    _model_dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    revision=revision,
    trust_remote_code=True,
    torch_dtype=_model_dtype,
    device_map="auto",
)

# Tokenizer pinned to the same repository and revision as the model.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    revision=revision,
    trust_remote_code=True,
)
15
 
16
def ocr_image(image, prompt="Extract all text from this image accurately."):
    """Extract text from an image by querying the Moondream model.

    Args:
        image: The uploaded picture (the Gradio input is declared with
            ``type="pil"``), or ``None`` when nothing was uploaded.
        prompt: Instruction sent to the model together with the image. A
            blank or missing value falls back to the default instruction.

    Returns:
        The model's answer with surrounding whitespace removed, or a
        Bengali notice when no image was given or no text came back.
    """
    if image is None:
        return "দয়া করে ছবি আপলোড করুন।"

    # The prompt textbox can be cleared by the user; never send an empty
    # question to the model.
    question = (prompt or "").strip() or "Extract all text from this image accurately."

    # Ask through the model's own visual-question helper rather than the
    # generic generate()/tokenizer path: the remote code loaded for this
    # checkpoint handles image encoding and decoding itself and returns only
    # the answer text (no echoed prompt).
    # NOTE(review): `query(...)["answer"]` and the `max_tokens` setting follow
    # the vikhyatk/moondream2 model card for the pinned revision -- re-check
    # if the revision pin is changed.
    result = model.query(image, question, settings={"max_tokens": 512})
    answer = (result["answer"] or "").strip()

    return answer if answer else "কোনো টেক্সট পাওয়া যায়নি।"
31
 
# Gradio UI for the Moondream pipeline: an image plus an editable prompt in,
# extracted text out.
_image_input = gr.Image(type="pil")
_prompt_input = gr.Textbox(
    label="Custom Prompt (optional)",
    value="Extract all text from this image accurately.",
)

demo = gr.Interface(
    fn=ocr_image,
    inputs=[_image_input, _prompt_input],
    outputs="text",
    title="Moondream OCR - Any Language Try",
    description=(
        "Moondream দিয়ে ছবি থেকে টেক্সট extract করুন। "
        "Prompt customize করতে পারেন (e.g., Bangla text চাইলে 'Extract Bangla text' বলুন)।"
    ),
)

# Start serving the interface.
demo.launch()