BanglaScreenReader

Sleeping

File size: 1,618 Bytes

5ffa482
3c9b1a8
5376bac
97afa3e
3c9b1a8
ad03eea
c31f5a5
ed37132
 
3c9b1a8
 
1213370
ed37132
 
c31f5a5
ed37132
97afa3e
 
ed37132
 
5376bac
 
 
 
 
 
 
5ffa482
97afa3e
1213370
c31f5a5
 
 
 
 
 
 
 
 
 
 
5ffa482

import os
import tempfile

import gradio as gr
import pytesseract
from gtts import gTTS
from PIL import Image

# Tell pytesseract explicitly which Tesseract binary to run. The default is
# '/usr/bin/tesseract'; set the TESSERACT_CMD environment variable to use a
# binary installed elsewhere without editing this file. An unset or empty
# variable falls back to the default.
pytesseract.pytesseract.tesseract_cmd = (
    os.environ.get('TESSERACT_CMD') or '/usr/bin/tesseract'
)

def bangla_reader(image):
    """Read Bengali text from an image and synthesize it as speech.

    Args:
        image: Image to run OCR on (handed unchanged to
            ``pytesseract.image_to_string``), or ``None`` when no image
            was supplied.

    Returns:
        A ``(message, audio_path)`` tuple. ``message`` is the recognized
        text under a result header, or a Bengali status message when no
        image was given, OCR failed, no text was detected, or speech
        synthesis failed (in that last case the recognized text is still
        included). ``audio_path`` is the path of a temporary MP3 file with
        the spoken text, or ``None`` whenever no audio was produced.
    """
    if image is None:
        return "কোনো ছবি দেওয়া হয়নি।", None

    # OCR with Bengali support. Tesseract failures (binary not found, or the
    # engine exiting with an error) are reported as a message, the same way
    # TTS failures are, instead of propagating out of the handler.
    try:
        ocr_text = pytesseract.image_to_string(image, lang='ben').strip()
    except (pytesseract.TesseractError, pytesseract.TesseractNotFoundError) as e:
        return f"OCR ব্যর্থ: {e}", None

    if not ocr_text:
        return "কোনো লেখা সনাক্ত করা যায়নি।", None

    # Text-to-Speech
    audio_path = None
    try:
        tts = gTTS(text=ocr_text, lang='bn')
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            audio_path = tmp.name
            # Write through the handle that is already open rather than
            # reopening the file by name, which is not possible on every
            # platform while the temporary file is still open.
            tts.write_to_fp(tmp)
    except Exception as e:
        # delete=False means nothing else removes the file, so clean up the
        # empty or partial MP3 here before reporting the failure.
        if audio_path is not None:
            try:
                os.unlink(audio_path)
            except OSError:
                pass
        # Keep the recognized text visible even though it cannot be spoken.
        return (
            f"OCR সফল হয়েছে, কিন্তু TTS ব্যর্থ: {e}\n\nOCR ফলাফল:\n{ocr_text}",
            None,
        )

    return f"OCR ফলাফল:\n{ocr_text}", audio_path

# Gradio UI: one image input wired to bangla_reader, whose two return values
# fill the text box and the audio player, in that order.
_image_input = gr.Image(type="pil", label="বাংলা লেখা সম্বলিত ছবি দিন")
_text_output = gr.Textbox(label="OCR ফলাফল")
_audio_output = gr.Audio(label="বাংলা কণ্ঠে শুনুন")

demo = gr.Interface(
    fn=bangla_reader,
    inputs=_image_input,
    outputs=[_text_output, _audio_output],
    title="📖 বাংলা রিডার (Bangla Reader)",
    description="ছবির বাংলা লেখা পড়ে তা পাঠ্য ও কণ্ঠে রূপান্তর করে শোনায়।",
    allow_flagging="never",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()