"""OCR + translation Gradio app.

Extracts text from an uploaded image with Tesseract OCR and translates it
with mBART-50 (many-to-many). Language codes follow the mBART-50 convention,
e.g. "en_XX", "fr_XX", "hi_IN".
"""

import gradio as gr
from PIL import Image  # kept: pytesseract relies on PIL for image handling
import pytesseract
from transformers import MBartForConditionalGeneration, MBart50Tokenizer

MODEL_NAME = "facebook/mbart-large-50-many-to-many-mmt"

# Loaded once at module level so every request reuses the same weights.
tokenizer = MBart50Tokenizer.from_pretrained(MODEL_NAME)
model = MBartForConditionalGeneration.from_pretrained(MODEL_NAME)


def translate_image(img_path, src_lang, tgt_lang):
    """OCR the image at *img_path* and translate the extracted text.

    Parameters
    ----------
    img_path : str | None
        Filesystem path to the uploaded image; ``None`` when the user
        clicks Translate without uploading anything.
    src_lang, tgt_lang : str
        mBART-50 language codes (e.g. ``"en_XX"``, ``"fr_XX"``).

    Returns
    -------
    tuple[str, str]
        ``(extracted_text, translated_text)``. On failure the first
        element carries a user-facing message and the second is empty.
    """
    # Guard: gr.File yields None when nothing was uploaded; without this
    # check pytesseract raises and the user sees a raw traceback message.
    if not img_path:
        return "No text detected. Please upload a valid image.", ""

    # The language fields are free-text Textboxes: trim stray whitespace
    # so "en_XX " still resolves.
    src_lang = (src_lang or "").strip()
    tgt_lang = (tgt_lang or "").strip()

    # Validate the target code up front — an unknown code would otherwise
    # surface as an opaque KeyError from lang_code_to_id.
    if tgt_lang not in tokenizer.lang_code_to_id:
        return (
            f"Error: unknown target language code {tgt_lang!r}. "
            "Use an mBART-50 code such as 'en_XX' or 'fr_XX'.",
            "",
        )

    try:
        # Extract text from the image using Tesseract OCR
        text = pytesseract.image_to_string(img_path)
        if not text.strip():
            return "No text detected. Please upload a valid image.", ""

        # mBART-50 requires the source language to be set on the tokenizer
        # and the target language forced as the first generated token.
        tokenizer.src_lang = src_lang
        inputs = tokenizer(text, return_tensors="pt")
        generated_tokens = model.generate(
            **inputs,
            forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
        )
        translated_text = tokenizer.batch_decode(
            generated_tokens, skip_special_tokens=True
        )[0]
        return text, translated_text
    except Exception as e:  # surface any OCR/model failure to the UI
        return f"Error: {str(e)}", ""


with gr.Blocks() as app:
    gr.Markdown("## OCR and Translation App")
    gr.Markdown("Upload an image, specify source and target languages, and get the translated text.")
    with gr.Row():
        img_input = gr.File(label="Upload Image", type="filepath")
        src_lang = gr.Textbox(label="Source Language Code (OCR works best with english)")
        tgt_lang = gr.Textbox(label="Target Language Code (any target language)")
    with gr.Row():
        extracted_text = gr.Textbox(label="Extracted Text")
        translated_text = gr.Textbox(label="Translated Text")
    translate_button = gr.Button("Translate")
    translate_button.click(
        translate_image,
        inputs=[img_input, src_lang, tgt_lang],
        outputs=[extracted_text, translated_text],
    )

# Guard the launch so importing this module (e.g. from tests) does not
# start the web server as a side effect.
if __name__ == "__main__":
    app.launch()