# Spaces: Runtime error
| from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer | |
| import pytesseract as tsr | |
| from PIL import Image | |
| import sys, os | |
| import gradio as gr | |
# Tell pytesseract which Tesseract executable to invoke.
tsr.pytesseract.tesseract_cmd = r"/usr/bin/tesseract"

# Load the M2M100 (418M) checkpoint and its tokenizer once at module level;
# extractAndTranslate reuses these two objects on every call.
model = M2M100ForConditionalGeneration.from_pretrained(
    "facebook/m2m100_418M"
)
tokenizer = M2M100Tokenizer.from_pretrained(
    "facebook/m2m100_418M"
)
def extractAndTranslate(image):
    """OCR the text in an image and translate it from English to Hindi.

    Args:
        image: The image to read, in any form accepted by
            ``pytesseract.image_to_string``. ``None`` is treated as
            "no image supplied".

    Returns:
        The translated text decoded from the model output, or an empty
        string when no image was given or OCR produced no text.
    """
    # Nothing was supplied: skip OCR instead of handing None to pytesseract.
    if image is None:
        return ""

    # Extract text; Tesseract is run with both English and Hindi data.
    extractedText = tsr.image_to_string(image, lang='eng+hin')

    # Collapse every whitespace run (newlines, blank lines, and any
    # page-separator form feed Tesseract may append) into single spaces so
    # the translator receives one clean line of text.
    extractedTextFormatted = ' '.join(extractedText.split())
    if not extractedTextFormatted:
        # No recognisable text: there is nothing to translate.
        return ""

    # Translate: source language is set to English, and generation is
    # forced to start with the Hindi language token.
    tokenizer.src_lang = "en"
    encodedText = tokenizer(extractedTextFormatted, return_tensors="pt")
    generatedTokens = model.generate(
        **encodedText,
        forced_bos_token_id=tokenizer.get_lang_id("hi"),
    )
    # A single input string was encoded, so take the first decoded entry.
    return tokenizer.batch_decode(generatedTokens, skip_special_tokens=True)[0]
# Build the Gradio UI: a single image input and a single text output,
# backed by extractAndTranslate, then start serving it.
demoApp = gr.Interface(
    fn=extractAndTranslate,
    inputs="image",
    outputs="text",
)
demoApp.launch()