# missmeyet's picture
# Changed tesseract path
# 3bc9086
# raw
# history blame contribute delete
# 931 Bytes
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import pytesseract as tsr
from PIL import Image
import sys, os
import gradio as gr
# Location of the Tesseract executable that pytesseract is told to invoke.
tsr.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

# The translation model and its tokenizer are loaded from the same checkpoint,
# so the checkpoint name is spelled out once.
_M2M100_CHECKPOINT = "facebook/m2m100_418M"
model = M2M100ForConditionalGeneration.from_pretrained(_M2M100_CHECKPOINT)
tokenizer = M2M100Tokenizer.from_pretrained(_M2M100_CHECKPOINT)
def extractAndTranslate(image):
    """Run OCR on an image and translate the recognized text to Hindi.

    The image is read with Tesseract (English + Hindi language data), the
    recognized text is flattened to a single line, and that line is
    translated with the module-level M2M100 ``model`` / ``tokenizer`` using
    "en" as the source language and "hi" as the target language.

    Args:
        image: The image to read, in whatever form
            ``pytesseract.image_to_string`` accepts. ``None`` is tolerated
            and treated as "no image supplied".

    Returns:
        The translated text, or an empty string when there is no image or
        OCR finds no text.
    """
    # Nothing was supplied (presumably what the UI passes when the form is
    # submitted without an image -- verify against Gradio): skip OCR instead
    # of failing inside Tesseract.
    if image is None:
        return ""

    # Extract Text
    raw_text = tsr.image_to_string(image, lang='eng+hin')
    # split() with no argument collapses every run of whitespace, so blank
    # lines and stray trailing whitespace do not leave gaps in the sentence
    # handed to the translator.
    source_text = ' '.join(raw_text.split())
    if not source_text:
        # No recognizable text: do not ask the model to translate an empty string.
        return ""

    # Translate
    tokenizer.src_lang = "en"
    encoded = tokenizer(source_text, return_tensors="pt")
    generated = model.generate(
        **encoded,
        forced_bos_token_id=tokenizer.get_lang_id("hi"),
    )
    # A single input sequence was encoded, so the first decoded entry is the result.
    return tokenizer.batch_decode(generated, skip_special_tokens=True)[0]
# Build the Gradio UI: one image input, one text output, with
# extractAndTranslate as the handler.
demoApp = gr.Interface(fn=extractAndTranslate, inputs="image", outputs="text")
# Start serving the interface.
demoApp.launch()