# File size: 1,827 Bytes
# 31793aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53

import gradio as gr
from PIL import Image
import pytesseract
from transformers import MBartForConditionalGeneration, MBart50Tokenizer


# Checkpoint identifier shared by the tokenizer and the model below.
MODEL_NAME = "facebook/mbart-large-50-many-to-many-mmt"
# Both objects are created once, at import time, and are used as module-level
# globals by translate_image().
# NOTE(review): from_pretrained presumably downloads/caches the weights on the
# first run — confirm start-up cost for the deployment target.
tokenizer = MBart50Tokenizer.from_pretrained(MODEL_NAME)
model = MBartForConditionalGeneration.from_pretrained(MODEL_NAME)


def translate_image(img_path, src_lang, tgt_lang):
    """Run OCR on an image and translate the extracted text.

    Args:
        img_path: Image to read; handed unchanged to
            ``pytesseract.image_to_string``. A falsy value (nothing
            uploaded) is reported to the user instead of being passed on.
        src_lang: Source language code; must be a key of
            ``tokenizer.lang_code_to_id`` (surrounding whitespace is ignored).
        tgt_lang: Target language code; same requirement as ``src_lang``.

    Returns:
        A ``(extracted_text, translated_text)`` tuple of strings. On any
        failure the first element carries a user-facing message and the
        second element is ``""``; this function does not raise.
    """
    # Clicking "Translate" without an upload yields no path; answer with a
    # clear message rather than letting the OCR call fail on it.
    if not img_path:
        return "No image provided. Please upload an image.", ""

    try:
        # Validate the language codes up front so a typo produces a readable
        # message instead of a bare KeyError such as "Error: 'en'".
        lang_ids = tokenizer.lang_code_to_id
        src_code = (src_lang or "").strip()
        tgt_code = (tgt_lang or "").strip()
        for role, code in (("source", src_code), ("target", tgt_code)):
            if code not in lang_ids:
                return (
                    f"Error: unknown {role} language code {code!r}. "
                    "Use a code such as 'en_XX' or 'fr_XX'.",
                    "",
                )

        # Extract text from the image using Tesseract OCR
        text = pytesseract.image_to_string(img_path)
        if not text.strip():
            return "No text detected. Please upload a valid image.", ""

        # Translating the text
        tokenizer.src_lang = src_code
        # truncation=True caps the input at the tokenizer's configured maximum
        # length so very long OCR output cannot overflow the model.
        inputs = tokenizer(text, return_tensors="pt", truncation=True)
        generated_tokens = model.generate(
            **inputs, forced_bos_token_id=lang_ids[tgt_code]
        )
        translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

        return text, translated_text
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and any failure should
        # be shown in the output box rather than crash the request.
        return f"Error: {e}", ""


# Gradio UI: one row of inputs (image file + language codes), one row of
# outputs (OCR text + translation), and a button wired to translate_image().
with gr.Blocks() as app:
    gr.Markdown("## OCR and Translation App")
    gr.Markdown("Upload an image, specify source and target languages, and get the translated text.")

    with gr.Row():
        # The label asks for an image, so restrict the picker to image files;
        # type="filepath" hands translate_image() a path on disk.
        img_input = gr.File(label="Upload Image", type="filepath", file_types=["image"])
        # Placeholders show the expected code format, which the labels alone
        # do not reveal.
        src_lang = gr.Textbox(
            label="Source Language Code (OCR works best with english)",
            placeholder="e.g. en_XX",
        )
        tgt_lang = gr.Textbox(
            label="Target Language Code (any target language)",
            placeholder="e.g. fr_XX",
        )

    with gr.Row():
        extracted_text = gr.Textbox(label="Extracted Text")
        translated_text = gr.Textbox(label="Translated Text")

    translate_button = gr.Button("Translate")
    # Inputs map positionally to translate_image's parameters; outputs map to
    # the two elements of the tuple it returns.
    translate_button.click(
        translate_image, inputs=[img_input, src_lang, tgt_lang], outputs=[extracted_text, translated_text]
    )


# Start the web server when the file is executed.
app.launch()