# File size: 1,960 Bytes
# 24458a9
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer

# Language display name -> two-letter code. The names populate the select
# boxes below; the codes are passed to load_model to build the model name.
lang_dict = dict(
    English='en',
    French='fr',
    German='de',
    Spanish='es',
    Italian='it',
    Russian='ru',
    Chinese='zh',
    Arabic='ar',
    Japanese='ja',
    Korean='ko',
    Urdu='ur',
    Hindi='hi',
)

# Load the MarianMT model/tokenizer pair for one translation direction
def load_model(src_lang, tgt_lang):
    """Load the model and tokenizer for translating src_lang -> tgt_lang.

    Args:
        src_lang: Source language code, inserted into the checkpoint name.
        tgt_lang: Target language code, inserted into the checkpoint name.

    Returns:
        A ``(model, tokenizer)`` tuple, both obtained via ``from_pretrained``
        for ``Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}``.
    """
    checkpoint = 'Helsinki-NLP/opus-mt-{}-{}'.format(src_lang, tgt_lang)
    # The tokenizer is loaded first, then the model, from the same checkpoint.
    tok = MarianTokenizer.from_pretrained(checkpoint)
    return MarianMTModel.from_pretrained(checkpoint), tok

# Function to translate text
def translate_text(model, tokenizer, text):
    """Translate ``text`` with an already-loaded model/tokenizer pair.

    Args:
        model: Object exposing ``generate(**inputs)``; in this app, the
            MarianMT model returned by ``load_model``.
        tokenizer: Callable that encodes ``text`` into model inputs and
            exposes ``decode``.
        text: The string to translate.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    # truncation=True clips over-long input to the tokenizer's maximum length
    # instead of handing the model a longer sequence than it supports.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    # Only the first generated sequence is decoded and returned.
    return tokenizer.decode(translated[0], skip_special_tokens=True)

# Streamlit App: pick a language pair, enter text, and show its translation.

st.title('Language Translator')

# User selects input and output languages
st.subheader('Select Input and Output Languages')
language_names = list(lang_dict)
src_lang = st.selectbox('Input Language', language_names)
# Start the output on the second entry so the default pair is not X -> X;
# translating a language into itself is rejected below.
tgt_lang = st.selectbox(
    'Output Language',
    language_names,
    index=min(1, len(language_names) - 1),
)

# Get input text from the user
st.subheader(f'Translate from {src_lang} to {tgt_lang}')
input_text = st.text_area('Enter your text here', placeholder=f'Enter text in {src_lang}...')

# Reuse already-loaded models across reruns instead of reloading them on every
# click. Streamlit releases without st.cache_resource fall back to the plain
# loader, which keeps the app working at the old (uncached) speed.
_cache_resource = getattr(st, 'cache_resource', None)
if _cache_resource is not None:
    cached_load_model = _cache_resource(show_spinner=False)(load_model)
else:
    cached_load_model = load_model

# Translate button
if st.button('Translate'):
    if not input_text.strip():
        st.warning('Please enter some text to translate.')
    elif src_lang == tgt_lang:
        st.warning('Please choose two different languages.')
    else:
        try:
            # Load the translation model
            with st.spinner('Loading translation model...'):
                model, tokenizer = cached_load_model(lang_dict[src_lang], lang_dict[tgt_lang])
        except OSError:
            # from_pretrained raises OSError when the checkpoint cannot be
            # found or fetched; report it instead of showing a traceback.
            st.error(
                f'Could not load a translation model for {src_lang} to {tgt_lang}. '
                'This language pair may not be supported.'
            )
        else:
            # Translate text
            with st.spinner('Translating...'):
                translated_text = translate_text(model, tokenizer, input_text)

            # Show the translated text
            st.subheader(f'Translated Text ({tgt_lang}):')
            st.write(translated_text)