| import streamlit as st |
| from transformers import MarianMTModel, MarianTokenizer |
|
|
| |
# Human-readable language name (shown in the UI) -> short language code used
# as a component of the keys in the model lookup table.
# Insertion order is the order in which the options are listed in the UI.
languages = dict(
    English='en',
    Urdu='ur',
    French='fr',
    Spanish='es',
    German='de',
    Chinese='zh',
    Italian='it',
    Russian='ru',
    Japanese='ja',
    Arabic='ar',
    Hindi='hi',
)
|
|
| |
# Language codes that have a direct checkpoint to and from English.
_ENGLISH_PARTNER_CODES = (
    'ur', 'fr', 'es', 'de', 'zh', 'it', 'ru', 'ja', 'ar', 'hi',
)

# (source code, target code) -> Hugging Face checkpoint name.
# For every partner code two entries are produced, in this order:
# English -> partner, then partner -> English.
# NOTE(review): the names are generated from a fixed pattern; confirm that
# each one (the en -> ja checkpoint in particular) actually exists on the Hub.
language_pairs = {
    (src, tgt): f'Helsinki-NLP/opus-mt-{src}-{tgt}'
    for code in _ENGLISH_PARTNER_CODES
    for src, tgt in (('en', code), (code, 'en'))
}
|
|
@st.cache_resource
def _load_checkpoint(model_name):
    """Instantiate the MarianMT model and tokenizer for one checkpoint.

    Wrapped in ``st.cache_resource`` so the pair is created once per
    checkpoint name and reused on later calls, instead of being rebuilt
    every time a translation is requested.  The returned objects are
    shared between callers and must be treated as read-only.
    """
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer


def load_model(src_lang, tgt_lang):
    """Return the ``(model, tokenizer)`` pair for a direct language pair.

    Args:
        src_lang: Source language code (e.g. ``'en'``).
        tgt_lang: Target language code (e.g. ``'fr'``).

    Returns:
        A ``(MarianMTModel, MarianTokenizer)`` tuple for the checkpoint
        registered in ``language_pairs`` under ``(src_lang, tgt_lang)``.

    Raises:
        ValueError: If no checkpoint is registered for this pair.
    """
    model_name = language_pairs.get((src_lang, tgt_lang))
    if not model_name:
        raise ValueError(f"No available model for {src_lang} to {tgt_lang}")

    # The lookup above stays outside the cached helper so that a missing
    # pair always raises and only real checkpoints are cached.
    return _load_checkpoint(model_name)
|
|
def translate(text, src_lang, tgt_lang):
    """Translate ``text`` with the direct ``src_lang`` -> ``tgt_lang`` model.

    Args:
        text: Text to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The translated string.  Blank input is returned unchanged.  Input
        longer than the tokenizer's maximum length is truncated to that
        length before translation, so the tail of very long text is dropped.

    Raises:
        ValueError: Propagated from ``load_model`` when no direct model is
            registered for the pair.
    """
    if not text.strip():
        # Nothing to translate; skip loading a model entirely.
        return text

    model, tokenizer = load_model(src_lang, tgt_lang)
    # Calling the tokenizer (rather than ``encode``) also yields the
    # attention mask; ``truncation=True`` keeps the sequence within the
    # length the model can accept instead of failing on long input.
    encoded = tokenizer(
        text, return_tensors="pt", padding=True, truncation=True
    )
    generated = model.generate(**encoded)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
|
|
def translate_chain(text, src_lang, tgt_lang):
    """Translate ``text`` by pivoting through English.

    The text is first translated into English (skipped when the source is
    already English) and then from English into the target (skipped when
    the target is English).  With English on both sides the text is
    returned untouched.
    """
    hops = []
    if src_lang != 'en':
        hops.append((src_lang, 'en'))
    if tgt_lang != 'en':
        hops.append(('en', tgt_lang))

    result = text
    for hop_src, hop_tgt in hops:
        result = translate(result, hop_src, hop_tgt)
    return result
|
|
def translate_ui(text, source_language, target_language):
    """Translate ``text`` between two languages chosen by display name.

    Args:
        text: Text to translate.
        source_language: Display name of the source language (a key of
            ``languages``).
        target_language: Display name of the target language (a key of
            ``languages``).

    Returns:
        The translated text.  When both names refer to the same language the
        text is returned unchanged.  A direct model is used when one is
        registered in ``language_pairs``; otherwise the translation pivots
        through English via ``translate_chain``.

    Raises:
        KeyError: If either display name is not in ``languages``.
        ValueError: If a required model is not registered.
    """
    src_lang = languages[source_language]
    tgt_lang = languages[target_language]

    # Same language on both sides: a round trip through English would only
    # degrade the text, so hand it back as-is.
    if src_lang == tgt_lang:
        return text

    # Decide by explicit lookup instead of catching ValueError, so that a
    # ValueError raised during tokenisation or generation is not mistaken
    # for "no direct model" and silently retried through the pivot path.
    if (src_lang, tgt_lang) in language_pairs:
        return translate(text, src_lang, tgt_lang)
    return translate_chain(text, src_lang, tgt_lang)
|
|
| |
# Page header.
st.title("Multilingual Translator")
st.write("Translate text between various languages including Urdu, French, Spanish, and more.")

# Text to translate.
text = st.text_area("Enter text to translate", height=100)

# Language pickers; options are the display names defined in `languages`.
source_language = st.selectbox("Select Source Language", list(languages.keys()))
target_language = st.selectbox("Select Target Language", list(languages.keys()))

# Run the translation only when the button is pressed.
if st.button("Translate"):
    if text.strip():
        try:
            # Loading a model can take a while; show progress meanwhile.
            with st.spinner("Translating..."):
                translation = translate_ui(text, source_language, target_language)
        except (ValueError, OSError) as exc:
            # ValueError: no model registered for the requested pair.
            # OSError: the checkpoint could not be loaded or downloaded.
            st.error(f"Translation failed: {exc}")
        else:
            st.text_area("Translated Text", translation, height=100)
    else:
        st.warning("Please enter text to translate.")

# Sidebar with a short description of the app.
st.sidebar.title("About")
st.sidebar.info(
    """
This app allows you to translate text between multiple languages using the MarianMT model from Hugging Face's Helsinki-NLP collection.
"""
)
|
|