Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| from langdetect import detect | |
# ==============================
# PAGE CONFIG
# ==============================
# set_page_config has to run before any other Streamlit command on the page,
# so it is issued ahead of the CSS injection below.
st.set_page_config(page_title="🌍 Multilingual Translator", layout="wide")

# ==============================
# 🔐 HIDE STREAMLIT MENU (SOLUTION 2)
# ==============================
# Inject CSS that hides the main menu, header, footer and deploy button.
# unsafe_allow_html is required for the <style> tag to be rendered as HTML.
st.markdown("""
<style>
#MainMenu {visibility: hidden;}
header {visibility: hidden;}
footer {visibility: hidden;}
.stDeployButton {display:none;}
</style>
""", unsafe_allow_html=True)
# ==============================
# LOAD MODEL (CACHED)
# ==============================
@st.cache_resource
def load_model():
    """Load the NLLB-200 tokenizer and seq2seq model.

    Streamlit re-executes the whole script on every interaction; the
    ``st.cache_resource`` decorator keeps a single loaded copy around so the
    checkpoint is not re-read on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple for the
        ``facebook/nllb-200-distilled-600M`` checkpoint.
    """
    checkpoint = "facebook/nllb-200-distilled-600M"
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    return loaded_tokenizer, loaded_model


# Module-level handles used by translate().
tokenizer, model = load_model()
# ==============================
# LANGUAGE MAP (40+ LANGUAGES)
# ==============================
# Display name -> NLLB-200 language code. Insertion order is the order in
# which the names appear in the target-language dropdown.
lang_map = {
    # English + Indian languages
    "English": "eng_Latn",
    "Tamil": "tam_Taml",
    "Hindi": "hin_Deva",
    "Telugu": "tel_Telu",
    "Kannada": "kan_Knda",
    "Malayalam": "mal_Mlym",
    "Marathi": "mar_Deva",
    "Bengali": "ben_Beng",
    "Gujarati": "guj_Gujr",
    "Punjabi": "pan_Guru",
    "Urdu": "urd_Arab",

    # European
    "French": "fra_Latn",
    "Spanish": "spa_Latn",
    "German": "deu_Latn",
    "Italian": "ita_Latn",
    "Portuguese": "por_Latn",
    "Dutch": "nld_Latn",
    "Romanian": "ron_Latn",
    "Czech": "ces_Latn",
    "Polish": "pol_Latn",
    "Hungarian": "hun_Latn",
    "Finnish": "fin_Latn",
    "Swedish": "swe_Latn",
    "Norwegian": "nob_Latn",
    "Danish": "dan_Latn",

    # Middle East
    "Arabic": "arb_Arab",
    "Persian": "pes_Arab",
    "Hebrew": "heb_Hebr",
    "Turkish": "tur_Latn",

    # Asian
    "Chinese (Simplified)": "zho_Hans",
    "Chinese (Traditional)": "zho_Hant",
    "Japanese": "jpn_Jpan",
    "Korean": "kor_Hang",
    "Thai": "tha_Thai",
    "Vietnamese": "vie_Latn",
    "Indonesian": "ind_Latn",

    # Others
    "Russian": "rus_Cyrl",
    "Ukrainian": "ukr_Cyrl",
    "Bulgarian": "bul_Cyrl",
    "Greek": "ell_Grek",
    "Swahili": "swh_Latn",
    "Amharic": "amh_Ethi",
}
# ==============================
# DETECT LANGUAGE
# ==============================
# langdetect language code -> NLLB-200 language code.
# Every language offered in lang_map that langdetect can report has an entry
# here, so text in those languages is not mislabelled as English source.
# (Amharic is in lang_map but has no langdetect code, so it has no entry.)
detect_map = {
    # English + Indian languages
    "en": "eng_Latn",
    "ta": "tam_Taml",
    "hi": "hin_Deva",
    "te": "tel_Telu",
    "kn": "kan_Knda",
    "ml": "mal_Mlym",
    "mr": "mar_Deva",
    "bn": "ben_Beng",
    "gu": "guj_Gujr",
    "pa": "pan_Guru",
    "ur": "urd_Arab",

    # European
    "fr": "fra_Latn",
    "es": "spa_Latn",
    "de": "deu_Latn",
    "it": "ita_Latn",
    "pt": "por_Latn",
    "nl": "nld_Latn",
    "ro": "ron_Latn",
    "cs": "ces_Latn",
    "pl": "pol_Latn",
    "hu": "hun_Latn",
    "fi": "fin_Latn",
    "sv": "swe_Latn",
    "no": "nob_Latn",
    "da": "dan_Latn",

    # Middle East
    "ar": "arb_Arab",
    "fa": "pes_Arab",
    "he": "heb_Hebr",
    "tr": "tur_Latn",

    # Asian
    "zh-cn": "zho_Hans",
    "zh-tw": "zho_Hant",
    "ja": "jpn_Jpan",
    "ko": "kor_Hang",
    "th": "tha_Thai",
    "vi": "vie_Latn",
    "id": "ind_Latn",

    # Others
    "ru": "rus_Cyrl",
    "uk": "ukr_Cyrl",
    "bg": "bul_Cyrl",
    "el": "ell_Grek",
    "sw": "swh_Latn",
}
def detect_language(text):
    """Guess the source language of *text* as an NLLB-200 language code.

    Args:
        text: The user-supplied text to inspect.

    Returns:
        The NLLB code mapped from langdetect's result. Falls back to
        ``"eng_Latn"`` when the stripped text is shorter than three
        characters, when detection fails, or when the detected language has
        no entry in ``detect_map``.
    """
    # Too little text to detect anything from; default to English.
    if len(text.strip()) < 3:
        return "eng_Latn"

    try:
        lang = detect(text)
    except Exception:
        # Best-effort fallback: a detection failure must not break the
        # translation flow. Unlike a bare ``except:``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        lang = "en"

    return detect_map.get(lang, "eng_Latn")
# ==============================
# TRANSLATION FUNCTION
# ==============================
def translate(text, target_lang, *, max_length=200, num_beams=4):
    """Translate *text* into the language named by *target_lang*.

    The source language is auto-detected with ``detect_language``.

    Args:
        text: Text to translate.
        target_lang: A display name that is a key of ``lang_map``.
        max_length: ``max_length`` passed to ``model.generate``; defaults to
            the previously hard-coded 200.
        num_beams: Beam-search width passed to ``model.generate``; defaults
            to the previously hard-coded 4.

    Returns:
        ``(translated_text, source_lang_code)``. When the stripped input is
        shorter than two characters, returns
        ``("⚠️ Please enter valid text.", "N/A")`` instead.

    Raises:
        KeyError: If *target_lang* is not a key of ``lang_map``.
    """
    if len(text.strip()) < 2:
        return "⚠️ Please enter valid text.", "N/A"

    src_lang = detect_language(text)
    tgt_lang = lang_map[target_lang]

    # The tokenizer reads src_lang when encoding, so set it first.
    tokenizer.src_lang = src_lang
    encoded = tokenizer(text, return_tensors="pt")

    # Force the first generated token to be the target-language tag.
    target_token_id = tokenizer.convert_tokens_to_ids(tgt_lang)
    generated_tokens = model.generate(
        **encoded,
        forced_bos_token_id=target_token_id,
        max_length=max_length,
        num_beams=num_beams,
        early_stopping=True,
    )

    decoded = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return decoded[0], src_lang
# ==============================
# UI
# ==============================
st.title("🌍 40+ AI Powered-Multilingual Translators App")

# Two-column layout: input text on the left, target language on the right.
col1, col2 = st.columns(2)
with col1:
    input_text = st.text_area("Enter Text", height=200)
with col2:
    target_lang = st.selectbox("Select Target Language", list(lang_map))

if st.button("Translate"):
    if not input_text.strip():
        st.warning("Please enter text")
    else:
        with st.spinner("Translating..."):
            output, src_lang = translate(input_text, target_lang)

        if src_lang == "N/A":
            # translate() rejected the input and returned its validation
            # message; show it as a warning, not as a successful translation.
            st.warning(output)
        else:
            st.success("✅ Translation")
            st.write(output)
            st.info(f"Detected Language Code: {src_lang}")