sample_translate / src /streamlit_app.py
pradeep4321's picture
Update src/streamlit_app.py
f263b5e verified
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langdetect import detect
# ==============================
# PAGE CONFIG
# ==============================
# set_page_config is issued before any other Streamlit command: Streamlit
# documents it as having to be the first command on a page, and older
# releases raise StreamlitAPIException when something (such as the
# st.markdown call below) runs ahead of it.
st.set_page_config(page_title="🌍 Multilingual Translator", layout="wide")

# ==============================
# 🔐 HIDE STREAMLIT MENU
# ==============================
# Inject CSS that hides the main menu, header, footer and deploy button.
# unsafe_allow_html is required for the raw <style> tag to be rendered.
st.markdown("""
<style>
#MainMenu {visibility: hidden;}
header {visibility: hidden;}
footer {visibility: hidden;}
.stDeployButton {display:none;}
</style>
""", unsafe_allow_html=True)
# ==============================
# LOAD MODEL (CACHED)
# ==============================
@st.cache_resource
def load_model(model_name="facebook/nllb-200-distilled-600M"):
    """Load the translation tokenizer and seq2seq model for *model_name*.

    The checkpoint id is a single parameter so the tokenizer and the model
    are always loaded from the same checkpoint. The function is wrapped in
    ``st.cache_resource``, so the loaded objects are reused instead of being
    reloaded on every call with the same argument.

    Args:
        model_name: Checkpoint identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return tokenizer, model


# Module-level handles used by translate().
tokenizer, model = load_model()
# ==============================
# LANGUAGE MAP (40+ LANGUAGES)
# ==============================
# Display name -> language code handed to the tokenizer/model.
# Built from ordered pairs; insertion order is the order in which the
# names appear in the target-language dropdown.
lang_map = dict(
    (
        # English plus Indian languages
        ("English", "eng_Latn"),
        ("Tamil", "tam_Taml"),
        ("Hindi", "hin_Deva"),
        ("Telugu", "tel_Telu"),
        ("Kannada", "kan_Knda"),
        ("Malayalam", "mal_Mlym"),
        ("Marathi", "mar_Deva"),
        ("Bengali", "ben_Beng"),
        ("Gujarati", "guj_Gujr"),
        ("Punjabi", "pan_Guru"),
        ("Urdu", "urd_Arab"),
        # European
        ("French", "fra_Latn"),
        ("Spanish", "spa_Latn"),
        ("German", "deu_Latn"),
        ("Italian", "ita_Latn"),
        ("Portuguese", "por_Latn"),
        ("Dutch", "nld_Latn"),
        ("Romanian", "ron_Latn"),
        ("Czech", "ces_Latn"),
        ("Polish", "pol_Latn"),
        ("Hungarian", "hun_Latn"),
        ("Finnish", "fin_Latn"),
        ("Swedish", "swe_Latn"),
        ("Norwegian", "nob_Latn"),
        ("Danish", "dan_Latn"),
        # Middle East
        ("Arabic", "arb_Arab"),
        ("Persian", "pes_Arab"),
        ("Hebrew", "heb_Hebr"),
        ("Turkish", "tur_Latn"),
        # Asian
        ("Chinese (Simplified)", "zho_Hans"),
        ("Chinese (Traditional)", "zho_Hant"),
        ("Japanese", "jpn_Jpan"),
        ("Korean", "kor_Hang"),
        ("Thai", "tha_Thai"),
        ("Vietnamese", "vie_Latn"),
        ("Indonesian", "ind_Latn"),
        # Others
        ("Russian", "rus_Cyrl"),
        ("Ukrainian", "ukr_Cyrl"),
        ("Bulgarian", "bul_Cyrl"),
        ("Greek", "ell_Grek"),
        ("Swahili", "swh_Latn"),
        ("Amharic", "amh_Ethi"),
    )
)
# ==============================
# DETECT LANGUAGE
# ==============================
# Language code returned by langdetect -> language code used for the
# tokenizer's source language. Codes missing from this table fall back to
# English in detect_language(), so every language offered in the UI that
# langdetect can report should have an entry here.
detect_map = {
    # English plus Indian languages
    "en": "eng_Latn",
    "ta": "tam_Taml",
    "hi": "hin_Deva",
    "te": "tel_Telu",
    "kn": "kan_Knda",
    "ml": "mal_Mlym",
    "mr": "mar_Deva",
    "bn": "ben_Beng",
    "gu": "guj_Gujr",
    "pa": "pan_Guru",
    "ur": "urd_Arab",
    # European
    "fr": "fra_Latn",
    "es": "spa_Latn",
    "de": "deu_Latn",
    "it": "ita_Latn",
    "pt": "por_Latn",
    "nl": "nld_Latn",
    "ro": "ron_Latn",
    "cs": "ces_Latn",
    "pl": "pol_Latn",
    "hu": "hun_Latn",
    "fi": "fin_Latn",
    "sv": "swe_Latn",
    "no": "nob_Latn",
    "da": "dan_Latn",
    # Middle East
    "ar": "arb_Arab",
    "fa": "pes_Arab",
    "he": "heb_Hebr",
    "tr": "tur_Latn",
    # Asian
    "zh-cn": "zho_Hans",
    "zh-tw": "zho_Hant",
    "ja": "jpn_Jpan",
    "ko": "kor_Hang",
    "th": "tha_Thai",
    "vi": "vie_Latn",
    "id": "ind_Latn",
    # Others
    "ru": "rus_Cyrl",
    "uk": "ukr_Cyrl",
    "bg": "bul_Cyrl",
    "el": "ell_Grek",
    "sw": "swh_Latn",
}
def detect_language(text):
    """Return the source-language code to use for *text*.

    Text shorter than three characters (after stripping whitespace) is not
    passed to the detector and is treated as English. Otherwise the detected
    code is looked up in ``detect_map``; a detection failure or a code with
    no entry also falls back to English.

    Args:
        text: The user-supplied text.

    Returns:
        A language code string such as ``"eng_Latn"``.
    """
    if len(text.strip()) < 3:
        return "eng_Latn"

    try:
        lang = detect(text)
    except Exception:
        # Best-effort fallback when detection fails (e.g. text with no
        # usable features). `Exception` rather than a bare `except` so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        lang = "en"

    return detect_map.get(lang, "eng_Latn")
# ==============================
# TRANSLATION FUNCTION
# ==============================
def translate(text, target_lang, *, max_length=200, num_beams=4):
    """Translate *text* into *target_lang* with the module-level model.

    The source language is chosen by ``detect_language``; the target is
    selected by forcing the first generated token to the target language's
    code token.

    Args:
        text: Text to translate.
        target_lang: A display name that is a key of ``lang_map``.
        max_length: ``max_length`` passed to ``model.generate``. Longer
            translations are cut off at this limit, so raise it for long
            inputs.
        num_beams: Beam-search width passed to ``model.generate``.

    Returns:
        ``(translated_text, source_language_code)``. When *text* has fewer
        than two non-whitespace-trimmed characters, a warning message is
        returned in place of the translation and the code is ``"N/A"``.

    Raises:
        KeyError: If *target_lang* is not a key of ``lang_map``.
    """
    if len(text.strip()) < 2:
        return "⚠️ Please enter valid text.", "N/A"

    src_lang = detect_language(text)
    tgt_lang = lang_map[target_lang]

    # The tokenizer reads the source language from this attribute when
    # encoding, so it must be set before the call below.
    tokenizer.src_lang = src_lang
    encoded = tokenizer(text, return_tensors="pt")

    generated_tokens = model.generate(
        **encoded,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
        max_length=max_length,
        num_beams=num_beams,  # improves accuracy
        early_stopping=True,
    )

    # A single input was encoded, so the batch holds exactly one result.
    translated = tokenizer.batch_decode(
        generated_tokens,
        skip_special_tokens=True,
    )[0]
    return translated, src_lang
# ==============================
# UI
# ==============================
st.title("🌍 40+ AI Powered-Multilingual Translators App")

# Two-column layout: input text on the left, target language on the right.
col1, col2 = st.columns(2)
with col1:
    input_text = st.text_area("Enter Text", height=200)
with col2:
    target_lang = st.selectbox("Select Target Language", list(lang_map))

if st.button("Translate"):
    if not input_text.strip():
        st.warning("Please enter text")
    else:
        with st.spinner("Translating..."):
            output, src_lang = translate(input_text, target_lang)

        if src_lang == "N/A":
            # translate() rejected the input (too short) and returned its
            # warning message instead of a translation; show it as a
            # warning rather than under a success banner.
            st.warning(output)
        else:
            st.success("✅ Translation")
            st.write(output)
            st.info(f"Detected Language Code: {src_lang}")