import streamlit as st
import torch
from transformers import MarianMTModel, MarianTokenizer

# Basic page chrome for the Streamlit app.
st.set_page_config(
    page_title="Language Translation App",
    page_icon="🌍",
    layout="wide",
)

# -----------------------------
# Custom CSS
# -----------------------------
st.markdown(""" """, unsafe_allow_html=True)

# -----------------------------
# Supported languages: display name -> ISO 639-1 code
# -----------------------------
LANGUAGES = {
    "English": "en",
    "French": "fr",
    "German": "de",
    "Spanish": "es",
    "Italian": "it",
    "Portuguese": "pt",
    "Dutch": "nl",
    "Romanian": "ro",
    "Arabic": "ar",
    "Hindi": "hi",
}

# -----------------------------
# Helsinki-NLP OPUS-MT checkpoints, keyed by (source, target) code pair.
# Only English <-> X directions are covered.
# -----------------------------
MODEL_MAP = {
    ("en", "fr"): "Helsinki-NLP/opus-mt-en-fr",
    ("fr", "en"): "Helsinki-NLP/opus-mt-fr-en",
    ("en", "de"): "Helsinki-NLP/opus-mt-en-de",
    ("de", "en"): "Helsinki-NLP/opus-mt-de-en",
    ("en", "es"): "Helsinki-NLP/opus-mt-en-es",
    ("es", "en"): "Helsinki-NLP/opus-mt-es-en",
    ("en", "it"): "Helsinki-NLP/opus-mt-en-it",
    ("it", "en"): "Helsinki-NLP/opus-mt-it-en",
    ("en", "pt"): "Helsinki-NLP/opus-mt-en-pt",
    ("pt", "en"): "Helsinki-NLP/opus-mt-pt-en",
    ("en", "nl"): "Helsinki-NLP/opus-mt-en-nl",
    ("nl", "en"): "Helsinki-NLP/opus-mt-nl-en",
    ("en", "ro"): "Helsinki-NLP/opus-mt-en-ro",
    ("ro", "en"): "Helsinki-NLP/opus-mt-ro-en",
    ("en", "ar"): "Helsinki-NLP/opus-mt-en-ar",
    ("ar", "en"): "Helsinki-NLP/opus-mt-ar-en",
    ("en", "hi"): "Helsinki-NLP/opus-mt-en-hi",
    ("hi", "en"): "Helsinki-NLP/opus-mt-hi-en",
}

# -----------------------------
# Session state: seed each key with an empty string on first run only,
# so reruns don't clobber user input.
# -----------------------------
for _key in ("input_text", "translated_text", "model_info"):
    if _key not in st.session_state:
        st.session_state[_key] = ""

# -----------------------------
# Device: prefer the GPU when one is available.
# -----------------------------
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# -----------------------------
# Load model + tokenizer
----------------------------- @st.cache_resource def load_model(model_name: str): tokenizer = MarianTokenizer.from_pretrained(model_name) model = MarianMTModel.from_pretrained(model_name) model.to(DEVICE) return tokenizer, model # ----------------------------- # Translation function # ----------------------------- def translate_text(text: str, src_lang: str, tgt_lang: str): if src_lang == tgt_lang: return text, "Same language selected" pair = (src_lang, tgt_lang) if pair not in MODEL_MAP: return None, f"No open-source model available for {src_lang} → {tgt_lang}" model_name = MODEL_MAP[pair] try: tokenizer, model = load_model(model_name) inputs = tokenizer( [text], return_tensors="pt", padding=True, truncation=True, max_length=512 ) inputs = {key: value.to(DEVICE) for key, value in inputs.items()} translated_tokens = model.generate( **inputs, max_length=512, num_beams=4, early_stopping=True ) translated_text = tokenizer.decode( translated_tokens[0], skip_special_tokens=True ) return translated_text, model_name except Exception as e: return None, f"Translation failed: {str(e)}" # ----------------------------- # Header # ----------------------------- st.markdown('