File size: 4,580 Bytes
ddbcea8
e8141e8
 
b9a0eb8
f263b5e
 
 
 
 
 
 
 
 
 
 
b9a0eb8
 
db6c801
 
 
 
 
 
b9a0eb8
e8141e8
 
 
 
 
b9a0eb8
e8141e8
b9a0eb8
e36d297
db6c801
e36d297
e8141e8
db6c801
e8141e8
 
 
db6c801
 
 
 
 
 
 
 
 
 
e8141e8
 
db6c801
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e8141e8
 
db6c801
 
 
e8141e8
 
 
 
db6c801
 
 
 
 
 
 
 
e8141e8
 
db6c801
 
 
 
 
 
 
 
 
 
 
 
 
e8141e8
b9a0eb8
db6c801
 
 
 
 
 
 
 
 
b9a0eb8
6d492af
b9a0eb8
 
c4e12b6
db6c801
 
e8141e8
db6c801
e8141e8
b9a0eb8
e8141e8
b9a0eb8
e8141e8
e36d297
e8141e8
 
 
db6c801
 
 
e8141e8
b9a0eb8
db6c801
 
 
 
b9a0eb8
e8141e8
b9a0eb8
 
 
 
8de26ca
db6c801
 
c4e12b6
db6c801
 
b9a0eb8
db6c801
 
b9a0eb8
 
c4e12b6
db6c801
b9a0eb8
 
e8141e8
b9a0eb8
 
 
 
db6c801
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langdetect import detect
# ==============================
# PAGE CONFIG
# ==============================
# Kept ahead of every other Streamlit call: Streamlit documents
# set_page_config as the first command of a page, and releases that enforce
# this raise an exception when another st.* call (such as the CSS injection
# below) runs before it.
st.set_page_config(page_title="🌍 Multilingual Translator", layout="wide")

# ==============================
# HIDE STREAMLIT MENU (SOLUTION 2)
# ==============================
# Injects raw CSS that hides the main menu, header, footer and deploy button.
st.markdown("""
<style>
#MainMenu {visibility: hidden;}
header {visibility: hidden;}
footer {visibility: hidden;}
.stDeployButton {display:none;}
</style>
""", unsafe_allow_html=True)

# ==============================
# LOAD MODEL (CACHED)
# ==============================
@st.cache_resource
def load_model(model_name="facebook/nllb-200-distilled-600M"):
    """Load the tokenizer and seq2seq model for a checkpoint.

    Decorated with ``st.cache_resource`` so the objects are created once and
    reused instead of being reloaded on every script run.

    Args:
        model_name: Hugging Face checkpoint id (or local path) passed to both
            ``from_pretrained`` calls. Defaults to the distilled 600M NLLB-200
            checkpoint this app was written for.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # A single name feeds both loaders so tokenizer and model cannot drift
    # apart when the checkpoint is changed.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return tokenizer, model


# Module-level handles used by translate().
tokenizer, model = load_model()

# ==============================
# LANGUAGE MAP (40+ LANGUAGES)
# ==============================
# Display name -> language code handed to the translation model.
# The per-region groups are merged in the order listed below, and that order
# is the order in which the names appear when lang_map is iterated.

# English plus languages of the Indian subcontinent.
_ENGLISH_AND_INDIAN_LANGS = {
    "English": "eng_Latn",
    "Tamil": "tam_Taml",
    "Hindi": "hin_Deva",
    "Telugu": "tel_Telu",
    "Kannada": "kan_Knda",
    "Malayalam": "mal_Mlym",
    "Marathi": "mar_Deva",
    "Bengali": "ben_Beng",
    "Gujarati": "guj_Gujr",
    "Punjabi": "pan_Guru",
    "Urdu": "urd_Arab",
}

_EUROPEAN_LANGS = {
    "French": "fra_Latn",
    "Spanish": "spa_Latn",
    "German": "deu_Latn",
    "Italian": "ita_Latn",
    "Portuguese": "por_Latn",
    "Dutch": "nld_Latn",
    "Romanian": "ron_Latn",
    "Czech": "ces_Latn",
    "Polish": "pol_Latn",
    "Hungarian": "hun_Latn",
    "Finnish": "fin_Latn",
    "Swedish": "swe_Latn",
    "Norwegian": "nob_Latn",
    "Danish": "dan_Latn",
}

_MIDDLE_EAST_LANGS = {
    "Arabic": "arb_Arab",
    "Persian": "pes_Arab",
    "Hebrew": "heb_Hebr",
    "Turkish": "tur_Latn",
}

_ASIAN_LANGS = {
    "Chinese (Simplified)": "zho_Hans",
    "Chinese (Traditional)": "zho_Hant",
    "Japanese": "jpn_Jpan",
    "Korean": "kor_Hang",
    "Thai": "tha_Thai",
    "Vietnamese": "vie_Latn",
    "Indonesian": "ind_Latn",
}

_OTHER_LANGS = {
    "Russian": "rus_Cyrl",
    "Ukrainian": "ukr_Cyrl",
    "Bulgarian": "bul_Cyrl",
    "Greek": "ell_Grek",
    "Swahili": "swh_Latn",
    "Amharic": "amh_Ethi",
}

lang_map = {
    **_ENGLISH_AND_INDIAN_LANGS,
    **_EUROPEAN_LANGS,
    **_MIDDLE_EAST_LANGS,
    **_ASIAN_LANGS,
    **_OTHER_LANGS,
}

# ==============================
# DETECT LANGUAGE
# ==============================
# Language code reported by langdetect -> language code used for the
# translation model's source language. A code that is missing here makes
# detect_language() fall back to English, so every language offered as a
# translation target that langdetect can report should have an entry.
detect_map = {
    "en": "eng_Latn",
    "ta": "tam_Taml",
    "hi": "hin_Deva",
    "te": "tel_Telu",
    "kn": "kan_Knda",
    "ml": "mal_Mlym",
    "mr": "mar_Deva",
    "bn": "ben_Beng",
    "gu": "guj_Gujr",
    "pa": "pan_Guru",
    "ur": "urd_Arab",
    "fr": "fra_Latn",
    "es": "spa_Latn",
    "de": "deu_Latn",
    "it": "ita_Latn",
    "pt": "por_Latn",
    "nl": "nld_Latn",
    "ro": "ron_Latn",
    "ru": "rus_Cyrl",
    "zh-cn": "zho_Hans",
    "zh-tw": "zho_Hant",
    "ja": "jpn_Jpan",
    "ko": "kor_Hang",
    "ar": "arb_Arab",
    "tr": "tur_Latn",
    "vi": "vie_Latn",
    # Added so these offered languages are no longer mis-reported as English.
    "cs": "ces_Latn",
    "pl": "pol_Latn",
    "hu": "hun_Latn",
    "fi": "fin_Latn",
    "sv": "swe_Latn",
    "no": "nob_Latn",
    "da": "dan_Latn",
    "fa": "pes_Arab",
    "he": "heb_Hebr",
    "th": "tha_Thai",
    "id": "ind_Latn",
    "uk": "ukr_Cyrl",
    "bg": "bul_Cyrl",
    "el": "ell_Grek",
    "sw": "swh_Latn",
}

def detect_language(text):
    """Return the model language code for the language *text* appears to be in.

    Args:
        text: The user-supplied string to inspect.

    Returns:
        The ``detect_map`` value for the code reported by ``detect``.
        ``"eng_Latn"`` is returned when the stripped text is shorter than
        three characters, when detection raises, or when the reported code
        has no ``detect_map`` entry.
    """
    # Inputs this short are not sent to the detector; default to English.
    if len(text.strip()) < 3:
        return "eng_Latn"
    try:
        lang = detect(text)
    except Exception:
        # Best-effort fallback when detection fails. Catching Exception
        # rather than using a bare ``except`` lets KeyboardInterrupt and
        # SystemExit propagate.
        return "eng_Latn"
    return detect_map.get(lang, "eng_Latn")

# ==============================
# TRANSLATION FUNCTION
# ==============================
def translate(text, target_lang, *, max_length=200, num_beams=4):
    """Translate *text* into *target_lang*, auto-detecting the source language.

    Args:
        text: The text to translate.
        target_lang: A key of ``lang_map`` (the display name of the target
            language).
        max_length: Passed to ``model.generate`` as ``max_length``.
        num_beams: Passed to ``model.generate`` as ``num_beams``.

    Returns:
        A ``(translated_text, source_code)`` tuple, where ``source_code`` is
        the value returned by ``detect_language``. When the stripped input is
        shorter than two characters, nothing is translated and the tuple is
        ``(warning_message, "N/A")``.

    Raises:
        KeyError: If *target_lang* is not a key of ``lang_map``.
    """
    if len(text.strip()) < 2:
        return "⚠️ Please enter valid text.", "N/A"

    src_lang = detect_language(text)
    tgt_lang = lang_map[target_lang]

    # The tokenizer reads src_lang from itself, so it is set before encoding.
    tokenizer.src_lang = src_lang

    # truncation=True keeps over-long input within the tokenizer's configured
    # maximum length instead of handing the model a sequence it cannot take.
    encoded = tokenizer(text, return_tensors="pt", truncation=True)

    generated_tokens = model.generate(
        **encoded,
        # Forcing the first generated token to the target-language token is
        # how the output language is selected.
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
        max_length=max_length,
        num_beams=num_beams,
        early_stopping=True,
    )

    # One input sequence was encoded, so the first decoded string is the result.
    translated = tokenizer.batch_decode(
        generated_tokens,
        skip_special_tokens=True,
    )[0]

    return translated, src_lang

# ==============================
# UI
# ==============================
st.title("🌍 40+ AI Powered-Multilingual Translators App")

# Two-column layout: source text on the left, target language on the right.
col1, col2 = st.columns(2)

with col1:
    input_text = st.text_area("Enter Text", height=200)

with col2:
    target_lang = st.selectbox("Select Target Language", list(lang_map))

if st.button("Translate"):
    if not input_text.strip():
        st.warning("Please enter text")
    else:
        # Only the model call sits inside the spinner; results are rendered
        # once it has finished.
        with st.spinner("Translating..."):
            output, src_lang = translate(input_text, target_lang)

        if src_lang == "N/A":
            # translate() rejected the input; `output` is its warning message,
            # so it is shown as a warning instead of as a translation.
            st.warning(output)
        else:
            st.success("✅ Translation")
            st.write(output)
            st.info(f"Detected Language Code: {src_lang}")