San-NLP committed on
Commit
05373f8
·
verified ·
1 Parent(s): a514558

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +272 -0
app.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ from transformers import MarianMTModel, MarianTokenizer
4
+
5
# Browser-tab title, icon, and layout for the app page.
_PAGE_SETTINGS = {
    "page_title": "Language Translation App",
    "page_icon": "🌍",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)
10
+
11
+ # -----------------------------
12
+ # Custom CSS
13
+ # -----------------------------
14
# Stylesheet injected into the page; the class names (.app-title,
# .app-subtitle, .result-label) are the ones used by the HTML snippets
# rendered further down in this script.
_CUSTOM_CSS = """
<style>
.block-container {
padding-top: 1.5rem;
padding-bottom: 2rem;
max-width: 1100px;
}

.app-title {
font-size: 3.2rem;
font-weight: 800;
color: #2d2d3a;
margin-bottom: 0.25rem;
}

.app-subtitle {
font-size: 1.2rem;
color: #555;
margin-bottom: 1.8rem;
}

.stSelectbox label, .stTextArea label {
font-size: 1.05rem !important;
font-weight: 600 !important;
}

.stTextArea textarea {
font-size: 1.15rem !important;
border-radius: 12px !important;
}

.stButton > button {
min-width: 140px;
height: 48px;
font-size: 1rem;
font-weight: 600;
border-radius: 10px;
}

.result-label {
font-size: 1.05rem;
font-weight: 600;
margin-top: 1rem;
margin-bottom: 0.5rem;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
61
+
62
+ # -----------------------------
63
+ # Supported languages
64
+ # -----------------------------
65
# Display name -> language code, in the order shown in the select boxes.
LANGUAGES = dict(
    English="en",
    French="fr",
    German="de",
    Spanish="es",
    Italian="it",
    Portuguese="pt",
    Dutch="nl",
    Romanian="ro",
    Arabic="ar",
    Hindi="hi",
)
77
+
78
+ # -----------------------------
79
+ # Helsinki-NLP OPUS-MT models
80
+ # -----------------------------
81
# (source_code, target_code) -> checkpoint name. Every supported pair has
# English on one side, so both directions are generated per partner code.
# NOTE(review): confirm that each generated checkpoint name actually exists
# on the model hub; that cannot be checked from this file.
_ENGLISH_PARTNERS = ("fr", "de", "es", "it", "pt", "nl", "ro", "ar", "hi")

MODEL_MAP = {}
for _code in _ENGLISH_PARTNERS:
    MODEL_MAP[("en", _code)] = f"Helsinki-NLP/opus-mt-en-{_code}"
    MODEL_MAP[(_code, "en")] = f"Helsinki-NLP/opus-mt-{_code}-en"
109
+
110
+ # -----------------------------
111
+ # Session state
112
+ # -----------------------------
113
# Seed each session-state entry with an empty string the first time the
# script runs in a session; existing values are left alone.
for _state_key in ("input_text", "translated_text", "model_info"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = ""
121
+
122
+ # -----------------------------
123
+ # Device
124
+ # -----------------------------
125
# Use the GPU when torch reports CUDA as available; otherwise stay on CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"
126
+
127
+ # -----------------------------
128
+ # Load model + tokenizer
129
+ # -----------------------------
130
@st.cache_resource
def load_model(model_name: str):
    """Load the Marian tokenizer and model for ``model_name``.

    The model is moved to ``DEVICE`` before being returned. The function is
    wrapped in ``st.cache_resource``, so the loaded objects are reused across
    calls with the same ``model_name``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    marian_tokenizer = MarianTokenizer.from_pretrained(model_name)
    marian_model = MarianMTModel.from_pretrained(model_name).to(DEVICE)
    return marian_tokenizer, marian_model
136
+
137
+ # -----------------------------
138
+ # Translation function
139
+ # -----------------------------
140
def translate_text(text: str, src_lang: str, tgt_lang: str):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang``.

    The input is translated line by line: each non-blank line is sent to the
    model as its own sequence and the results are re-joined with newlines, so
    paragraph structure survives and a long multi-line input is not cut off
    at the first 512 tokens of the whole text. A single line is still
    truncated at 512 tokens.

    Args:
        text: Text to translate.
        src_lang: Source language code (a key component of ``MODEL_MAP``).
        tgt_lang: Target language code.

    Returns:
        A ``(translated_text, info)`` tuple. On success ``info`` is the name
        of the model used (or ``"Same language selected"`` when no
        translation was needed). On failure ``translated_text`` is ``None``
        and ``info`` is a human-readable error message.
    """
    if src_lang == tgt_lang:
        return text, "Same language selected"

    # Nothing to translate; avoid loading a model and generating from an
    # empty sequence.
    if not text.strip():
        return None, "No text to translate"

    model_name = MODEL_MAP.get((src_lang, tgt_lang))
    if model_name is None:
        return None, f"No open-source model available for {src_lang} → {tgt_lang}"

    lines = text.splitlines()
    # Only non-blank lines go through the model; blank lines are kept as-is
    # so the output has the same line layout as the input.
    pending = [(index, line) for index, line in enumerate(lines) if line.strip()]
    batch_size = 8  # bounds padding and memory use for inputs with many lines

    try:
        tokenizer, model = load_model(model_name)

        for start in range(0, len(pending), batch_size):
            batch = pending[start:start + batch_size]

            inputs = tokenizer(
                [line for _, line in batch],
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512,
            )
            inputs = {key: value.to(DEVICE) for key, value in inputs.items()}

            generated = model.generate(
                **inputs,
                max_length=512,
                num_beams=4,
                early_stopping=True,
            )

            for (index, _), tokens in zip(batch, generated):
                lines[index] = tokenizer.decode(tokens, skip_special_tokens=True)

        return "\n".join(lines), model_name

    except Exception as e:
        # UI boundary: report any model/tokenizer failure as a message
        # instead of crashing the app.
        return None, f"Translation failed: {str(e)}"
180
+
181
+ # -----------------------------
182
+ # Header
183
+ # -----------------------------
184
# Page title and subtitle, each rendered as a div styled by the custom CSS.
_HEADER_PARTS = (
    ("app-title", "Language Translation App 🌍"),
    ("app-subtitle", "Translate text between multiple languages using open-source models."),
)
for _css_class, _caption in _HEADER_PARTS:
    st.markdown(f'<div class="{_css_class}">{_caption}</div>', unsafe_allow_html=True)
189
+
190
+ # -----------------------------
191
+ # Language selection
192
+ # -----------------------------
193
# Source and target pickers side by side; defaults are the first and third
# entries of LANGUAGES.
language_names = list(LANGUAGES)
source_col, target_col = st.columns(2)

source_language_name = source_col.selectbox(
    "Select Source Language",
    language_names,
    index=0,
)
target_language_name = target_col.selectbox(
    "Select Target Language",
    language_names,
    index=2,
)

# Convert the chosen display names into the codes used as MODEL_MAP keys.
source_language = LANGUAGES[source_language_name]
target_language = LANGUAGES[target_language_name]
211
+
212
+ # -----------------------------
213
+ # Input area
214
+ # -----------------------------
215
# Text box pre-filled from session state; whatever it currently holds is
# written straight back so the value persists across reruns.
input_text = st.text_area(
    label="Enter Text to Translate",
    value=st.session_state.input_text,
    height=220,
    placeholder="Type or paste your text here...",
)
st.session_state["input_text"] = input_text
223
+
224
+ # -----------------------------
225
+ # Buttons
226
+ # -----------------------------
227
# Two equal-width columns holding the action buttons.
translate_col, clear_col = st.columns([1, 1])
translate_button = translate_col.button("Translate")
clear_button = clear_col.button("Clear")

# "Clear" blanks every stored value and reruns the script immediately.
if clear_button:
    for _state_key in ("input_text", "translated_text", "model_info"):
        st.session_state[_state_key] = ""
    st.rerun()
240
+
241
+ # -----------------------------
242
+ # Translate action
243
+ # -----------------------------
244
# Run a translation when "Translate" was clicked: warn on blank input,
# otherwise call translate_text and either store the result or show the
# error message it returned.
source_text = input_text.strip()

if translate_button and not source_text:
    st.warning("Please enter some text to translate.")
elif translate_button:
    with st.spinner("Translating..."):
        translated_text, info = translate_text(
            source_text,
            source_language,
            target_language,
        )

    if translated_text is not None:
        st.session_state.translated_text = translated_text
        st.session_state.model_info = info
        st.success("Translation completed successfully.")
    else:
        # translate_text signals failure with None plus a message.
        st.error(info)
261
+
262
+ # -----------------------------
263
+ # Output area
264
+ # -----------------------------
265
# Show the most recent translation, if there is one.
if st.session_state.translated_text:
    st.markdown('<div class="result-label">Translated Text</div>', unsafe_allow_html=True)
    # The visible heading is the styled div above, so the widget's own label
    # is collapsed rather than left empty (Streamlit discourages empty
    # labels for accessibility reasons).
    st.text_area(
        "Translated Text",
        value=st.session_state.translated_text,
        height=220,
        label_visibility="collapsed",
    )
    st.caption(f"Model used: {st.session_state.model_info}")