STTR committed on
Commit
2ceddcf
·
1 Parent(s): 40e1a06

Add beautiful custom theme and CSS

Browse files
Files changed (1) hide show
  1. app.py +130 -59
app.py CHANGED
@@ -20,7 +20,6 @@ print(f"๐Ÿ–ฅ๏ธ Device: {device}")
20
  # Load Models
21
  # ============================================================
22
 
23
- # SeamlessM4T v2 Large for STT
24
  print("๐Ÿ“ฅ Loading SeamlessM4T v2 Large...")
25
  STT_MODEL = "facebook/seamless-m4t-v2-large"
26
  stt_processor = AutoProcessor.from_pretrained(STT_MODEL)
@@ -28,7 +27,6 @@ stt_model = SeamlessM4Tv2ForSpeechToText.from_pretrained(STT_MODEL)
28
  stt_model = stt_model.to(device).eval()
29
  print("โœ… SeamlessM4T v2 Large loaded!")
30
 
31
- # NLLB-200 for Translation
32
  print("๐Ÿ“ฅ Loading NLLB-200...")
33
  NLLB_MODEL = "facebook/nllb-200-distilled-600M"
34
  nllb_tokenizer = AutoTokenizer.from_pretrained(NLLB_MODEL)
@@ -36,8 +34,6 @@ nllb_model = AutoModelForSeq2SeqLM.from_pretrained(NLLB_MODEL)
36
  nllb_model = nllb_model.to(device).eval()
37
  print("โœ… NLLB-200 loaded!")
38
 
39
- print("๐ŸŽ‰ All models ready!")
40
-
41
  # ============================================================
42
  # Language Codes
43
  # ============================================================
@@ -54,9 +50,6 @@ NLLB_LANGS = {
54
  "๐Ÿ‡ฏ๐Ÿ‡ต Japanese": "jpn_Jpan",
55
  "๐Ÿ‡ฐ๐Ÿ‡ท Korean": "kor_Hang",
56
  "๐Ÿ‡ท๐Ÿ‡บ Russian": "rus_Cyrl",
57
- "๐Ÿ‡น๐Ÿ‡ท Turkish": "tur_Latn",
58
- "๐Ÿ‡ณ๐Ÿ‡ฑ Dutch": "nld_Latn",
59
- "๐Ÿ‡ฎ๐Ÿ‡ณ Hindi": "hin_Deva",
60
  }
61
 
62
  STT_LANGS = {
@@ -74,7 +67,6 @@ STT_LANGS = {
74
  "๐Ÿ‡ท๐Ÿ‡บ Russian": "rus",
75
  }
76
 
77
- # Fish Audio API
78
  FISH_AUDIO_API_KEY = os.environ.get('FISH_AUDIO_API_KEY', '')
79
 
80
  # ============================================================
@@ -132,17 +124,21 @@ def translate_audio(audio, source_lang, target_lang, enable_voice_clone):
132
 
133
  translation = nllb_tokenizer.decode(outputs[0], skip_special_tokens=True)
134
 
135
- # 3. TTS with Fish Audio
136
  tts_audio = None
137
  if FISH_AUDIO_API_KEY:
138
  tts_audio = generate_tts(translation, enable_voice_clone, audio if enable_voice_clone else None)
139
 
140
  result_text = f"""
141
- ### ๐ŸŽค {source_lang}
142
- {transcript}
 
 
143
 
144
- ### ๐ŸŒ {target_lang}
145
- {translation}
 
 
146
  """
147
 
148
  return tts_audio, result_text
@@ -159,7 +155,6 @@ def generate_tts(text, clone_voice=False, reference_audio=None):
159
  headers = {'Authorization': f'Bearer {FISH_AUDIO_API_KEY}'}
160
 
161
  if clone_voice and reference_audio:
162
- # Voice cloning
163
  import tempfile
164
  import scipy.io.wavfile as wavfile
165
 
@@ -188,7 +183,6 @@ def generate_tts(text, clone_voice=False, reference_audio=None):
188
 
189
  os.remove(audio_path)
190
  else:
191
- # Standard TTS
192
  payload = {
193
  'text': text,
194
  'format': 'mp3',
@@ -213,54 +207,132 @@ def generate_tts(text, clone_voice=False, reference_audio=None):
213
  return None
214
 
215
  # ============================================================
216
- # Gradio Interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  # ============================================================
218
 
219
- with gr.Blocks(theme=gr.themes.Soft(), title="Instant Translat") as demo:
 
 
 
 
 
 
 
 
 
 
 
220
  gr.Markdown("""
221
- # ๐ŸŒ Instant Translat - AI Voice Translation
222
- **Real-time voice translation powered by Meta AI**
223
 
224
- - ๐ŸŽค **STT**: SeamlessM4T v2 Large (101 languages)
225
- - ๐ŸŒ **Translation**: NLLB-200 (200 languages + Darija)
226
- - ๐Ÿ”Š **TTS**: Fish Audio S1 (Natural voice)
227
- - ๐ŸŽญ **Voice Cloning**: Your voice in any language
228
  """)
229
 
230
  with gr.Row():
231
  with gr.Column(scale=1):
 
 
232
  audio_input = gr.Audio(
233
- label="๐ŸŽค Record Your Voice",
234
  type="numpy",
235
- sources=["microphone"]
236
- )
237
-
238
- source_lang = gr.Dropdown(
239
- choices=list(NLLB_LANGS.keys()),
240
- value="๐Ÿ‡ฒ๐Ÿ‡ฆ Moroccan Arabic (Darija)",
241
- label="๐Ÿ—ฃ๏ธ Source Language"
242
  )
243
 
244
- target_lang = gr.Dropdown(
245
- choices=list(NLLB_LANGS.keys()),
246
- value="๐Ÿ‡ฌ๐Ÿ‡ง English",
247
- label="๐ŸŽฏ Target Language"
248
- )
 
 
 
 
 
 
 
 
 
249
 
250
  voice_clone = gr.Checkbox(
251
- label="๐ŸŽญ Clone Voice (Use your voice for translation)",
252
- value=True
 
253
  )
254
 
255
  translate_btn = gr.Button(
256
- "๐ŸŒ Translate",
257
  variant="primary",
258
- size="lg"
 
259
  )
260
 
261
  with gr.Column(scale=1):
262
- audio_output = gr.Audio(label="๐Ÿ”Š Translation Audio")
263
- text_output = gr.Markdown(label="๐Ÿ“ Translation Text")
 
 
 
 
 
 
264
 
265
  translate_btn.click(
266
  translate_audio,
@@ -269,25 +341,24 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Instant Translat") as demo:
269
  )
270
 
271
  gr.Markdown("""
272
- ## ๐ŸŽฏ How to Use
273
- 1. **Select Languages**: Choose your source and target languages
274
- 2. **Record**: Click the microphone and speak clearly
275
- 3. **Translate**: Click the translate button
276
- 4. **Listen**: Hear the translation in natural voice (or your cloned voice!)
 
 
 
 
 
 
 
277
 
278
- ## ๐ŸŒ Supported Languages
279
- - ๐Ÿ‡ฒ๐Ÿ‡ฆ **Moroccan Darija** (Moroccan Arabic)
280
- - ๐Ÿ‡ธ๐Ÿ‡ฆ Arabic (MSA)
281
- - ๐Ÿ‡ซ๐Ÿ‡ท French
282
- - ๐Ÿ‡ฌ๐Ÿ‡ง English
283
- - ๐Ÿ‡ช๐Ÿ‡ธ Spanish
284
- - ๐Ÿ‡ฉ๐Ÿ‡ช German
285
- - And 190+ more languages!
286
 
287
- ## ๐Ÿ”’ Privacy
288
- - No data is stored
289
- - Real-time processing
290
- - Secure API calls
291
  """)
292
 
293
  if __name__ == "__main__":
 
20
  # Load Models
21
  # ============================================================
22
 
 
23
  print("๐Ÿ“ฅ Loading SeamlessM4T v2 Large...")
24
  STT_MODEL = "facebook/seamless-m4t-v2-large"
25
  stt_processor = AutoProcessor.from_pretrained(STT_MODEL)
 
27
  stt_model = stt_model.to(device).eval()
28
  print("โœ… SeamlessM4T v2 Large loaded!")
29
 
 
30
  print("๐Ÿ“ฅ Loading NLLB-200...")
31
  NLLB_MODEL = "facebook/nllb-200-distilled-600M"
32
  nllb_tokenizer = AutoTokenizer.from_pretrained(NLLB_MODEL)
 
34
  nllb_model = nllb_model.to(device).eval()
35
  print("โœ… NLLB-200 loaded!")
36
 
 
 
37
  # ============================================================
38
  # Language Codes
39
  # ============================================================
 
50
  "๐Ÿ‡ฏ๐Ÿ‡ต Japanese": "jpn_Jpan",
51
  "๐Ÿ‡ฐ๐Ÿ‡ท Korean": "kor_Hang",
52
  "๐Ÿ‡ท๐Ÿ‡บ Russian": "rus_Cyrl",
 
 
 
53
  }
54
 
55
  STT_LANGS = {
 
67
  "๐Ÿ‡ท๐Ÿ‡บ Russian": "rus",
68
  }
69
 
 
70
  FISH_AUDIO_API_KEY = os.environ.get('FISH_AUDIO_API_KEY', '')
71
 
72
  # ============================================================
 
124
 
125
  translation = nllb_tokenizer.decode(outputs[0], skip_special_tokens=True)
126
 
127
+ # 3. TTS
128
  tts_audio = None
129
  if FISH_AUDIO_API_KEY:
130
  tts_audio = generate_tts(translation, enable_voice_clone, audio if enable_voice_clone else None)
131
 
132
  result_text = f"""
133
+ <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; border-radius: 12px; margin: 10px 0;">
134
+ <h3 style="color: white; margin: 0 0 10px 0;">๐ŸŽค {source_lang}</h3>
135
+ <p style="color: white; font-size: 1.1em; margin: 0;">{transcript}</p>
136
+ </div>
137
 
138
+ <div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); padding: 20px; border-radius: 12px; margin: 10px 0;">
139
+ <h3 style="color: white; margin: 0 0 10px 0;">๐ŸŒ {target_lang}</h3>
140
+ <p style="color: white; font-size: 1.1em; margin: 0;">{translation}</p>
141
+ </div>
142
  """
143
 
144
  return tts_audio, result_text
 
155
  headers = {'Authorization': f'Bearer {FISH_AUDIO_API_KEY}'}
156
 
157
  if clone_voice and reference_audio:
 
158
  import tempfile
159
  import scipy.io.wavfile as wavfile
160
 
 
183
 
184
  os.remove(audio_path)
185
  else:
 
186
  payload = {
187
  'text': text,
188
  'format': 'mp3',
 
207
  return None
208
 
209
  # ============================================================
210
+ # Custom CSS
211
+ # ============================================================
212
+
213
+ custom_css = """
214
+ /* Modern Gradient Background */
215
+ .gradio-container {
216
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
217
+ font-family: 'Inter', sans-serif;
218
+ }
219
+
220
+ /* Card Style */
221
+ .contain {
222
+ background: rgba(255, 255, 255, 0.95) !important;
223
+ border-radius: 20px !important;
224
+ box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3) !important;
225
+ padding: 30px !important;
226
+ backdrop-filter: blur(10px) !important;
227
+ }
228
+
229
+ /* Buttons */
230
+ .primary {
231
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
232
+ border: none !important;
233
+ border-radius: 12px !important;
234
+ padding: 15px 30px !important;
235
+ font-weight: 600 !important;
236
+ font-size: 1.1em !important;
237
+ transition: all 0.3s ease !important;
238
+ }
239
+
240
+ .primary:hover {
241
+ transform: translateY(-2px) !important;
242
+ box-shadow: 0 10px 25px rgba(102, 126, 234, 0.4) !important;
243
+ }
244
+
245
+ /* Input Fields */
246
+ .input-audio, .dropdown {
247
+ border-radius: 12px !important;
248
+ border: 2px solid #e0e0e0 !important;
249
+ }
250
+
251
+ /* Headers */
252
+ h1, h2, h3 {
253
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
254
+ -webkit-background-clip: text;
255
+ -webkit-text-fill-color: transparent;
256
+ font-weight: 700;
257
+ }
258
+
259
+ /* Markdown Content */
260
+ .markdown-text {
261
+ line-height: 1.8;
262
+ }
263
+ """
264
+
265
+ # ============================================================
266
+ # Gradio Interface with Custom Theme
267
  # ============================================================
268
 
269
+ theme = gr.themes.Soft(
270
+ primary_hue="purple",
271
+ secondary_hue="pink",
272
+ neutral_hue="slate",
273
+ font=gr.themes.GoogleFont("Inter"),
274
+ ).set(
275
+ button_primary_background_fill="*primary_500",
276
+ button_primary_background_fill_hover="*primary_600",
277
+ button_primary_text_color="white",
278
+ )
279
+
280
+ with gr.Blocks(theme=theme, css=custom_css, title="Instant Translat") as demo:
281
  gr.Markdown("""
282
+ # ๐ŸŒ Instant Translat
283
+ ### AI-Powered Voice Translation in 200+ Languages
284
 
285
+ Translate your voice instantly with cutting-edge AI. Supports Moroccan Darija and 200+ languages!
 
 
 
286
  """)
287
 
288
  with gr.Row():
289
  with gr.Column(scale=1):
290
+ gr.Markdown("### ๐ŸŽค Input")
291
+
292
  audio_input = gr.Audio(
293
+ label="Record Your Voice",
294
  type="numpy",
295
+ sources=["microphone"],
296
+ elem_classes="input-audio"
 
 
 
 
 
297
  )
298
 
299
+ with gr.Row():
300
+ source_lang = gr.Dropdown(
301
+ choices=list(NLLB_LANGS.keys()),
302
+ value="๐Ÿ‡ฒ๐Ÿ‡ฆ Moroccan Arabic (Darija)",
303
+ label="๐Ÿ—ฃ๏ธ From",
304
+ elem_classes="dropdown"
305
+ )
306
+
307
+ target_lang = gr.Dropdown(
308
+ choices=list(NLLB_LANGS.keys()),
309
+ value="๐Ÿ‡ฌ๐Ÿ‡ง English",
310
+ label="๐ŸŽฏ To",
311
+ elem_classes="dropdown"
312
+ )
313
 
314
  voice_clone = gr.Checkbox(
315
+ label="๐ŸŽญ Clone My Voice",
316
+ value=True,
317
+ info="Hear translation in your own voice"
318
  )
319
 
320
  translate_btn = gr.Button(
321
+ "๐ŸŒ Translate Now",
322
  variant="primary",
323
+ size="lg",
324
+ elem_classes="primary"
325
  )
326
 
327
  with gr.Column(scale=1):
328
+ gr.Markdown("### ๐Ÿ”Š Output")
329
+
330
+ audio_output = gr.Audio(
331
+ label="Translation Audio",
332
+ type="filepath"
333
+ )
334
+
335
+ text_output = gr.HTML(label="Translation Text")
336
 
337
  translate_btn.click(
338
  translate_audio,
 
341
  )
342
 
343
  gr.Markdown("""
344
+ ---
345
+
346
+ ## โœจ Features
347
+
348
+ - ๐ŸŽค **Speech Recognition** - Powered by Meta's SeamlessM4T v2 Large
349
+ - ๐ŸŒ **Translation** - 200+ languages with NLLB-200
350
+ - ๐Ÿ”Š **Natural Voice** - Fish Audio S1 TTS
351
+ - ๐ŸŽญ **Voice Cloning** - Hear translation in your voice
352
+
353
+ ## ๐ŸŒ Popular Languages
354
+
355
+ ๐Ÿ‡ฒ๐Ÿ‡ฆ Moroccan Darija โ€ข ๐Ÿ‡ธ๐Ÿ‡ฆ Arabic โ€ข ๐Ÿ‡ซ๐Ÿ‡ท French โ€ข ๐Ÿ‡ฌ๐Ÿ‡ง English โ€ข ๐Ÿ‡ช๐Ÿ‡ธ Spanish โ€ข ๐Ÿ‡ฉ๐Ÿ‡ช German โ€ข ๐Ÿ‡ฎ๐Ÿ‡น Italian โ€ข ๐Ÿ‡ต๐Ÿ‡น Portuguese โ€ข ๐Ÿ‡จ๐Ÿ‡ณ Chinese โ€ข ๐Ÿ‡ฏ๐Ÿ‡ต Japanese โ€ข ๐Ÿ‡ฐ๐Ÿ‡ท Korean โ€ข ๐Ÿ‡ท๐Ÿ‡บ Russian
356
 
357
+ ---
 
 
 
 
 
 
 
358
 
359
+ <div style="text-align: center; padding: 20px;">
360
+ <p style="color: #666;">Made with โค๏ธ using Meta AI โ€ข Powered by HuggingFace</p>
361
+ </div>
 
362
  """)
363
 
364
  if __name__ == "__main__":