Minte committed on
Commit
755fa07
·
1 Parent(s): 943a8da
Files changed (2) hide show
  1. app.py +57 -6
  2. requirements.txt +2 -1
app.py CHANGED
@@ -10,6 +10,15 @@ from datetime import datetime
10
  import os
11
  import tempfile
12
 
 
 
 
 
 
 
 
 
 
13
  # Model configuration for each language
14
  MODELS = {
15
  "Amharic": "facebook/mms-tts-amh",
@@ -53,14 +62,34 @@ class MMS_TTS_Service:
53
  print(f"❌ Error loading model for {language}: {e}")
54
  raise e
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  def generate_speech(self, text, language, speed=1.0):
57
  """Generate speech from text for specified language"""
58
  try:
59
  # Load model if not already loaded
60
  model, tokenizer = self.load_model(language)
61
 
 
 
 
62
  # Tokenize input text
63
- inputs = tokenizer(text, return_tensors="pt")
64
  input_ids = inputs["input_ids"].to(self.device)
65
 
66
  # Generate speech with torch.no_grad for efficiency
@@ -125,11 +154,11 @@ def text_to_speech(text, language, speed=1.0):
125
  def create_demo_audio(language):
126
  """Create demo text for each language"""
127
  demo_texts = {
128
- "Amharic": "αˆ°αˆ‹αˆα£ α‹­αˆ… α‹¨α‹΅αˆα… αˆ›αˆ˜αŠ•αŒ« αˆžα‹΄αˆ αŠα‹α’",
129
  "Somali": "Salaam, kani waa modelka cod-sameynta.",
130
  "Swahili": "Halo, hii ni modeli ya kutengeneza sauti.",
131
  "Afan Oromo": "Akkam, kun modeli sagalee uumuudha.",
132
- "Tigrinya": "αˆ°αˆ‹αˆα£ αŠ₯α‹š α‹΅αˆαŒΊ α‹αŒˆα‰₯ር αˆžα‹΄αˆ αŠ₯ዩፒ",
133
  "Chichewa": "Moni, iyi ndi modeli yopanga mawu."
134
  }
135
 
@@ -141,6 +170,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
141
  """
142
  # 🎙️ MMS Text-to-Speech for African Languages
143
  Convert text to natural speech in multiple African languages using Facebook's MMS-TTS models.
 
 
144
  """
145
  )
146
 
@@ -286,14 +317,14 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
286
  outputs=[audio_output, batch_status, batch_results]
287
  )
288
 
289
- # Examples
290
  gr.Markdown("### 💡 Example Texts")
291
  examples = [
292
- ["Amharic", "αˆαˆ‰αˆ αˆ°α‹ α‰ αˆαˆ‰αˆ መα‰₯ቢች αŠ₯ኩል αŠα‹α’"],
 
293
  ["Somali", "Qof walba wuxuu leeyahay xuquuqda aadamaha."],
294
  ["Swahili", "Kila mtu ana haki zote za binadamu."],
295
  ["Afan Oromo", "Nama hundi mirga ummataa hundaa waliin dhalate."],
296
- ["Tigrinya", "αŠ©αˆ‰ ሰα‰₯ αŠ•αŠ©αˆ‰ αˆ˜αˆ°αˆ‹α‰΅ αŠ₯ኩል αŠ₯ዩፒ"],
297
  ["Chichewa", "Alipo wina aliyense ali ndi ufulu wachibadwidwe."]
298
  ]
299
 
@@ -305,6 +336,24 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
305
  cache_examples=False
306
  )
307
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
  # Footer
309
  gr.Markdown(
310
  """
@@ -312,6 +361,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
312
  ### ℹ️ About
313
  **Powered by:** Facebook MMS-TTS Models
314
  **Supported Languages:** Amharic, Somali, Swahili, Afan Oromo, Tigrinya, Chichewa
 
315
  **Model Type:** Text-to-Speech
316
  **Max Text Length:** 500 characters (single), 200 characters (batch)
317
 
@@ -323,6 +373,7 @@ if __name__ == "__main__":
323
  # Pre-load a model to reduce first-time latency
324
  print("🚀 Starting MMS Text-to-Speech Service...")
325
  print("📋 Supported Languages:", list(MODELS.keys()))
 
326
 
327
  # Pre-load Amharic model for faster first response
328
  try:
 
10
  import os
11
  import tempfile
12
 
13
+ # Install uroman if not available
14
+ try:
15
+ from uroman import uroman
16
+ except ImportError:
17
+ import subprocess
18
+ import sys
19
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "uroman"])
20
+ from uroman import uroman
21
+
22
  # Model configuration for each language
23
  MODELS = {
24
  "Amharic": "facebook/mms-tts-amh",
 
62
  print(f"❌ Error loading model for {language}: {e}")
63
  raise e
64
 
65
+ def preprocess_text(self, text, language):
66
+ """Preprocess text with romanization for Amharic and Tigrinya"""
67
+ if language in ["Amharic", "Tigrinya"]:
68
+ print(f"Romanizing {language} text...")
69
+ try:
70
+ # Romanize the text for Amharic and Tigrinya models
71
+ romanized_text = uroman(text)
72
+ print(f"Original: {text}")
73
+ print(f"Romanized: {romanized_text}")
74
+ return romanized_text
75
+ except Exception as e:
76
+ print(f"Romanization failed, using original text: {e}")
77
+ return text
78
+ else:
79
+ # For other languages, use text as is
80
+ return text
81
+
82
  def generate_speech(self, text, language, speed=1.0):
83
  """Generate speech from text for specified language"""
84
  try:
85
  # Load model if not already loaded
86
  model, tokenizer = self.load_model(language)
87
 
88
+ # Preprocess text (romanize for Amharic and Tigrinya)
89
+ processed_text = self.preprocess_text(text, language)
90
+
91
  # Tokenize input text
92
+ inputs = tokenizer(processed_text, return_tensors="pt")
93
  input_ids = inputs["input_ids"].to(self.device)
94
 
95
  # Generate speech with torch.no_grad for efficiency
 
154
  def create_demo_audio(language):
155
  """Create demo text for each language"""
156
  demo_texts = {
157
+ "Amharic": "αˆ°αˆ‹αˆα£ α‹­αˆ… α‹¨α‹΅αˆα… αˆ›αˆ˜αŠ•αŒ« αˆžα‹΄αˆ αŠα‹α’ αŠ αˆ˜αˆ°αŒαŠ“αˆˆαˆ!",
158
  "Somali": "Salaam, kani waa modelka cod-sameynta.",
159
  "Swahili": "Halo, hii ni modeli ya kutengeneza sauti.",
160
  "Afan Oromo": "Akkam, kun modeli sagalee uumuudha.",
161
+ "Tigrinya": "αˆ°αˆ‹αˆα£ αŠ₯α‹š α‹΅αˆαŒΊ α‹αŒˆα‰₯ር αˆžα‹΄αˆ αŠ₯ዩፒ α‹¨α‰αŠ•α‹¨αˆˆα‹­!",
162
  "Chichewa": "Moni, iyi ndi modeli yopanga mawu."
163
  }
164
 
 
170
  """
171
  # 🎙️ MMS Text-to-Speech for African Languages
172
  Convert text to natural speech in multiple African languages using Facebook's MMS-TTS models.
173
+
174
+ **Special Features for Amharic & Tigrinya:** Automatic romanization for better pronunciation
175
  """
176
  )
177
 
 
317
  outputs=[audio_output, batch_status, batch_results]
318
  )
319
 
320
+ # Examples with better Amharic and Tigrinya samples
321
  gr.Markdown("### 💡 Example Texts")
322
  examples = [
323
+ ["Amharic", "αˆαˆ‰αˆ αˆ°α‹ α‰ αˆαˆ‰αˆ መα‰₯ቢች αŠ₯ኩል αŠα‹α’ αŠ αˆ˜αˆ°αŒαŠ“αˆˆαˆ!"],
324
+ ["Tigrinya", "αŠ©αˆ‰ ሰα‰₯ αŠ•αŠ©αˆ‰ αˆ˜αˆ°αˆ‹α‰΅ αŠ₯ኩል αŠ₯ዩፒ α‹¨α‰αŠ•α‹¨αˆˆα‹­!"],
325
  ["Somali", "Qof walba wuxuu leeyahay xuquuqda aadamaha."],
326
  ["Swahili", "Kila mtu ana haki zote za binadamu."],
327
  ["Afan Oromo", "Nama hundi mirga ummataa hundaa waliin dhalate."],
 
328
  ["Chichewa", "Alipo wina aliyense ali ndi ufulu wachibadwidwe."]
329
  ]
330
 
 
336
  cache_examples=False
337
  )
338
 
339
+ # Language-specific information
340
+ with gr.Accordion("ℹ️ Language-Specific Information", open=False):
341
+ gr.Markdown("""
342
+ ### Amharic & Tigrinya Support
343
+ - **Automatic Romanization**: Text is automatically converted to Latin script for better pronunciation
344
+ - **Native Script Support**: Works with Ge'ez script (ፊደል) characters
345
+ - **Enhanced Accuracy**: Romanization improves model performance for these languages
346
+
347
+ ### Other Languages
348
+ - **Somali, Swahili, Afan Oromo**: Direct text processing
349
+ - **Chichewa**: Uses Swahili model as fallback
350
+
351
+ ### Technical Details
352
+ - Uses Facebook's MMS-TTS models
353
+ - Automatic uroman romanization for Amharic and Tigrinya
354
+ - GPU acceleration when available
355
+ """)
356
+
357
  # Footer
358
  gr.Markdown(
359
  """
 
361
  ### ℹ️ About
362
  **Powered by:** Facebook MMS-TTS Models
363
  **Supported Languages:** Amharic, Somali, Swahili, Afan Oromo, Tigrinya, Chichewa
364
+ **Special Features:** Automatic romanization for Amharic & Tigrinya
365
  **Model Type:** Text-to-Speech
366
  **Max Text Length:** 500 characters (single), 200 characters (batch)
367
 
 
373
  # Pre-load a model to reduce first-time latency
374
  print("🚀 Starting MMS Text-to-Speech Service...")
375
  print("📋 Supported Languages:", list(MODELS.keys()))
376
+ print("🌟 Special Romanization for: Amharic, Tigrinya")
377
 
378
  # Pre-load Amharic model for faster first response
379
  try:
requirements.txt CHANGED
@@ -4,4 +4,5 @@ torchaudio>=2.0.0
4
  transformers>=4.30.0
5
  gradio>=4.0.0
6
  numpy>=1.21.0
7
- soundfile>=0.12.0
 
 
4
  transformers>=4.30.0
5
  gradio>=4.0.0
6
  numpy>=1.21.0
7
+ soundfile>=0.12.0
8
+ uroman>=1.0.0