Spaces:

minte-atnafu
/

GihonTech_Local_Language_TTS

Sleeping

App Files Community

Minte committed on Oct 8, 2025

Commit

755fa07

1 Parent(s): 943a8da

tts space

Browse files

Files changed (2) hide show

app.py +57 -6
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -10,6 +10,15 @@ from datetime import datetime
 import os
 import tempfile
 # Model configuration for each language
 MODELS = {
     "Amharic": "facebook/mms-tts-amh",
@@ -53,14 +62,34 @@ class MMS_TTS_Service:
             print(f"❌ Error loading model for {language}: {e}")
             raise e
     def generate_speech(self, text, language, speed=1.0):
         """Generate speech from text for specified language"""
         try:
             # Load model if not already loaded
             model, tokenizer = self.load_model(language)
             # Tokenize input text
-            inputs = tokenizer(text, return_tensors="pt")
             input_ids = inputs["input_ids"].to(self.device)
             # Generate speech with torch.no_grad for efficiency
@@ -125,11 +154,11 @@ def text_to_speech(text, language, speed=1.0):
 def create_demo_audio(language):
     """Create demo text for each language"""
     demo_texts = {
-        "Amharic": "ሰላም፣ ይህ የድምፅ ማመንጫ ሞዴል ነው።",
         "Somali": "Salaam, kani waa modelka cod-sameynta.",
         "Swahili": "Halo, hii ni modeli ya kutengeneza sauti.",
         "Afan Oromo": "Akkam, kun modeli sagalee uumuudha.",
-        "Tigrinya": "ሰላም፣ እዚ ድምጺ ዝገብር ሞዴል እዩ።",
         "Chichewa": "Moni, iyi ndi modeli yopanga mawu."
     }
@@ -141,6 +170,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
         """
         # 🎙️ MMS Text-to-Speech for African Languages
         Convert text to natural speech in multiple African languages using Facebook's MMS-TTS models.
         """
     )
@@ -286,14 +317,14 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
         outputs=[audio_output, batch_status, batch_results]
     )
-    # Examples
     gr.Markdown("### 💡 Example Texts")
     examples = [
-        ["Amharic", "ሁሉም ሰው በሁሉም መብቶች እኩል ነው።"],
         ["Somali", "Qof walba wuxuu leeyahay xuquuqda aadamaha."],
         ["Swahili", "Kila mtu ana haki zote za binadamu."],
         ["Afan Oromo", "Nama hundi mirga ummataa hundaa waliin dhalate."],
-        ["Tigrinya", "ኩሉ ሰብ ንኩሉ መሰላት እኩል እዩ።"],
         ["Chichewa", "Alipo wina aliyense ali ndi ufulu wachibadwidwe."]
     ]
@@ -305,6 +336,24 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
         cache_examples=False
     )
     # Footer
     gr.Markdown(
         """
@@ -312,6 +361,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
         ### ℹ️ About
         **Powered by:** Facebook MMS-TTS Models
         **Supported Languages:** Amharic, Somali, Swahili, Afan Oromo, Tigrinya, Chichewa
         **Model Type:** Text-to-Speech
         **Max Text Length:** 500 characters (single), 200 characters (batch)
@@ -323,6 +373,7 @@ if __name__ == "__main__":
     # Pre-load a model to reduce first-time latency
     print("🚀 Starting MMS Text-to-Speech Service...")
     print("📋 Supported Languages:", list(MODELS.keys()))
     # Pre-load Amharic model for faster first response
     try:

 import os
 import tempfile
+# Install uroman if not available
+try:
+    from uroman import uroman
+except ImportError:
+    import subprocess
+    import sys
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "uroman"])
+    from uroman import uroman
 # Model configuration for each language
 MODELS = {
     "Amharic": "facebook/mms-tts-amh",
             print(f"❌ Error loading model for {language}: {e}")
             raise e
+    def preprocess_text(self, text, language):
+        """Preprocess text with romanization for Amharic and Tigrinya"""
+        if language in ["Amharic", "Tigrinya"]:
+            print(f"Romanizing {language} text...")
+            try:
+                # Romanize the text for Amharic and Tigrinya models
+                romanized_text = uroman(text)
+                print(f"Original: {text}")
+                print(f"Romanized: {romanized_text}")
+                return romanized_text
+            except Exception as e:
+                print(f"Romanization failed, using original text: {e}")
+                return text
+        else:
+            # For other languages, use text as is
+            return text
     def generate_speech(self, text, language, speed=1.0):
         """Generate speech from text for specified language"""
         try:
             # Load model if not already loaded
             model, tokenizer = self.load_model(language)
+            # Preprocess text (romanize for Amharic and Tigrinya)
+            processed_text = self.preprocess_text(text, language)
             # Tokenize input text
+            inputs = tokenizer(processed_text, return_tensors="pt")
             input_ids = inputs["input_ids"].to(self.device)
             # Generate speech with torch.no_grad for efficiency
 def create_demo_audio(language):
     """Create demo text for each language"""
     demo_texts = {
+        "Amharic": "ሰላም፣ ይህ የድምፅ ማመንጫ ሞዴል ነው። አመሰግናለሁ!",
         "Somali": "Salaam, kani waa modelka cod-sameynta.",
         "Swahili": "Halo, hii ni modeli ya kutengeneza sauti.",
         "Afan Oromo": "Akkam, kun modeli sagalee uumuudha.",
+        "Tigrinya": "ሰላም፣ እዚ ድምጺ ዝገብር ሞዴል እዩ። የቐንየለይ!",
         "Chichewa": "Moni, iyi ndi modeli yopanga mawu."
     }
         """
         # 🎙️ MMS Text-to-Speech for African Languages
         Convert text to natural speech in multiple African languages using Facebook's MMS-TTS models.
+        **Special Features for Amharic & Tigrinya:** Automatic romanization for better pronunciation
         """
     )
         outputs=[audio_output, batch_status, batch_results]
     )
+    # Examples with better Amharic and Tigrinya samples
     gr.Markdown("### 💡 Example Texts")
     examples = [
+        ["Amharic", "ሁሉም ሰው በሁሉም መብቶች እኩል ነው። አመሰግናለሁ!"],
+        ["Tigrinya", "ኩሉ ሰብ ንኩሉ መሰላት እኩል እዩ። የቐንየለይ!"],
         ["Somali", "Qof walba wuxuu leeyahay xuquuqda aadamaha."],
         ["Swahili", "Kila mtu ana haki zote za binadamu."],
         ["Afan Oromo", "Nama hundi mirga ummataa hundaa waliin dhalate."],
         ["Chichewa", "Alipo wina aliyense ali ndi ufulu wachibadwidwe."]
     ]
         cache_examples=False
     )
+    # Language-specific information
+    with gr.Accordion("ℹ️ Language-Specific Information", open=False):
+        gr.Markdown("""
+        ### Amharic & Tigrinya Support
+        - **Automatic Romanization**: Text is automatically converted to Latin script for better pronunciation
+        - **Native Script Support**: Works with Ge'ez script (ፊደል) characters
+        - **Enhanced Accuracy**: Romanization improves model performance for these languages
+        ### Other Languages
+        - **Somali, Swahili, Afan Oromo**: Direct text processing
+        - **Chichewa**: Uses Swahili model as fallback
+        ### Technical Details
+        - Uses Facebook's MMS-TTS models
+        - Automatic uroman romanization for Amharic and Tigrinya
+        - GPU acceleration when available
+        """)
     # Footer
     gr.Markdown(
         """
         ### ℹ️ About
         **Powered by:** Facebook MMS-TTS Models
         **Supported Languages:** Amharic, Somali, Swahili, Afan Oromo, Tigrinya, Chichewa
+        **Special Features:** Automatic romanization for Amharic & Tigrinya
         **Model Type:** Text-to-Speech
         **Max Text Length:** 500 characters (single), 200 characters (batch)
     # Pre-load a model to reduce first-time latency
     print("🚀 Starting MMS Text-to-Speech Service...")
     print("📋 Supported Languages:", list(MODELS.keys()))
+    print("🌟 Special Romanization for: Amharic, Tigrinya")
     # Pre-load Amharic model for faster first response
     try:

requirements.txt CHANGED Viewed

@@ -4,4 +4,5 @@ torchaudio>=2.0.0
 transformers>=4.30.0
 gradio>=4.0.0
 numpy>=1.21.0
-soundfile>=0.12.0

 transformers>=4.30.0
 gradio>=4.0.0
 numpy>=1.21.0
+soundfile>=0.12.0
+uroman>=1.0.0