Spaces:

minte-atnafu
/

GihonTech_Local_Language_TTS

Sleeping

App Files Files Community

Minte committed on Oct 8, 2025

Commit

943a8da

1 Parent(s): 6d28d4b

tts space

Browse files

Files changed (2) hide show

app.py +63 -68
requirements.txt +1 -3

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import io
 import soundfile as sf
 from datetime import datetime
 import os
 # Model configuration for each language
 MODELS = {
@@ -16,18 +17,7 @@ MODELS = {
     "Swahili": "facebook/mms-tts-swh",
     "Afan Oromo": "facebook/mms-tts-orm",
     "Tigrinya": "facebook/mms-tts-tir",
-    # Note: Chichewa doesn't have a dedicated MMS-TTS model, using Swahili as fallback
-    "Chichewa": "facebook/mms-tts-swh"
-}
-# Language codes for phonemizer
-LANGUAGE_CODES = {
-    "Amharic": "am",
-    "Somali": "so",
-    "Swahili": "sw",
-    "Afan Oromo": "om",
-    "Tigrinya": "ti",
-    "Chichewa": "ny"  # Chichewa language code
 }
 class MMS_TTS_Service:
@@ -128,27 +118,12 @@ def text_to_speech(text, language, speed=1.0):
         return None, error
     sample_rate, waveform = result
-    return (sample_rate, waveform), "✅ Speech generated successfully!"
-def batch_tts(text_list, language, speed=1.0):
-    """
-    Batch processing multiple texts
-    """
-    results = []
-    errors = []
-    for i, text in enumerate(text_list):
-        if text.strip():
-            result, error = tts_service.generate_speech(text.strip(), language, speed)
-            if error:
-                errors.append(f"Text {i+1}: {error}")
-            else:
-                results.append((f"output_{i+1}.wav", result[0], result[1]))
-    return results, errors
 def create_demo_audio(language):
-    """Create demo audio for each language"""
     demo_texts = {
         "Amharic": "ሰላም፣ ይህ የድምፅ ማመንጫ ሞዴል ነው።",
         "Somali": "Salaam, kani waa modelka cod-sameynta.",
@@ -158,8 +133,7 @@ def create_demo_audio(language):
         "Chichewa": "Moni, iyi ndi modeli yopanga mawu."
     }
-    demo_text = demo_texts.get(language, "Hello, this is a text-to-speech model.")
-    return demo_text
 # Gradio interface
 with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
@@ -216,22 +190,27 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
                 interactive=False,
                 placeholder="Ready to generate speech..."
             )
-            # Batch processing section
-            gr.Markdown("### 📚 Batch Processing")
-            batch_text = gr.Textbox(
-                lines=4,
-                placeholder="Enter multiple texts, one per line...",
-                label="Batch Texts",
-                info="Each line will be processed separately"
-            )
-            batch_btn = gr.Button("Process Batch")
-            batch_output = gr.File(
-                label="Batch Results",
-                file_count="multiple",
-                type="file"
-            )
-            batch_status = gr.Textbox(label="Batch Status")
     # Event handlers
     def generate_speech_handler(text, lang, spd):
@@ -240,33 +219,48 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
         return text_to_speech(text, lang, spd)
     def clear_all():
-        return "", "", None, "Cleared!"
     def load_demo(lang):
         return create_demo_audio(lang)
     def process_batch(texts, lang, spd):
         if not texts.strip():
-            return [], "No texts provided."
         text_list = [t.strip() for t in texts.split('\n') if t.strip()]
-        if len(text_list) > 10:
-            return [], "Maximum 10 texts allowed for batch processing."
-        results, errors = batch_tts(text_list, lang, spd)
-        # Save results to files
-        output_files = []
-        for i, (filename, sample_rate, waveform) in enumerate(results):
-            temp_file = f"/tmp/{filename}"
-            sf.write(temp_file, waveform, sample_rate)
-            output_files.append(temp_file)
-        status_msg = f"Processed {len(results)} texts successfully."
-        if errors:
-            status_msg += f" Errors: {len(errors)}"
-        return output_files, status_msg
     # Connect events
     generate_btn.click(
@@ -277,7 +271,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
     clear_btn.click(
         fn=clear_all,
-        outputs=[text_input, demo_output, audio_output, status]
     )
     demo_btn.click(
@@ -289,7 +283,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
     batch_btn.click(
         fn=process_batch,
         inputs=[batch_text, language, speed],
-        outputs=[batch_output, batch_status]
     )
     # Examples
@@ -299,7 +293,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
         ["Somali", "Qof walba wuxuu leeyahay xuquuqda aadamaha."],
         ["Swahili", "Kila mtu ana haki zote za binadamu."],
         ["Afan Oromo", "Nama hundi mirga ummataa hundaa waliin dhalate."],
-        ["Tigrinya", "ኩሉ ሰብ ንኩሉ መሰላት እኩል እዩ።"]
     ]
     gr.Examples(
@@ -318,9 +313,9 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
         **Powered by:** Facebook MMS-TTS Models
         **Supported Languages:** Amharic, Somali, Swahili, Afan Oromo, Tigrinya, Chichewa
         **Model Type:** Text-to-Speech
-        **Max Text Length:** 500 characters
-        For issues or questions, please check the model cards on Hugging Face.
         """
     )

 import soundfile as sf
 from datetime import datetime
 import os
+import tempfile
 # Model configuration for each language
 MODELS = {
     "Swahili": "facebook/mms-tts-swh",
     "Afan Oromo": "facebook/mms-tts-orm",
     "Tigrinya": "facebook/mms-tts-tir",
+    "Chichewa": "facebook/mms-tts-swh"  # Using Swahili as fallback
 }
 class MMS_TTS_Service:
         return None, error
     sample_rate, waveform = result
+    # Return as (sample_rate, audio_array) for gr.Audio
+    return (sample_rate, waveform), "✅ Speech generated successfully!"
 def create_demo_audio(language):
+    """Create demo text for each language"""
     demo_texts = {
         "Amharic": "ሰላም፣ ይህ የድምፅ ማመንጫ ሞዴል ነው።",
         "Somali": "Salaam, kani waa modelka cod-sameynta.",
         "Chichewa": "Moni, iyi ndi modeli yopanga mawu."
     }
+    return demo_texts.get(language, "Hello, this is a text-to-speech model.")
 # Gradio interface
 with gr.Blocks(theme=gr.themes.Soft(), title="MMS Text-to-Speech") as demo:
                 interactive=False,
                 placeholder="Ready to generate speech..."
             )
+    # Batch processing section (simplified)
+    with gr.Accordion("📚 Batch Processing (Advanced)", open=False):
+        gr.Markdown("Process multiple texts at once. Each line will be converted to a separate audio file.")
+        batch_text = gr.Textbox(
+            lines=4,
+            placeholder="Enter multiple texts, one per line...\nExample:\nHello\nHow are you?\nThank you",
+            label="Batch Texts",
+            info="Maximum 5 texts, each under 200 characters"
+        )
+        batch_btn = gr.Button("Process Batch Texts")
+        batch_status = gr.Textbox(label="Batch Processing Status")
+        # We'll use a gallery or multiple audio outputs for batch results
+        batch_results = gr.Gallery(
+            label="Batch Results",
+            show_label=True,
+            columns=2
+        )
     # Event handlers
     def generate_speech_handler(text, lang, spd):
         return text_to_speech(text, lang, spd)
     def clear_all():
+        return "", "", None, "Cleared!", "", None
     def load_demo(lang):
         return create_demo_audio(lang)
     def process_batch(texts, lang, spd):
+        """Process multiple texts and return file paths"""
         if not texts.strip():
+            return None, "No texts provided.", []
         text_list = [t.strip() for t in texts.split('\n') if t.strip()]
+        if len(text_list) > 5:
+            return None, "Maximum 5 texts allowed for batch processing.", []
+        # Validate each text
+        for i, text in enumerate(text_list):
+            if len(text) > 200:
+                return None, f"Text {i+1} is too long (max 200 characters).", []
+        results = []
+        error_count = 0
+        for i, text in enumerate(text_list):
+            result, error = tts_service.generate_speech(text, lang, spd)
+            if error:
+                error_count += 1
+                print(f"Error processing text {i+1}: {error}")
+            else:
+                sample_rate, waveform = result
+                # Create temporary file
+                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+                    sf.write(f.name, waveform, sample_rate)
+                    results.append(f.name)
+        if error_count > 0:
+            status_msg = f"Processed {len(results)}/{len(text_list)} texts. {error_count} failed."
+        else:
+            status_msg = f"Successfully processed all {len(text_list)} texts!"
+        # Return first result as preview and all as files
+        preview_audio = (results[0] if results else None)
+        return preview_audio, status_msg, results
     # Connect events
     generate_btn.click(
     clear_btn.click(
         fn=clear_all,
+        outputs=[text_input, demo_output, audio_output, status, batch_text, batch_results]
     )
     demo_btn.click(
     batch_btn.click(
         fn=process_batch,
         inputs=[batch_text, language, speed],
+        outputs=[audio_output, batch_status, batch_results]
     )
     # Examples
         ["Somali", "Qof walba wuxuu leeyahay xuquuqda aadamaha."],
         ["Swahili", "Kila mtu ana haki zote za binadamu."],
         ["Afan Oromo", "Nama hundi mirga ummataa hundaa waliin dhalate."],
+        ["Tigrinya", "ኩሉ ሰብ ንኩሉ መሰላት እኩል እዩ።"],
+        ["Chichewa", "Alipo wina aliyense ali ndi ufulu wachibadwidwe."]
     ]
     gr.Examples(
         **Powered by:** Facebook MMS-TTS Models
         **Supported Languages:** Amharic, Somali, Swahili, Afan Oromo, Tigrinya, Chichewa
         **Model Type:** Text-to-Speech
+        **Max Text Length:** 500 characters (single), 200 characters (batch)
+        Note: First request may take longer as models are downloaded.
         """
     )

requirements.txt CHANGED Viewed

@@ -4,6 +4,4 @@ torchaudio>=2.0.0
 transformers>=4.30.0
 gradio>=4.0.0
 numpy>=1.21.0
-librosa>=0.10.0
-soundfile>=0.12.0
-phonemizer>=3.0.0

 transformers>=4.30.0
 gradio>=4.0.0
 numpy>=1.21.0
+soundfile>=0.12.0