Spaces:

ranaspark
/

voice

Sleeping

rahulrana0001 committed on May 1

Commit

fc330ed

1 Parent(s): 4c4ec72

Add Universal Storyteller voice pack (11 diverse tones)

Files changed (2) hide show

app.py CHANGED Viewed

@@ -26,9 +26,15 @@ def check_resources():
 VOICE_STYLES = [
     "Cheerful (Maya)",
     "Soft & Seductive (Maya)",
-    "Excited & High-Pitch (Maya)",
-    "Deep & Sensual (Sita)",
-    "Dramatic Narrator (Sita)"
 ]
 def process_standard_pipeline(image, document, input_text, emotion_choice):

 VOICE_STYLES = [
     "Cheerful (Maya)",
     "Soft & Seductive (Maya)",
+    "Excited (Maya)",
+    "Sad & Emotional (Sita)",
+    "Dramatic Narrator (Sita)",
+    "Old Wise Woman",
+    "Playful Child",
+    "Brave Hero (Arjun)",
+    "Deep & Serious (Arjun)",
+    "Calm Storyteller (Surya)",
+    "Professional News"
 ]
 def process_standard_pipeline(image, document, input_text, emotion_choice):

pipeline/tts.py CHANGED Viewed

@@ -10,11 +10,20 @@ class TamilTTS:
         # We use edge-tts which is cloud-based and requires zero local RAM for models.
         # This solves the 6GB RAM limitation on the host machine.
         self.voices = {
             "Cheerful (Maya)": ("ta-IN-PallaviNeural", "+10Hz", "+10%"),
             "Soft & Seductive (Maya)": ("ta-IN-PallaviNeural", "-15Hz", "-20%"),
-            "Excited & High-Pitch (Maya)": ("ta-IN-PallaviNeural", "+30Hz", "+30%"),
-            "Deep & Sensual (Sita)": ("ta-LK-SaranyaNeural", "-15Hz", "-10%"),
-            "Dramatic Narrator (Sita)": ("ta-LK-SaranyaNeural", "+0Hz", "+0%")
         }
     def generate_speech(self, text: str, style_choice: str) -> tuple:

         # We use edge-tts which is cloud-based and requires zero local RAM for models.
         # This solves the 6GB RAM limitation on the host machine.
         self.voices = {
+            # --- Female Voices ---
             "Cheerful (Maya)": ("ta-IN-PallaviNeural", "+10Hz", "+10%"),
             "Soft & Seductive (Maya)": ("ta-IN-PallaviNeural", "-15Hz", "-20%"),
+            "Excited (Maya)": ("ta-IN-PallaviNeural", "+30Hz", "+30%"),
+            "Sad & Emotional (Sita)": ("ta-LK-SaranyaNeural", "-10Hz", "-20%"),
+            "Dramatic Narrator (Sita)": ("ta-LK-SaranyaNeural", "+0Hz", "+0%"),
+            "Old Wise Woman": ("ta-SG-AnanthiNeural", "-20Hz", "-25%"),
+            "Playful Child": ("ta-IN-PallaviNeural", "+40Hz", "+25%"),
+            # --- Male Voices ---
+            "Brave Hero (Arjun)": ("ta-IN-ValluvarNeural", "+0Hz", "+5%"),
+            "Deep & Serious (Arjun)": ("ta-IN-ValluvarNeural", "-15Hz", "-10%"),
+            "Calm Storyteller (Surya)": ("ta-MY-SuryaNeural", "-5Hz", "-5%"),
+            "Professional News": ("ta-SG-VenkatNeural", "+5Hz", "+10%")
         }
     def generate_speech(self, text: str, style_choice: str) -> tuple: