rahulrana0001 committed on
Commit
fc330ed
·
1 Parent(s): 4c4ec72

Add Universal Storyteller voice pack (11 diverse tones)

Browse files
Files changed (2) hide show
  1. app.py +9 -3
  2. pipeline/tts.py +12 -3
app.py CHANGED
@@ -26,9 +26,15 @@ def check_resources():
26
  VOICE_STYLES = [
27
  "Cheerful (Maya)",
28
  "Soft & Seductive (Maya)",
29
- "Excited & High-Pitch (Maya)",
30
- "Deep & Sensual (Sita)",
31
- "Dramatic Narrator (Sita)"
 
 
 
 
 
 
32
  ]
33
 
34
  def process_standard_pipeline(image, document, input_text, emotion_choice):
 
26
  VOICE_STYLES = [
27
  "Cheerful (Maya)",
28
  "Soft & Seductive (Maya)",
29
+ "Excited (Maya)",
30
+ "Sad & Emotional (Sita)",
31
+ "Dramatic Narrator (Sita)",
32
+ "Old Wise Woman",
33
+ "Playful Child",
34
+ "Brave Hero (Arjun)",
35
+ "Deep & Serious (Arjun)",
36
+ "Calm Storyteller (Surya)",
37
+ "Professional News"
38
  ]
39
 
40
  def process_standard_pipeline(image, document, input_text, emotion_choice):
pipeline/tts.py CHANGED
@@ -10,11 +10,20 @@ class TamilTTS:
10
  # We use edge-tts which is cloud-based and requires zero local RAM for models.
11
  # This solves the 6GB RAM limitation on the host machine.
12
  self.voices = {
 
13
  "Cheerful (Maya)": ("ta-IN-PallaviNeural", "+10Hz", "+10%"),
14
  "Soft & Seductive (Maya)": ("ta-IN-PallaviNeural", "-15Hz", "-20%"),
15
- "Excited & High-Pitch (Maya)": ("ta-IN-PallaviNeural", "+30Hz", "+30%"),
16
- "Deep & Sensual (Sita)": ("ta-LK-SaranyaNeural", "-15Hz", "-10%"),
17
- "Dramatic Narrator (Sita)": ("ta-LK-SaranyaNeural", "+0Hz", "+0%")
 
 
 
 
 
 
 
 
18
  }
19
 
20
  def generate_speech(self, text: str, style_choice: str) -> tuple:
 
10
  # We use edge-tts which is cloud-based and requires zero local RAM for models.
11
  # This solves the 6GB RAM limitation on the host machine.
12
  self.voices = {
13
+ # --- Female Voices ---
14
  "Cheerful (Maya)": ("ta-IN-PallaviNeural", "+10Hz", "+10%"),
15
  "Soft & Seductive (Maya)": ("ta-IN-PallaviNeural", "-15Hz", "-20%"),
16
+ "Excited (Maya)": ("ta-IN-PallaviNeural", "+30Hz", "+30%"),
17
+ "Sad & Emotional (Sita)": ("ta-LK-SaranyaNeural", "-10Hz", "-20%"),
18
+ "Dramatic Narrator (Sita)": ("ta-LK-SaranyaNeural", "+0Hz", "+0%"),
19
+ "Old Wise Woman": ("ta-SG-AnanthiNeural", "-20Hz", "-25%"),
20
+ "Playful Child": ("ta-IN-PallaviNeural", "+40Hz", "+25%"),
21
+
22
+ # --- Male Voices ---
23
+ "Brave Hero (Arjun)": ("ta-IN-ValluvarNeural", "+0Hz", "+5%"),
24
+ "Deep & Serious (Arjun)": ("ta-IN-ValluvarNeural", "-15Hz", "-10%"),
25
+ "Calm Storyteller (Surya)": ("ta-MY-SuryaNeural", "-5Hz", "-5%"),
26
+ "Professional News": ("ta-SG-VenkatNeural", "+5Hz", "+10%")
27
  }
28
 
29
  def generate_speech(self, text: str, style_choice: str) -> tuple: