Commit ·
fc330ed
1
Parent(s): 4c4ec72
Add Universal Storyteller voice pack (11 diverse tones)
Browse files
- app.py +9 -3
- pipeline/tts.py +12 -3
app.py
CHANGED
|
@@ -26,9 +26,15 @@ def check_resources():
|
|
| 26 |
VOICE_STYLES = [
|
| 27 |
"Cheerful (Maya)",
|
| 28 |
"Soft & Seductive (Maya)",
|
| 29 |
-
"Excited
|
| 30 |
-
"
|
| 31 |
-
"Dramatic Narrator (Sita)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
]
|
| 33 |
|
| 34 |
def process_standard_pipeline(image, document, input_text, emotion_choice):
|
|
|
|
| 26 |
VOICE_STYLES = [
|
| 27 |
"Cheerful (Maya)",
|
| 28 |
"Soft & Seductive (Maya)",
|
| 29 |
+
"Excited (Maya)",
|
| 30 |
+
"Sad & Emotional (Sita)",
|
| 31 |
+
"Dramatic Narrator (Sita)",
|
| 32 |
+
"Old Wise Woman",
|
| 33 |
+
"Playful Child",
|
| 34 |
+
"Brave Hero (Arjun)",
|
| 35 |
+
"Deep & Serious (Arjun)",
|
| 36 |
+
"Calm Storyteller (Surya)",
|
| 37 |
+
"Professional News"
|
| 38 |
]
|
| 39 |
|
| 40 |
def process_standard_pipeline(image, document, input_text, emotion_choice):
|
pipeline/tts.py
CHANGED
|
@@ -10,11 +10,20 @@ class TamilTTS:
|
|
| 10 |
# We use edge-tts which is cloud-based and requires zero local RAM for models.
|
| 11 |
# This solves the 6GB RAM limitation on the host machine.
|
| 12 |
self.voices = {
|
|
|
|
| 13 |
"Cheerful (Maya)": ("ta-IN-PallaviNeural", "+10Hz", "+10%"),
|
| 14 |
"Soft & Seductive (Maya)": ("ta-IN-PallaviNeural", "-15Hz", "-20%"),
|
| 15 |
-
"Excited
|
| 16 |
-
"
|
| 17 |
-
"Dramatic Narrator (Sita)": ("ta-LK-SaranyaNeural", "+0Hz", "+0%")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
}
|
| 19 |
|
| 20 |
def generate_speech(self, text: str, style_choice: str) -> tuple:
|
|
|
|
| 10 |
# We use edge-tts which is cloud-based and requires zero local RAM for models.
|
| 11 |
# This solves the 6GB RAM limitation on the host machine.
|
| 12 |
self.voices = {
|
| 13 |
+
# --- Female Voices ---
|
| 14 |
"Cheerful (Maya)": ("ta-IN-PallaviNeural", "+10Hz", "+10%"),
|
| 15 |
"Soft & Seductive (Maya)": ("ta-IN-PallaviNeural", "-15Hz", "-20%"),
|
| 16 |
+
"Excited (Maya)": ("ta-IN-PallaviNeural", "+30Hz", "+30%"),
|
| 17 |
+
"Sad & Emotional (Sita)": ("ta-LK-SaranyaNeural", "-10Hz", "-20%"),
|
| 18 |
+
"Dramatic Narrator (Sita)": ("ta-LK-SaranyaNeural", "+0Hz", "+0%"),
|
| 19 |
+
"Old Wise Woman": ("ta-SG-AnanthiNeural", "-20Hz", "-25%"),
|
| 20 |
+
"Playful Child": ("ta-IN-PallaviNeural", "+40Hz", "+25%"),
|
| 21 |
+
|
| 22 |
+
# --- Male Voices ---
|
| 23 |
+
"Brave Hero (Arjun)": ("ta-IN-ValluvarNeural", "+0Hz", "+5%"),
|
| 24 |
+
"Deep & Serious (Arjun)": ("ta-IN-ValluvarNeural", "-15Hz", "-10%"),
|
| 25 |
+
"Calm Storyteller (Surya)": ("ta-MY-SuryaNeural", "-5Hz", "-5%"),
|
| 26 |
+
"Professional News": ("ta-SG-VenkatNeural", "+5Hz", "+10%")
|
| 27 |
}
|
| 28 |
|
| 29 |
def generate_speech(self, text: str, style_choice: str) -> tuple:
|