Spaces:
Sleeping
Sleeping
Commit
·
d4d880a
1
Parent(s):
c1c3a76
Add character voice settings for more expressive TTS
Browse files
Each persona now has unique voice settings:
- 5-Year-Old: Fast, bouncy, very expressive (stability: 0.3, speed: 1.15)
- Gordon Ramsay: Unpredictable, maximum drama (stability: 0.25, style: 0.9)
- Pirate: Rough, theatrical (stability: 0.35, style: 0.85)
- Shakespeare: Slow, deliberate, poetic (speed: 0.85)
- Surfer Dude: Chill, relaxed flow (speed: 0.9)
- Yoda: Very slow, wise pauses (speed: 0.7)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
- app.py +3 -1
- src/agent.py +1 -0
- src/personas.py +41 -0
- src/tts.py +23 -8
app.py
CHANGED
|
@@ -65,6 +65,7 @@ def explain_topic(topic: str, persona_name: str, audience: str = "", generate_au
|
|
| 65 |
explanation = ""
|
| 66 |
sources = []
|
| 67 |
voice_id = None
|
|
|
|
| 68 |
mcp_tools = []
|
| 69 |
|
| 70 |
# Run the agent pipeline
|
|
@@ -88,6 +89,7 @@ def explain_topic(topic: str, persona_name: str, audience: str = "", generate_au
|
|
| 88 |
explanation = update["explanation"]
|
| 89 |
sources = update.get("sources", sources)
|
| 90 |
voice_id = update["voice_id"]
|
|
|
|
| 91 |
mcp_tools = update.get("mcp_tools", [])
|
| 92 |
progress(0.8, desc="Explanation ready!")
|
| 93 |
|
|
@@ -99,7 +101,7 @@ def explain_topic(topic: str, persona_name: str, audience: str = "", generate_au
|
|
| 99 |
if generate_audio and explanation and voice_id:
|
| 100 |
progress(0.9, desc="Generating audio...")
|
| 101 |
try:
|
| 102 |
-
audio_bytes = generate_speech(explanation, voice_id)
|
| 103 |
# Save to temp file for Gradio
|
| 104 |
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
|
| 105 |
f.write(audio_bytes)
|
|
|
|
| 65 |
explanation = ""
|
| 66 |
sources = []
|
| 67 |
voice_id = None
|
| 68 |
+
voice_settings = None
|
| 69 |
mcp_tools = []
|
| 70 |
|
| 71 |
# Run the agent pipeline
|
|
|
|
| 89 |
explanation = update["explanation"]
|
| 90 |
sources = update.get("sources", sources)
|
| 91 |
voice_id = update["voice_id"]
|
| 92 |
+
voice_settings = update.get("voice_settings")
|
| 93 |
mcp_tools = update.get("mcp_tools", [])
|
| 94 |
progress(0.8, desc="Explanation ready!")
|
| 95 |
|
|
|
|
| 101 |
if generate_audio and explanation and voice_id:
|
| 102 |
progress(0.9, desc="Generating audio...")
|
| 103 |
try:
|
| 104 |
+
audio_bytes = generate_speech(explanation, voice_id, voice_settings)
|
| 105 |
# Save to temp file for Gradio
|
| 106 |
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
|
| 107 |
f.write(audio_bytes)
|
src/agent.py
CHANGED
|
@@ -307,5 +307,6 @@ Now explain "{topic}" in your unique {persona_name} voice and style. Make it fun
|
|
| 307 |
"persona": persona_name,
|
| 308 |
"persona_emoji": persona["emoji"],
|
| 309 |
"voice_id": persona["voice_id"],
|
|
|
|
| 310 |
"mcp_tools": mcp_tools,
|
| 311 |
}
|
|
|
|
| 307 |
"persona": persona_name,
|
| 308 |
"persona_emoji": persona["emoji"],
|
| 309 |
"voice_id": persona["voice_id"],
|
| 310 |
+
"voice_settings": persona.get("voice_settings"),
|
| 311 |
"mcp_tools": mcp_tools,
|
| 312 |
}
|
src/personas.py
CHANGED
|
@@ -1,5 +1,10 @@
|
|
| 1 |
"""Persona definitions for Explainor."""
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
PERSONAS = {
|
| 4 |
"5-Year-Old": {
|
| 5 |
"system_prompt": """You are an excited, curious 5-year-old child explaining things.
|
|
@@ -8,6 +13,12 @@ Say things like "Ooh!" and "Wow!" and "You know what?"
|
|
| 8 |
Compare everything to toys, candy, cartoons, and playground activities.
|
| 9 |
Keep sentences very short. Use lots of exclamation marks!""",
|
| 10 |
"voice_id": "jBpfuIE2acCO8z3wKNLl", # "Aria" - young, enthusiastic
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
"emoji": "πΆ",
|
| 12 |
},
|
| 13 |
"Gordon Ramsay": {
|
|
@@ -18,6 +29,12 @@ Compare concepts to cooking techniques, ingredients, and kitchen disasters.
|
|
| 18 |
Use phrases like "Listen here!", "It's RAW!", "Absolutely stunning!", and "Donkey!".
|
| 19 |
Be dramatic but ultimately make the explanation clear.""",
|
| 20 |
"voice_id": "N2lVS1w4EtoT3dr4eOWO", # "Callum" - British, intense
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
"emoji": "π¨βπ³",
|
| 22 |
},
|
| 23 |
"Pirate": {
|
|
@@ -28,6 +45,12 @@ Talk about concepts like they're parts of a treasure map or sea voyage.
|
|
| 28 |
Be dramatic and swashbuckling. Mention your crew, your ship, and rum occasionally.
|
| 29 |
End with something about setting sail for knowledge.""",
|
| 30 |
"voice_id": "TX3LPaxmHKxFdv7VOQHJ", # "Liam" - gruff, theatrical
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
"emoji": "π΄ββ οΈ",
|
| 32 |
},
|
| 33 |
"Shakespeare": {
|
|
@@ -38,6 +61,12 @@ Occasionally quote or parody your own famous lines.
|
|
| 38 |
Structure explanations like soliloquies with dramatic pauses.
|
| 39 |
Compare technology and modern things to courtly intrigue and theatrical performance.""",
|
| 40 |
"voice_id": "onwK4e9ZLuTAKqWW03F9", # "Daniel" - theatrical British
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
"emoji": "π",
|
| 42 |
},
|
| 43 |
"Surfer Dude": {
|
|
@@ -48,6 +77,12 @@ Be super chill and positive. Everything is awesome and gives good vibes.
|
|
| 48 |
Use "like" as filler. Talk about concepts like they're waves to ride.
|
| 49 |
Keep the energy mellow but enthusiastic.""",
|
| 50 |
"voice_id": "ErXwobaYiN019PkySvjV", # "Antoni" - laid-back American
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
"emoji": "π",
|
| 52 |
},
|
| 53 |
"Yoda": {
|
|
@@ -59,6 +94,12 @@ Use phrases like "Hmmmm", "Yes, yes", "Much to learn, you have."
|
|
| 59 |
Speak slowly and thoughtfully. Make profound observations.
|
| 60 |
Occasionally chuckle wisely: "Hehehehe".""",
|
| 61 |
"voice_id": "pqHfZKP75CvOlQylNhV4", # "Bill" - slow, thoughtful
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
"emoji": "π§",
|
| 63 |
},
|
| 64 |
}
|
|
|
|
| 1 |
"""Persona definitions for Explainor."""
|
| 2 |
|
| 3 |
+
# Voice settings: stability (0-1), similarity_boost (0-1), style (0-1), speed (0.5-2.0)
|
| 4 |
+
# Lower stability = more expressive/variable
|
| 5 |
+
# Higher style = more exaggerated delivery
|
| 6 |
+
# Speed affects pacing
|
| 7 |
+
|
| 8 |
PERSONAS = {
|
| 9 |
"5-Year-Old": {
|
| 10 |
"system_prompt": """You are an excited, curious 5-year-old child explaining things.
|
|
|
|
| 13 |
Compare everything to toys, candy, cartoons, and playground activities.
|
| 14 |
Keep sentences very short. Use lots of exclamation marks!""",
|
| 15 |
"voice_id": "jBpfuIE2acCO8z3wKNLl", # "Aria" - young, enthusiastic
|
| 16 |
+
"voice_settings": {
|
| 17 |
+
"stability": 0.3, # Very expressive, bouncy
|
| 18 |
+
"similarity_boost": 0.7,
|
| 19 |
+
"style": 0.8, # Exaggerated childlike delivery
|
| 20 |
+
"speed": 1.15, # Kids talk fast when excited
|
| 21 |
+
},
|
| 22 |
"emoji": "πΆ",
|
| 23 |
},
|
| 24 |
"Gordon Ramsay": {
|
|
|
|
| 29 |
Use phrases like "Listen here!", "It's RAW!", "Absolutely stunning!", and "Donkey!".
|
| 30 |
Be dramatic but ultimately make the explanation clear.""",
|
| 31 |
"voice_id": "N2lVS1w4EtoT3dr4eOWO", # "Callum" - British, intense
|
| 32 |
+
"voice_settings": {
|
| 33 |
+
"stability": 0.25, # Unpredictable, emotional
|
| 34 |
+
"similarity_boost": 0.8,
|
| 35 |
+
"style": 0.9, # Maximum drama!
|
| 36 |
+
"speed": 1.1, # Intense, rapid delivery
|
| 37 |
+
},
|
| 38 |
"emoji": "π¨βπ³",
|
| 39 |
},
|
| 40 |
"Pirate": {
|
|
|
|
| 45 |
Be dramatic and swashbuckling. Mention your crew, your ship, and rum occasionally.
|
| 46 |
End with something about setting sail for knowledge.""",
|
| 47 |
"voice_id": "TX3LPaxmHKxFdv7VOQHJ", # "Liam" - gruff, theatrical
|
| 48 |
+
"voice_settings": {
|
| 49 |
+
"stability": 0.35, # Rough, varied
|
| 50 |
+
"similarity_boost": 0.6,
|
| 51 |
+
"style": 0.85, # Theatrical pirate flair
|
| 52 |
+
"speed": 0.95, # Slightly slower, dramatic
|
| 53 |
+
},
|
| 54 |
"emoji": "π΄ββ οΈ",
|
| 55 |
},
|
| 56 |
"Shakespeare": {
|
|
|
|
| 61 |
Structure explanations like soliloquies with dramatic pauses.
|
| 62 |
Compare technology and modern things to courtly intrigue and theatrical performance.""",
|
| 63 |
"voice_id": "onwK4e9ZLuTAKqWW03F9", # "Daniel" - theatrical British
|
| 64 |
+
"voice_settings": {
|
| 65 |
+
"stability": 0.4, # Theatrical variation
|
| 66 |
+
"similarity_boost": 0.75,
|
| 67 |
+
"style": 0.7, # Dramatic but refined
|
| 68 |
+
"speed": 0.85, # Slow, deliberate, poetic
|
| 69 |
+
},
|
| 70 |
"emoji": "π",
|
| 71 |
},
|
| 72 |
"Surfer Dude": {
|
|
|
|
| 77 |
Use "like" as filler. Talk about concepts like they're waves to ride.
|
| 78 |
Keep the energy mellow but enthusiastic.""",
|
| 79 |
"voice_id": "ErXwobaYiN019PkySvjV", # "Antoni" - laid-back American
|
| 80 |
+
"voice_settings": {
|
| 81 |
+
"stability": 0.5, # Relaxed, flowing
|
| 82 |
+
"similarity_boost": 0.65,
|
| 83 |
+
"style": 0.6, # Chill vibes
|
| 84 |
+
"speed": 0.9, # Slooow and chill broooo
|
| 85 |
+
},
|
| 86 |
"emoji": "π",
|
| 87 |
},
|
| 88 |
"Yoda": {
|
|
|
|
| 94 |
Speak slowly and thoughtfully. Make profound observations.
|
| 95 |
Occasionally chuckle wisely: "Hehehehe".""",
|
| 96 |
"voice_id": "pqHfZKP75CvOlQylNhV4", # "Bill" - slow, thoughtful
|
| 97 |
+
"voice_settings": {
|
| 98 |
+
"stability": 0.45, # Wise, measured variations
|
| 99 |
+
"similarity_boost": 0.7,
|
| 100 |
+
"style": 0.5, # Subtle but distinct
|
| 101 |
+
"speed": 0.7, # Slow... speak I do... hmmm
|
| 102 |
+
},
|
| 103 |
"emoji": "π§",
|
| 104 |
},
|
| 105 |
}
|
src/tts.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
"""ElevenLabs Text-to-Speech integration."""
|
| 2 |
|
| 3 |
import os
|
| 4 |
-
from elevenlabs import ElevenLabs
|
| 5 |
|
| 6 |
|
| 7 |
def get_client() -> ElevenLabs:
|
|
@@ -12,25 +12,40 @@ def get_client() -> ElevenLabs:
|
|
| 12 |
return ElevenLabs(api_key=api_key)
|
| 13 |
|
| 14 |
|
| 15 |
-
def generate_speech(text: str, voice_id: str) -> bytes:
|
| 16 |
"""Generate speech audio from text.
|
| 17 |
|
| 18 |
Args:
|
| 19 |
text: The text to convert to speech
|
| 20 |
voice_id: ElevenLabs voice ID
|
|
|
|
| 21 |
|
| 22 |
Returns:
|
| 23 |
Audio bytes (MP3 format)
|
| 24 |
"""
|
| 25 |
client = get_client()
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
# Generate audio
|
| 28 |
-
|
| 29 |
-
voice_id
|
| 30 |
-
text
|
| 31 |
-
model_id
|
| 32 |
-
output_format
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
# Collect all audio chunks
|
| 36 |
audio_chunks = []
|
|
|
|
| 1 |
"""ElevenLabs Text-to-Speech integration."""
|
| 2 |
|
| 3 |
import os
|
| 4 |
+
from elevenlabs import ElevenLabs, VoiceSettings
|
| 5 |
|
| 6 |
|
| 7 |
def get_client() -> ElevenLabs:
|
|
|
|
| 12 |
return ElevenLabs(api_key=api_key)
|
| 13 |
|
| 14 |
|
| 15 |
+
def generate_speech(text: str, voice_id: str, voice_settings: dict = None) -> bytes:
|
| 16 |
"""Generate speech audio from text.
|
| 17 |
|
| 18 |
Args:
|
| 19 |
text: The text to convert to speech
|
| 20 |
voice_id: ElevenLabs voice ID
|
| 21 |
+
voice_settings: Optional dict with stability, similarity_boost, style, speed
|
| 22 |
|
| 23 |
Returns:
|
| 24 |
Audio bytes (MP3 format)
|
| 25 |
"""
|
| 26 |
client = get_client()
|
| 27 |
|
| 28 |
+
# Build voice settings if provided
|
| 29 |
+
settings = None
|
| 30 |
+
if voice_settings:
|
| 31 |
+
settings = VoiceSettings(
|
| 32 |
+
stability=voice_settings.get("stability", 0.5),
|
| 33 |
+
similarity_boost=voice_settings.get("similarity_boost", 0.75),
|
| 34 |
+
style=voice_settings.get("style", 0.0),
|
| 35 |
+
speed=voice_settings.get("speed", 1.0),
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
# Generate audio
|
| 39 |
+
kwargs = {
|
| 40 |
+
"voice_id": voice_id,
|
| 41 |
+
"text": text,
|
| 42 |
+
"model_id": "eleven_multilingual_v2",
|
| 43 |
+
"output_format": "mp3_44100_128",
|
| 44 |
+
}
|
| 45 |
+
if settings:
|
| 46 |
+
kwargs["voice_settings"] = settings
|
| 47 |
+
|
| 48 |
+
audio_generator = client.text_to_speech.convert(**kwargs)
|
| 49 |
|
| 50 |
# Collect all audio chunks
|
| 51 |
audio_chunks = []
|