Emperor555 Claude committed on
Commit
d4d880a
·
1 Parent(s): c1c3a76

Add character voice settings for more expressive TTS

Browse files

Each persona now has unique voice settings:
- 5-Year-Old: Fast, bouncy, very expressive (stability: 0.3, speed: 1.15)
- Gordon Ramsay: Unpredictable, maximum drama (stability: 0.25, style: 0.9)
- Pirate: Rough, theatrical (stability: 0.35, style: 0.85)
- Shakespeare: Slow, deliberate, poetic (speed: 0.85)
- Surfer Dude: Chill, relaxed flow (speed: 0.9)
- Yoda: Very slow, wise pauses (speed: 0.7)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (4) hide show
  1. app.py +3 -1
  2. src/agent.py +1 -0
  3. src/personas.py +41 -0
  4. src/tts.py +23 -8
app.py CHANGED
@@ -65,6 +65,7 @@ def explain_topic(topic: str, persona_name: str, audience: str = "", generate_au
65
  explanation = ""
66
  sources = []
67
  voice_id = None
 
68
  mcp_tools = []
69
 
70
  # Run the agent pipeline
@@ -88,6 +89,7 @@ def explain_topic(topic: str, persona_name: str, audience: str = "", generate_au
88
  explanation = update["explanation"]
89
  sources = update.get("sources", sources)
90
  voice_id = update["voice_id"]
 
91
  mcp_tools = update.get("mcp_tools", [])
92
  progress(0.8, desc="Explanation ready!")
93
 
@@ -99,7 +101,7 @@ def explain_topic(topic: str, persona_name: str, audience: str = "", generate_au
99
  if generate_audio and explanation and voice_id:
100
  progress(0.9, desc="Generating audio...")
101
  try:
102
- audio_bytes = generate_speech(explanation, voice_id)
103
  # Save to temp file for Gradio
104
  with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
105
  f.write(audio_bytes)
 
65
  explanation = ""
66
  sources = []
67
  voice_id = None
68
+ voice_settings = None
69
  mcp_tools = []
70
 
71
  # Run the agent pipeline
 
89
  explanation = update["explanation"]
90
  sources = update.get("sources", sources)
91
  voice_id = update["voice_id"]
92
+ voice_settings = update.get("voice_settings")
93
  mcp_tools = update.get("mcp_tools", [])
94
  progress(0.8, desc="Explanation ready!")
95
 
 
101
  if generate_audio and explanation and voice_id:
102
  progress(0.9, desc="Generating audio...")
103
  try:
104
+ audio_bytes = generate_speech(explanation, voice_id, voice_settings)
105
  # Save to temp file for Gradio
106
  with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
107
  f.write(audio_bytes)
src/agent.py CHANGED
@@ -307,5 +307,6 @@ Now explain "{topic}" in your unique {persona_name} voice and style. Make it fun
307
  "persona": persona_name,
308
  "persona_emoji": persona["emoji"],
309
  "voice_id": persona["voice_id"],
 
310
  "mcp_tools": mcp_tools,
311
  }
 
307
  "persona": persona_name,
308
  "persona_emoji": persona["emoji"],
309
  "voice_id": persona["voice_id"],
310
+ "voice_settings": persona.get("voice_settings"),
311
  "mcp_tools": mcp_tools,
312
  }
src/personas.py CHANGED
@@ -1,5 +1,10 @@
1
  """Persona definitions for Explainor."""
2
 
 
 
 
 
 
3
  PERSONAS = {
4
  "5-Year-Old": {
5
  "system_prompt": """You are an excited, curious 5-year-old child explaining things.
@@ -8,6 +13,12 @@ Say things like "Ooh!" and "Wow!" and "You know what?"
8
  Compare everything to toys, candy, cartoons, and playground activities.
9
  Keep sentences very short. Use lots of exclamation marks!""",
10
  "voice_id": "jBpfuIE2acCO8z3wKNLl", # "Aria" - young, enthusiastic
 
 
 
 
 
 
11
  "emoji": "πŸ‘Ά",
12
  },
13
  "Gordon Ramsay": {
@@ -18,6 +29,12 @@ Compare concepts to cooking techniques, ingredients, and kitchen disasters.
18
  Use phrases like "Listen here!", "It's RAW!", "Absolutely stunning!", and "Donkey!".
19
  Be dramatic but ultimately make the explanation clear.""",
20
  "voice_id": "N2lVS1w4EtoT3dr4eOWO", # "Callum" - British, intense
 
 
 
 
 
 
21
  "emoji": "πŸ‘¨β€πŸ³",
22
  },
23
  "Pirate": {
@@ -28,6 +45,12 @@ Talk about concepts like they're parts of a treasure map or sea voyage.
28
  Be dramatic and swashbuckling. Mention your crew, your ship, and rum occasionally.
29
  End with something about setting sail for knowledge.""",
30
  "voice_id": "TX3LPaxmHKxFdv7VOQHJ", # "Liam" - gruff, theatrical
 
 
 
 
 
 
31
  "emoji": "πŸ΄β€β˜ οΈ",
32
  },
33
  "Shakespeare": {
@@ -38,6 +61,12 @@ Occasionally quote or parody your own famous lines.
38
  Structure explanations like soliloquies with dramatic pauses.
39
  Compare technology and modern things to courtly intrigue and theatrical performance.""",
40
  "voice_id": "onwK4e9ZLuTAKqWW03F9", # "Daniel" - theatrical British
 
 
 
 
 
 
41
  "emoji": "🎭",
42
  },
43
  "Surfer Dude": {
@@ -48,6 +77,12 @@ Be super chill and positive. Everything is awesome and gives good vibes.
48
  Use "like" as filler. Talk about concepts like they're waves to ride.
49
  Keep the energy mellow but enthusiastic.""",
50
  "voice_id": "ErXwobaYiN019PkySvjV", # "Antoni" - laid-back American
 
 
 
 
 
 
51
  "emoji": "πŸ„",
52
  },
53
  "Yoda": {
@@ -59,6 +94,12 @@ Use phrases like "Hmmmm", "Yes, yes", "Much to learn, you have."
59
  Speak slowly and thoughtfully. Make profound observations.
60
  Occasionally chuckle wisely: "Hehehehe".""",
61
  "voice_id": "pqHfZKP75CvOlQylNhV4", # "Bill" - slow, thoughtful
 
 
 
 
 
 
62
  "emoji": "πŸ§™",
63
  },
64
  }
 
1
  """Persona definitions for Explainor."""
2
 
3
+ # Voice settings: stability (0-1), similarity_boost (0-1), style (0-1), speed (0.7-1.2 per the ElevenLabs API; 1.0 is normal pace)
4
+ # Lower stability = more expressive/variable
5
+ # Higher style = more exaggerated delivery
6
+ # Speed affects pacing
7
+
8
  PERSONAS = {
9
  "5-Year-Old": {
10
  "system_prompt": """You are an excited, curious 5-year-old child explaining things.
 
13
  Compare everything to toys, candy, cartoons, and playground activities.
14
  Keep sentences very short. Use lots of exclamation marks!""",
15
  "voice_id": "jBpfuIE2acCO8z3wKNLl", # "Aria" - young, enthusiastic
16
+ "voice_settings": {
17
+ "stability": 0.3, # Very expressive, bouncy
18
+ "similarity_boost": 0.7,
19
+ "style": 0.8, # Exaggerated childlike delivery
20
+ "speed": 1.15, # Kids talk fast when excited
21
+ },
22
  "emoji": "πŸ‘Ά",
23
  },
24
  "Gordon Ramsay": {
 
29
  Use phrases like "Listen here!", "It's RAW!", "Absolutely stunning!", and "Donkey!".
30
  Be dramatic but ultimately make the explanation clear.""",
31
  "voice_id": "N2lVS1w4EtoT3dr4eOWO", # "Callum" - British, intense
32
+ "voice_settings": {
33
+ "stability": 0.25, # Unpredictable, emotional
34
+ "similarity_boost": 0.8,
35
+ "style": 0.9, # Maximum drama!
36
+ "speed": 1.1, # Intense, rapid delivery
37
+ },
38
  "emoji": "πŸ‘¨β€πŸ³",
39
  },
40
  "Pirate": {
 
45
  Be dramatic and swashbuckling. Mention your crew, your ship, and rum occasionally.
46
  End with something about setting sail for knowledge.""",
47
  "voice_id": "TX3LPaxmHKxFdv7VOQHJ", # "Liam" - gruff, theatrical
48
+ "voice_settings": {
49
+ "stability": 0.35, # Rough, varied
50
+ "similarity_boost": 0.6,
51
+ "style": 0.85, # Theatrical pirate flair
52
+ "speed": 0.95, # Slightly slower, dramatic
53
+ },
54
  "emoji": "πŸ΄β€β˜ οΈ",
55
  },
56
  "Shakespeare": {
 
61
  Structure explanations like soliloquies with dramatic pauses.
62
  Compare technology and modern things to courtly intrigue and theatrical performance.""",
63
  "voice_id": "onwK4e9ZLuTAKqWW03F9", # "Daniel" - theatrical British
64
+ "voice_settings": {
65
+ "stability": 0.4, # Theatrical variation
66
+ "similarity_boost": 0.75,
67
+ "style": 0.7, # Dramatic but refined
68
+ "speed": 0.85, # Slow, deliberate, poetic
69
+ },
70
  "emoji": "🎭",
71
  },
72
  "Surfer Dude": {
 
77
  Use "like" as filler. Talk about concepts like they're waves to ride.
78
  Keep the energy mellow but enthusiastic.""",
79
  "voice_id": "ErXwobaYiN019PkySvjV", # "Antoni" - laid-back American
80
+ "voice_settings": {
81
+ "stability": 0.5, # Relaxed, flowing
82
+ "similarity_boost": 0.65,
83
+ "style": 0.6, # Chill vibes
84
+ "speed": 0.9, # Slooow and chill broooo
85
+ },
86
  "emoji": "πŸ„",
87
  },
88
  "Yoda": {
 
94
  Speak slowly and thoughtfully. Make profound observations.
95
  Occasionally chuckle wisely: "Hehehehe".""",
96
  "voice_id": "pqHfZKP75CvOlQylNhV4", # "Bill" - slow, thoughtful
97
+ "voice_settings": {
98
+ "stability": 0.45, # Wise, measured variations
99
+ "similarity_boost": 0.7,
100
+ "style": 0.5, # Subtle but distinct
101
+ "speed": 0.7, # Slow... speak I do... hmmm
102
+ },
103
  "emoji": "πŸ§™",
104
  },
105
  }
src/tts.py CHANGED
@@ -1,7 +1,7 @@
1
  """ElevenLabs Text-to-Speech integration."""
2
 
3
  import os
4
- from elevenlabs import ElevenLabs
5
 
6
 
7
  def get_client() -> ElevenLabs:
@@ -12,25 +12,40 @@ def get_client() -> ElevenLabs:
12
  return ElevenLabs(api_key=api_key)
13
 
14
 
15
- def generate_speech(text: str, voice_id: str) -> bytes:
16
  """Generate speech audio from text.
17
 
18
  Args:
19
  text: The text to convert to speech
20
  voice_id: ElevenLabs voice ID
 
21
 
22
  Returns:
23
  Audio bytes (MP3 format)
24
  """
25
  client = get_client()
26
 
 
 
 
 
 
 
 
 
 
 
27
  # Generate audio
28
- audio_generator = client.text_to_speech.convert(
29
- voice_id=voice_id,
30
- text=text,
31
- model_id="eleven_multilingual_v2",
32
- output_format="mp3_44100_128",
33
- )
 
 
 
 
34
 
35
  # Collect all audio chunks
36
  audio_chunks = []
 
1
  """ElevenLabs Text-to-Speech integration."""
2
 
3
  import os
4
+ from elevenlabs import ElevenLabs, VoiceSettings
5
 
6
 
7
  def get_client() -> ElevenLabs:
 
12
  return ElevenLabs(api_key=api_key)
13
 
14
 
15
+ def generate_speech(text: str, voice_id: str, voice_settings: dict = None) -> bytes:
16
  """Generate speech audio from text.
17
 
18
  Args:
19
  text: The text to convert to speech
20
  voice_id: ElevenLabs voice ID
21
+ voice_settings: Optional dict with stability, similarity_boost, style, speed
22
 
23
  Returns:
24
  Audio bytes (MP3 format)
25
  """
26
  client = get_client()
27
 
28
+ # Build voice settings if provided
29
+ settings = None
30
+ if voice_settings:
31
+ settings = VoiceSettings(
32
+ stability=voice_settings.get("stability", 0.5),
33
+ similarity_boost=voice_settings.get("similarity_boost", 0.75),
34
+ style=voice_settings.get("style", 0.0),
35
+ speed=voice_settings.get("speed", 1.0),
36
+ )
37
+
38
  # Generate audio
39
+ kwargs = {
40
+ "voice_id": voice_id,
41
+ "text": text,
42
+ "model_id": "eleven_multilingual_v2",
43
+ "output_format": "mp3_44100_128",
44
+ }
45
+ if settings:
46
+ kwargs["voice_settings"] = settings
47
+
48
+ audio_generator = client.text_to_speech.convert(**kwargs)
49
 
50
  # Collect all audio chunks
51
  audio_chunks = []