Emperor555 Claude committed on
Commit
82c1b69
·
1 Parent(s): 3770d7a

Replace audio checkbox with Read Aloud button

Browse files

- Separate TTS from main explanation flow
- Add "Read Aloud" button to generate audio on demand
- Audio autoplays when generated
- Cleaner UX: get explanation first, then optionally hear it

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +59 -40
app.py CHANGED
@@ -43,15 +43,14 @@ def format_mcp_tools(tools: list[dict]) -> str:
43
  return md
44
 
45
 
46
- def explain_topic(topic: str, persona_name: str, audience: str = "", generate_audio: bool = False, progress=gr.Progress()):
47
  """Main function to explain a topic in a persona's voice.
48
 
49
- Returns: (explanation_text, audio_path, sources_md, steps_md, mcp_md)
50
  """
51
  if not topic.strip():
52
  return (
53
  "Please enter a topic to explain!",
54
- None,
55
  "",
56
  "❌ No topic provided",
57
  "",
@@ -63,8 +62,6 @@ def explain_topic(topic: str, persona_name: str, audience: str = "", generate_au
63
  steps_log = []
64
  explanation = ""
65
  sources = []
66
- voice_id = None
67
- voice_settings = None
68
  mcp_tools = []
69
 
70
  # Run the agent pipeline
@@ -87,41 +84,50 @@ def explain_topic(topic: str, persona_name: str, audience: str = "", generate_au
87
  elif update["type"] == "result":
88
  explanation = update["explanation"]
89
  sources = update.get("sources", sources)
90
- voice_id = update["voice_id"]
91
- voice_settings = update.get("voice_settings")
92
  mcp_tools = update.get("mcp_tools", [])
93
- progress(0.8, desc="Explanation ready!")
94
 
95
  # Format the steps log
96
  steps_md = "\n\n---\n\n".join(steps_log)
97
 
98
- # Generate audio only if checkbox is checked
99
- audio_path = None
100
- if generate_audio and explanation and voice_id:
101
- progress(0.9, desc="Generating audio...")
102
- try:
103
- audio_bytes = generate_speech(explanation, voice_id, voice_settings)
104
- # Save to temp file for Gradio
105
- with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
106
- f.write(audio_bytes)
107
- audio_path = f.name
108
- # Add text_to_speech tool
109
- mcp_tools.append({"name": "text_to_speech", "icon": "πŸ”Š", "desc": "Audio generation via ElevenLabs API"})
110
- progress(1.0, desc="Done!")
111
- except Exception as e:
112
- steps_log.append(f"**⚠️ Audio generation failed**\n{str(e)}")
113
- steps_md = "\n\n---\n\n".join(steps_log)
114
- progress(1.0, desc="Done (no audio)")
115
- else:
116
- progress(1.0, desc="Done!")
117
-
118
  # Format sources
119
  sources_md = format_sources(sources)
120
 
121
  # Format MCP tools
122
  mcp_md = format_mcp_tools(mcp_tools)
123
 
124
- return explanation, audio_path, sources_md, steps_md, mcp_md
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
 
127
  # Build the Gradio interface
@@ -196,14 +202,15 @@ def create_app():
196
  max_lines=15,
197
  )
198
 
199
- audio_checkbox = gr.Checkbox(
200
- label="πŸ”Š Generate audio",
201
- value=False,
 
202
  )
203
  audio_output = gr.Audio(
204
  label="πŸ”Š Listen to the explanation",
205
  type="filepath",
206
- autoplay=False,
207
  )
208
 
209
  with gr.Row():
@@ -246,27 +253,39 @@ def create_app():
246
  """
247
  )
248
 
249
- # Event handler
250
- def process_and_explain(topic, persona_with_emoji, gen_audio, audience_with_emoji):
251
  # Extract persona name (remove emoji prefix)
252
  persona_name = persona_with_emoji.split(" ", 1)[1] if " " in persona_with_emoji else persona_with_emoji
253
  # Extract audience (remove emoji prefix), skip if "Just me"
254
  audience = ""
255
  if audience_with_emoji and "Just me" not in audience_with_emoji:
256
  audience = audience_with_emoji.split(" ", 1)[1] if " " in audience_with_emoji else audience_with_emoji
257
- return explain_topic(topic, persona_name, audience, gen_audio)
258
 
259
  explain_btn.click(
260
  fn=process_and_explain,
261
- inputs=[topic_input, persona_dropdown, audio_checkbox, audience_dropdown],
262
- outputs=[explanation_output, audio_output, sources_output, steps_output, mcp_output],
263
  )
264
 
265
  # Also trigger on Enter key in topic input
266
  topic_input.submit(
267
  fn=process_and_explain,
268
- inputs=[topic_input, persona_dropdown, audio_checkbox, audience_dropdown],
269
- outputs=[explanation_output, audio_output, sources_output, steps_output, mcp_output],
 
 
 
 
 
 
 
 
 
 
 
 
270
  )
271
 
272
  return app
 
43
  return md
44
 
45
 
46
+ def explain_topic(topic: str, persona_name: str, audience: str = "", progress=gr.Progress()):
47
  """Main function to explain a topic in a persona's voice.
48
 
49
+ Returns: (explanation_text, sources_md, steps_md, mcp_md)
50
  """
51
  if not topic.strip():
52
  return (
53
  "Please enter a topic to explain!",
 
54
  "",
55
  "❌ No topic provided",
56
  "",
 
62
  steps_log = []
63
  explanation = ""
64
  sources = []
 
 
65
  mcp_tools = []
66
 
67
  # Run the agent pipeline
 
84
  elif update["type"] == "result":
85
  explanation = update["explanation"]
86
  sources = update.get("sources", sources)
 
 
87
  mcp_tools = update.get("mcp_tools", [])
88
+ progress(1.0, desc="Done!")
89
 
90
  # Format the steps log
91
  steps_md = "\n\n---\n\n".join(steps_log)
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  # Format sources
94
  sources_md = format_sources(sources)
95
 
96
  # Format MCP tools
97
  mcp_md = format_mcp_tools(mcp_tools)
98
 
99
+ return explanation, sources_md, steps_md, mcp_md
100
+
101
+
102
def generate_audio(explanation: str, persona_name: str, progress=gr.Progress()):
    """Generate spoken audio for an explanation using the persona's voice.

    Args:
        explanation: The explanation text to convert to speech.
        persona_name: Persona whose voice settings to use; falls back to
            "5-Year-Old" when empty.
        progress: Gradio progress tracker (injected by Gradio at call time).

    Returns:
        Filesystem path to a temporary MP3 file, or None when there is no
        text to speak.

    Raises:
        gr.Error: If speech generation fails for any reason.
    """
    if not explanation or not explanation.strip():
        return None

    if not persona_name:
        persona_name = "5-Year-Old"

    # Look up the voice configured for this persona.
    persona = get_persona(persona_name)
    voice_id = persona["voice_id"]
    voice_settings = persona.get("voice_settings")

    progress(0.3, desc="Generating audio...")

    try:
        audio_bytes = generate_speech(explanation, voice_id, voice_settings)
        # Persist to a temp file so Gradio's Audio(type="filepath") can serve it.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            f.write(audio_bytes)
            audio_path = f.name
        progress(1.0, desc="Audio ready!")
        return audio_path
    except Exception as e:
        progress(1.0, desc="Audio failed")
        # Chain the original exception so the real cause stays in the traceback.
        raise gr.Error(f"Audio generation failed: {str(e)}") from e
131
 
132
 
133
  # Build the Gradio interface
 
202
  max_lines=15,
203
  )
204
 
205
+ read_aloud_btn = gr.Button(
206
+ "πŸ”Š Read Aloud",
207
+ variant="secondary",
208
+ size="sm",
209
  )
210
  audio_output = gr.Audio(
211
  label="πŸ”Š Listen to the explanation",
212
  type="filepath",
213
+ autoplay=True,
214
  )
215
 
216
  with gr.Row():
 
253
  """
254
  )
255
 
256
+ # Event handler for explanation
257
def process_and_explain(topic, persona_with_emoji, audience_with_emoji):
    """Strip emoji prefixes from the dropdown labels, then run the explanation."""
    # Dropdown values look like "<emoji> Name"; keep the text after the first space.
    _, sep, rest = persona_with_emoji.partition(" ")
    persona_name = rest if sep else persona_with_emoji
    # Audience is optional; "Just me" means no audience adjustment.
    audience = ""
    if audience_with_emoji and "Just me" not in audience_with_emoji:
        _, sep, rest = audience_with_emoji.partition(" ")
        audience = rest if sep else audience_with_emoji
    return explain_topic(topic, persona_name, audience)
265
 
266
  explain_btn.click(
267
  fn=process_and_explain,
268
+ inputs=[topic_input, persona_dropdown, audience_dropdown],
269
+ outputs=[explanation_output, sources_output, steps_output, mcp_output],
270
  )
271
 
272
  # Also trigger on Enter key in topic input
273
  topic_input.submit(
274
  fn=process_and_explain,
275
+ inputs=[topic_input, persona_dropdown, audience_dropdown],
276
+ outputs=[explanation_output, sources_output, steps_output, mcp_output],
277
+ )
278
+
279
+ # Event handler for audio generation
280
def process_audio(explanation, persona_with_emoji):
    """Strip the emoji prefix from the persona label and synthesize audio."""
    # Dropdown values look like "<emoji> Name"; keep the text after the first space.
    _, sep, rest = persona_with_emoji.partition(" ")
    persona_name = rest if sep else persona_with_emoji
    return generate_audio(explanation, persona_name)
284
+
285
+ read_aloud_btn.click(
286
+ fn=process_audio,
287
+ inputs=[explanation_output, persona_dropdown],
288
+ outputs=[audio_output],
289
  )
290
 
291
  return app