rairo committed on
Commit
95f0c25
·
verified ·
1 Parent(s): ef1babe

Narration Gemini tts

Browse files
Files changed (1) hide show
  1. app.py +84 -7
app.py CHANGED
@@ -6,6 +6,8 @@ from google.genai import types
6
  import re
7
  import time
8
  import os
 
 
9
 
10
  # Disable Streamlit analytics (prevents PermissionError in some environments)
11
  os.environ["STREAMLIT_ANALYTICS_ENABLED"] = "false"
@@ -34,6 +36,7 @@ except Exception as e:
34
  # 1.3 Constants (model IDs, exactly as in original code)
35
  CATEGORY_MODEL = "gemini-2.0-flash-exp"
36
  GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
 
37
 
38
  # 1.4 Helper to parse numbered steps out of Gemini text
39
  def parse_numbered_steps(text):
@@ -46,6 +49,52 @@ def parse_numbered_steps(text):
46
  steps = re.findall(r"\n\s*(\d+)\.\s*(.*)", text, re.MULTILINE)
47
  return [(int(num), desc.strip()) for num, desc in steps]
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  # ─────────────────────────────────────────────────────────────────────────────
50
  # 2. SESSION STATE SETUP
51
  # ─────────────────────────────────────────────────────────────────────────────
@@ -56,7 +105,8 @@ if "app_state" not in st.session_state:
56
  "done_flags": {}, "notes": {}, "timers": {}, "category": None,
57
  "prompt_sent": False, "timer_running": {}, "last_tick": {},
58
  "project_title": "", "project_description": "", "upcycling_options": [],
59
- "plan_approved": False, "initial_plan": "", "user_image": None
 
60
  }
61
 
62
  # ─────────────────────────────────────────────────────────────────────────────
@@ -70,7 +120,8 @@ def reset_state():
70
  "done_flags": {}, "notes": {}, "timers": {}, "category": None,
71
  "prompt_sent": False, "timer_running": {}, "last_tick": {},
72
  "project_title": "", "project_description": "", "upcycling_options": [],
73
- "plan_approved": False, "initial_plan": "", "user_image": None
 
74
  }
75
  st.success("βœ… Reset complete!")
76
  st.rerun()
@@ -224,13 +275,39 @@ def render_sidebar_navigation():
224
  def render_tools_list():
225
  if st.session_state.app_state['tools_list']:
226
  with st.expander("πŸ”§ Required Tools & Materials", expanded=True):
227
- for item in st.session_state.app_state['tools_list']:
228
- st.markdown(f"- {item}")
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
  def render_step(idx, text):
231
  total = len(st.session_state.app_state['steps'])
232
  st.markdown(f"### Step {idx} of {total}")
233
- st.write(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
  if idx in st.session_state.app_state['images']:
236
  st.image(
@@ -265,7 +342,7 @@ with st.expander("ℹ️ How it works", expanded=False):
265
  2. **(Optional) Describe your goal** for more accurate results.
266
  3. **Review the Plan.** The AI will propose a plan. If you didn't provide a description, you'll be asked to approve it.
267
  4. **Get Your Guide** with tools and illustrated step-by-step instructions.
268
- 5. **Follow the Steps** using the interactive checklist.
269
  """)
270
 
271
  if not st.session_state.app_state['prompt_sent']:
@@ -273,7 +350,7 @@ if not st.session_state.app_state['prompt_sent']:
273
  col1, col2 = st.columns([3, 1])
274
  with col1:
275
  uploaded_image = st.file_uploader("πŸ“· Upload a photo of your project", type=["jpg", "jpeg", "png"])
276
- context_text = st.text_area("✏️ Describe the issue or your goal (optional but recommended)", height=80, placeholder="e.g., 'My toaster won’t turn on,' or 'How do I build a desk like this?'")
277
  with col2:
278
  st.markdown("### Actions")
279
  if st.button("πŸš€ Get AI Guidance", type="primary", use_container_width=True):
 
6
  import re
7
  import time
8
  import os
9
+ import wave
10
+ import base64
11
 
12
  # Disable Streamlit analytics (prevents PermissionError in some environments)
13
  os.environ["STREAMLIT_ANALYTICS_ENABLED"] = "false"
 
36
  # 1.3 Constants (model IDs, exactly as in original code)
37
  CATEGORY_MODEL = "gemini-2.0-flash-exp"
38
  GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
39
+ TTS_MODEL = "gemini-2.5-flash-preview-tts"
40
 
41
  # 1.4 Helper to parse numbered steps out of Gemini text
42
  def parse_numbered_steps(text):
 
49
  steps = re.findall(r"\n\s*(\d+)\.\s*(.*)", text, re.MULTILINE)
50
  return [(int(num), desc.strip()) for num, desc in steps]
51
 
52
+ # 1.5 TTS Helper Functions
53
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM bytes to *filename* as a WAV file.

    The defaults (mono, 24 kHz, 16-bit samples) match the parameters this
    app uses for Gemini TTS audio.

    Args:
        filename: Destination path for the WAV file.
        pcm: Raw PCM sample bytes to store as the audio frames.
        channels: Number of audio channels.
        rate: Sample rate in Hz.
        sample_width: Bytes per sample.
    """
    writer = wave.open(filename, "wb")
    try:
        writer.setnchannels(channels)
        writer.setsampwidth(sample_width)
        writer.setframerate(rate)
        writer.writeframes(pcm)
    finally:
        # Ensure the header is finalized and the handle released even on error.
        writer.close()
60
+
61
def generate_speech(text, voice_name='Kore'):
    """Synthesize *text* to speech via the Gemini TTS model.

    Args:
        text: The text to narrate.
        voice_name: Name of the prebuilt Gemini voice to use.

    Returns:
        The raw audio bytes from the first response part, or ``None`` if
        generation failed (the error is surfaced via ``st.error``).
    """
    try:
        voice_cfg = types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(
                voice_name=voice_name,
            )
        )
        generation_cfg = types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(voice_config=voice_cfg),
        )
        response = client.models.generate_content(
            model=TTS_MODEL,
            contents=f"Say in a clear, helpful tone: {text}",
            config=generation_cfg,
        )
        # First candidate / first part carries the inline audio payload.
        return response.candidates[0].content.parts[0].inline_data.data
    except Exception as e:
        # Boundary handler: report the failure in the UI instead of crashing.
        st.error(f"TTS generation failed: {str(e)}")
        return None
84
+
85
def create_audio_player(audio_data, key):
    """Render an inline HTML audio player for generated narration.

    Args:
        audio_data: Audio bytes from ``generate_speech``. Gemini TTS returns
            raw 16-bit PCM at 24 kHz mono (the same parameters as the
            ``wave_file`` helper) — TODO confirm against the model's output
            spec for the deployed model version.
        key: Currently unused; kept for interface stability.
    """
    if not audio_data:
        return
    import io  # stdlib; local import keeps the top-of-file imports untouched

    if audio_data[:4] == b"RIFF":
        # Already a complete WAV container — embed as-is.
        wav_bytes = audio_data
    else:
        # Raw PCM has no header, so serving it directly as "data:audio/wav"
        # would not play in browsers. Wrap it in an in-memory WAV container
        # using the same parameters as the wave_file helper.
        buf = io.BytesIO()
        with wave.open(buf, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(24000)
            wf.writeframes(audio_data)
        wav_bytes = buf.getvalue()

    # Convert audio data to base64 for the HTML audio player
    audio_b64 = base64.b64encode(wav_bytes).decode()
    audio_html = f"""
    <audio controls style="width: 100%;">
        <source src="data:audio/wav;base64,{audio_b64}" type="audio/wav">
        Your browser does not support the audio element.
    </audio>
    """
    st.markdown(audio_html, unsafe_allow_html=True)
97
+
98
  # ─────────────────────────────────────────────────────────────────────────────
99
  # 2. SESSION STATE SETUP
100
  # ─────────────────────────────────────────────────────────────────────────────
 
105
  "done_flags": {}, "notes": {}, "timers": {}, "category": None,
106
  "prompt_sent": False, "timer_running": {}, "last_tick": {},
107
  "project_title": "", "project_description": "", "upcycling_options": [],
108
+ "plan_approved": False, "initial_plan": "", "user_image": None,
109
+ "audio_cache": {} # Cache for generated audio
110
  }
111
 
112
  # ─────────────────────────────────────────────────────────────────────────────
 
120
  "done_flags": {}, "notes": {}, "timers": {}, "category": None,
121
  "prompt_sent": False, "timer_running": {}, "last_tick": {},
122
  "project_title": "", "project_description": "", "upcycling_options": [],
123
+ "plan_approved": False, "initial_plan": "", "user_image": None,
124
+ "audio_cache": {}
125
  }
126
  st.success("βœ… Reset complete!")
127
  st.rerun()
 
275
def render_tools_list():
    """Render the required tools/materials list with a TTS narration button.

    Reads ``tools_list`` from session state. Generated narration is cached in
    ``audio_cache`` so repeated clicks do not re-call the TTS model; a failed
    generation (``None``) is NOT cached, so the user can retry.
    """
    state = st.session_state.app_state
    if not state['tools_list']:
        return
    with st.expander("πŸ”§ Required Tools & Materials", expanded=True):
        col1, col2 = st.columns([4, 1])
        with col1:
            for item in state['tools_list']:
                st.markdown(f"- {item}")
        with col2:
            if st.button("πŸ”Š Narrate Tools", key="narrate_tools"):
                tools_text = "Here are the required tools and materials: " + ", ".join(state['tools_list'])
                cache = state['audio_cache']
                if cache.get('tools_audio') is None:
                    with st.spinner("Generating narration..."):
                        audio = generate_speech(tools_text)
                    # Only cache successful generations; leaving failures
                    # uncached allows a retry on the next button click.
                    if audio is not None:
                        cache['tools_audio'] = audio
                if cache.get('tools_audio'):
                    create_audio_player(cache['tools_audio'], "tools_player")
 
293
  def render_step(idx, text):
294
  total = len(st.session_state.app_state['steps'])
295
  st.markdown(f"### Step {idx} of {total}")
296
+
297
+ # Add narration button for each step
298
+ col1, col2 = st.columns([4, 1])
299
+ with col1:
300
+ st.write(text)
301
+ with col2:
302
+ if st.button("πŸ”Š Narrate", key=f"narrate_step_{idx}"):
303
+ audio_key = f'step_{idx}_audio'
304
+ if audio_key not in st.session_state.app_state['audio_cache']:
305
+ with st.spinner("Generating narration..."):
306
+ step_text = f"Step {idx}: {text}"
307
+ st.session_state.app_state['audio_cache'][audio_key] = generate_speech(step_text)
308
+
309
+ if st.session_state.app_state['audio_cache'][audio_key]:
310
+ create_audio_player(st.session_state.app_state['audio_cache'][audio_key], f"step_{idx}_player")
311
 
312
  if idx in st.session_state.app_state['images']:
313
  st.image(
 
342
  2. **(Optional) Describe your goal** for more accurate results.
343
  3. **Review the Plan.** The AI will propose a plan. If you didn't provide a description, you'll be asked to approve it.
344
  4. **Get Your Guide** with tools and illustrated step-by-step instructions.
345
+ 5. **Follow the Steps** using the interactive checklist with audio narration.
346
  """)
347
 
348
  if not st.session_state.app_state['prompt_sent']:
 
350
  col1, col2 = st.columns([3, 1])
351
  with col1:
352
  uploaded_image = st.file_uploader("πŸ“· Upload a photo of your project", type=["jpg", "jpeg", "png"])
353
+ context_text = st.text_area("✏️ Describe the issue or your goal (optional but recommended)", height=80, placeholder="e.g., 'My toaster won't turn on,' or 'How do I build a desk like this?'")
354
  with col2:
355
  st.markdown("### Actions")
356
  if st.button("πŸš€ Get AI Guidance", type="primary", use_container_width=True):