Narration Gemini tts
Browse files
app.py
CHANGED
|
@@ -6,6 +6,8 @@ from google.genai import types
|
|
| 6 |
import re
|
| 7 |
import time
|
| 8 |
import os
|
|
|
|
|
|
|
| 9 |
|
| 10 |
# Disable Streamlit analytics (prevents PermissionError in some environments)
|
| 11 |
os.environ["STREAMLIT_ANALYTICS_ENABLED"] = "false"
|
|
@@ -34,6 +36,7 @@ except Exception as e:
|
|
| 34 |
# 1.3 Constants (model IDs, exactly as in original code)
|
| 35 |
CATEGORY_MODEL = "gemini-2.0-flash-exp"
|
| 36 |
GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
|
|
|
|
| 37 |
|
| 38 |
# 1.4 Helper to parse numbered steps out of Gemini text
|
| 39 |
def parse_numbered_steps(text):
|
|
@@ -46,6 +49,52 @@ def parse_numbered_steps(text):
|
|
| 46 |
steps = re.findall(r"\n\s*(\d+)\.\s*(.*)", text, re.MULTILINE)
|
| 47 |
return [(int(num), desc.strip()) for num, desc in steps]
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 50 |
# 2. SESSION STATE SETUP
|
| 51 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -56,7 +105,8 @@ if "app_state" not in st.session_state:
|
|
| 56 |
"done_flags": {}, "notes": {}, "timers": {}, "category": None,
|
| 57 |
"prompt_sent": False, "timer_running": {}, "last_tick": {},
|
| 58 |
"project_title": "", "project_description": "", "upcycling_options": [],
|
| 59 |
-
"plan_approved": False, "initial_plan": "", "user_image": None
|
|
|
|
| 60 |
}
|
| 61 |
|
| 62 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -70,7 +120,8 @@ def reset_state():
|
|
| 70 |
"done_flags": {}, "notes": {}, "timers": {}, "category": None,
|
| 71 |
"prompt_sent": False, "timer_running": {}, "last_tick": {},
|
| 72 |
"project_title": "", "project_description": "", "upcycling_options": [],
|
| 73 |
-
"plan_approved": False, "initial_plan": "", "user_image": None
|
|
|
|
| 74 |
}
|
| 75 |
st.success("β
Reset complete!")
|
| 76 |
st.rerun()
|
|
@@ -224,13 +275,39 @@ def render_sidebar_navigation():
|
|
| 224 |
def render_tools_list():
|
| 225 |
if st.session_state.app_state['tools_list']:
|
| 226 |
with st.expander("π§ Required Tools & Materials", expanded=True):
|
| 227 |
-
for
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
def render_step(idx, text):
|
| 231 |
total = len(st.session_state.app_state['steps'])
|
| 232 |
st.markdown(f"### Step {idx} of {total}")
|
| 233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
if idx in st.session_state.app_state['images']:
|
| 236 |
st.image(
|
|
@@ -265,7 +342,7 @@ with st.expander("βΉοΈ How it works", expanded=False):
|
|
| 265 |
2. **(Optional) Describe your goal** for more accurate results.
|
| 266 |
3. **Review the Plan.** The AI will propose a plan. If you didn't provide a description, you'll be asked to approve it.
|
| 267 |
4. **Get Your Guide** with tools and illustrated step-by-step instructions.
|
| 268 |
-
5. **Follow the Steps** using the interactive checklist.
|
| 269 |
""")
|
| 270 |
|
| 271 |
if not st.session_state.app_state['prompt_sent']:
|
|
@@ -273,7 +350,7 @@ if not st.session_state.app_state['prompt_sent']:
|
|
| 273 |
col1, col2 = st.columns([3, 1])
|
| 274 |
with col1:
|
| 275 |
uploaded_image = st.file_uploader("π· Upload a photo of your project", type=["jpg", "jpeg", "png"])
|
| 276 |
-
context_text = st.text_area("βοΈ Describe the issue or your goal (optional but recommended)", height=80, placeholder="e.g., 'My toaster won
|
| 277 |
with col2:
|
| 278 |
st.markdown("### Actions")
|
| 279 |
if st.button("π Get AI Guidance", type="primary", use_container_width=True):
|
|
|
|
| 6 |
import re
|
| 7 |
import time
|
| 8 |
import os
|
| 9 |
+
import wave
|
| 10 |
+
import base64
|
| 11 |
|
| 12 |
# Disable Streamlit analytics (prevents PermissionError in some environments)
|
| 13 |
os.environ["STREAMLIT_ANALYTICS_ENABLED"] = "false"
|
|
|
|
| 36 |
# 1.3 Constants (model IDs, exactly as in original code)
|
| 37 |
CATEGORY_MODEL = "gemini-2.0-flash-exp"
|
| 38 |
GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
|
| 39 |
+
TTS_MODEL = "gemini-2.5-flash-preview-tts"
|
| 40 |
|
| 41 |
# 1.4 Helper to parse numbered steps out of Gemini text
|
| 42 |
def parse_numbered_steps(text):
|
|
|
|
| 49 |
steps = re.findall(r"\n\s*(\d+)\.\s*(.*)", text, re.MULTILINE)
|
| 50 |
return [(int(num), desc.strip()) for num, desc in steps]
|
| 51 |
|
| 52 |
+
# 1.5 TTS Helper Functions
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM audio bytes to *filename* as a WAV file.

    Args:
        filename: Destination path (or file-like object) for the WAV output.
        pcm: Raw PCM audio bytes to write as the frame data.
        channels: Number of audio channels (default 1, i.e. mono).
        rate: Sample rate in Hz (default 24000).
        sample_width: Bytes per sample (default 2, i.e. 16-bit audio).
    """
    with wave.open(filename, "wb") as out:
        out.setnchannels(channels)
        out.setsampwidth(sample_width)
        out.setframerate(rate)
        out.writeframes(pcm)
|
| 60 |
+
|
| 61 |
+
def generate_speech(text, voice_name='Kore'):
    """Generate speech from text using Gemini TTS.

    Args:
        text: The text to be narrated.
        voice_name: Name of the prebuilt Gemini voice to use (default 'Kore').

    Returns:
        The raw audio bytes from the model response, or ``None`` if
        generation failed (the error is surfaced to the UI via ``st.error``).
    """
    try:
        # Build the speech configuration in named stages for readability.
        voice = types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(
                voice_name=voice_name,
            )
        )
        tts_config = types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(voice_config=voice),
        )
        response = client.models.generate_content(
            model=TTS_MODEL,
            contents=f"Say in a clear, helpful tone: {text}",
            config=tts_config,
        )
        # The audio payload lives in the first part of the first candidate.
        return response.candidates[0].content.parts[0].inline_data.data
    except Exception as e:
        st.error(f"TTS generation failed: {str(e)}")
        return None
|
| 84 |
+
|
| 85 |
+
def create_audio_player(audio_data, key):
    """Create an HTML audio player widget for generated speech.

    Args:
        audio_data: Raw PCM bytes as returned by ``generate_speech``
            (assumed 16-bit mono at 24 kHz, matching the ``wave_file``
            helper defaults — TODO confirm against the TTS model output),
            or ``None``.
        key: Unique widget key (currently unused; kept for interface
            compatibility with existing callers).
    """
    if audio_data:
        import io  # local import: only needed when audio is actually rendered

        # Bug fix: the model returns headerless raw PCM, but the data URI
        # below declares audio/wav. Browsers cannot decode raw PCM as WAV,
        # so wrap the PCM in an in-memory WAV container first.
        buf = io.BytesIO()
        with wave.open(buf, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(24000)
            wf.writeframes(audio_data)

        # Convert audio data to base64 for HTML audio player
        audio_b64 = base64.b64encode(buf.getvalue()).decode()
        audio_html = f"""
        <audio controls style="width: 100%;">
            <source src="data:audio/wav;base64,{audio_b64}" type="audio/wav">
            Your browser does not support the audio element.
        </audio>
        """
        st.markdown(audio_html, unsafe_allow_html=True)
| 97 |
+
|
| 98 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 99 |
# 2. SESSION STATE SETUP
|
| 100 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 105 |
"done_flags": {}, "notes": {}, "timers": {}, "category": None,
|
| 106 |
"prompt_sent": False, "timer_running": {}, "last_tick": {},
|
| 107 |
"project_title": "", "project_description": "", "upcycling_options": [],
|
| 108 |
+
"plan_approved": False, "initial_plan": "", "user_image": None,
|
| 109 |
+
"audio_cache": {} # Cache for generated audio
|
| 110 |
}
|
| 111 |
|
| 112 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 120 |
"done_flags": {}, "notes": {}, "timers": {}, "category": None,
|
| 121 |
"prompt_sent": False, "timer_running": {}, "last_tick": {},
|
| 122 |
"project_title": "", "project_description": "", "upcycling_options": [],
|
| 123 |
+
"plan_approved": False, "initial_plan": "", "user_image": None,
|
| 124 |
+
"audio_cache": {}
|
| 125 |
}
|
| 126 |
st.success("β
Reset complete!")
|
| 127 |
st.rerun()
|
|
|
|
| 275 |
def render_tools_list():
    """Render the required tools/materials expander with optional TTS narration.

    Reads ``tools_list`` and ``audio_cache`` from ``st.session_state.app_state``;
    renders nothing when the tools list is empty.
    """
    if st.session_state.app_state['tools_list']:
        with st.expander("🔧 Required Tools & Materials", expanded=True):
            # Add narration button for tools list
            col1, col2 = st.columns([4, 1])
            with col1:
                for item in st.session_state.app_state['tools_list']:
                    st.markdown(f"- {item}")
            with col2:
                if st.button("🔊 Narrate Tools", key="narrate_tools"):
                    tools_text = "Here are the required tools and materials: " + ", ".join(st.session_state.app_state['tools_list'])
                    cache = st.session_state.app_state['audio_cache']
                    # Bug fix: only cache successful generations. The original
                    # membership test cached a failed (None) result forever,
                    # so clicking the button again could never retry TTS.
                    if not cache.get('tools_audio'):
                        with st.spinner("Generating narration..."):
                            audio = generate_speech(tools_text)
                            if audio:
                                cache['tools_audio'] = audio

                    if cache.get('tools_audio'):
                        create_audio_player(cache['tools_audio'], "tools_player")
|
| 292 |
|
| 293 |
def render_step(idx, text):
|
| 294 |
total = len(st.session_state.app_state['steps'])
|
| 295 |
st.markdown(f"### Step {idx} of {total}")
|
| 296 |
+
|
| 297 |
+
# Add narration button for each step
|
| 298 |
+
col1, col2 = st.columns([4, 1])
|
| 299 |
+
with col1:
|
| 300 |
+
st.write(text)
|
| 301 |
+
with col2:
|
| 302 |
+
if st.button("π Narrate", key=f"narrate_step_{idx}"):
|
| 303 |
+
audio_key = f'step_{idx}_audio'
|
| 304 |
+
if audio_key not in st.session_state.app_state['audio_cache']:
|
| 305 |
+
with st.spinner("Generating narration..."):
|
| 306 |
+
step_text = f"Step {idx}: {text}"
|
| 307 |
+
st.session_state.app_state['audio_cache'][audio_key] = generate_speech(step_text)
|
| 308 |
+
|
| 309 |
+
if st.session_state.app_state['audio_cache'][audio_key]:
|
| 310 |
+
create_audio_player(st.session_state.app_state['audio_cache'][audio_key], f"step_{idx}_player")
|
| 311 |
|
| 312 |
if idx in st.session_state.app_state['images']:
|
| 313 |
st.image(
|
|
|
|
| 342 |
2. **(Optional) Describe your goal** for more accurate results.
|
| 343 |
3. **Review the Plan.** The AI will propose a plan. If you didn't provide a description, you'll be asked to approve it.
|
| 344 |
4. **Get Your Guide** with tools and illustrated step-by-step instructions.
|
| 345 |
+
5. **Follow the Steps** using the interactive checklist with audio narration.
|
| 346 |
""")
|
| 347 |
|
| 348 |
if not st.session_state.app_state['prompt_sent']:
|
|
|
|
| 350 |
col1, col2 = st.columns([3, 1])
|
| 351 |
with col1:
|
| 352 |
uploaded_image = st.file_uploader("π· Upload a photo of your project", type=["jpg", "jpeg", "png"])
|
| 353 |
+
context_text = st.text_area("βοΈ Describe the issue or your goal (optional but recommended)", height=80, placeholder="e.g., 'My toaster won't turn on,' or 'How do I build a desk like this?'")
|
| 354 |
with col2:
|
| 355 |
st.markdown("### Actions")
|
| 356 |
if st.button("π Get AI Guidance", type="primary", use_container_width=True):
|