rairo committed on
Commit
95f0c25
·
verified ·
1 Parent(s): ef1babe

Narration Gemini tts

Browse files
Files changed (1) hide show
  1. app.py +84 -7
app.py CHANGED
@@ -6,6 +6,8 @@ from google.genai import types
6
  import re
7
  import time
8
  import os
 
 
9
 
10
  # Disable Streamlit analytics (prevents PermissionError in some environments)
11
  os.environ["STREAMLIT_ANALYTICS_ENABLED"] = "false"
@@ -34,6 +36,7 @@ except Exception as e:
34
  # 1.3 Constants (model IDs, exactly as in original code)
35
  CATEGORY_MODEL = "gemini-2.0-flash-exp"
36
  GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
 
37
 
38
  # 1.4 Helper to parse numbered steps out of Gemini text
39
  def parse_numbered_steps(text):
@@ -46,6 +49,52 @@ def parse_numbered_steps(text):
46
  steps = re.findall(r"\n\s*(\d+)\.\s*(.*)", text, re.MULTILINE)
47
  return [(int(num), desc.strip()) for num, desc in steps]
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  # ─────────────────────────────────────────────────────────────────────────────
50
  # 2. SESSION STATE SETUP
51
  # ─────────────────────────────────────────────────────────────────────────────
@@ -56,7 +105,8 @@ if "app_state" not in st.session_state:
56
  "done_flags": {}, "notes": {}, "timers": {}, "category": None,
57
  "prompt_sent": False, "timer_running": {}, "last_tick": {},
58
  "project_title": "", "project_description": "", "upcycling_options": [],
59
- "plan_approved": False, "initial_plan": "", "user_image": None
 
60
  }
61
 
62
  # ─────────────────────────────────────────────────────────────────────────────
@@ -70,7 +120,8 @@ def reset_state():
70
  "done_flags": {}, "notes": {}, "timers": {}, "category": None,
71
  "prompt_sent": False, "timer_running": {}, "last_tick": {},
72
  "project_title": "", "project_description": "", "upcycling_options": [],
73
- "plan_approved": False, "initial_plan": "", "user_image": None
 
74
  }
75
  st.success("βœ… Reset complete!")
76
  st.rerun()
@@ -224,13 +275,39 @@ def render_sidebar_navigation():
224
  def render_tools_list():
225
  if st.session_state.app_state['tools_list']:
226
  with st.expander("πŸ”§ Required Tools & Materials", expanded=True):
227
- for item in st.session_state.app_state['tools_list']:
228
- st.markdown(f"- {item}")
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
  def render_step(idx, text):
231
  total = len(st.session_state.app_state['steps'])
232
  st.markdown(f"### Step {idx} of {total}")
233
- st.write(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
  if idx in st.session_state.app_state['images']:
236
  st.image(
@@ -265,7 +342,7 @@ with st.expander("ℹ️ How it works", expanded=False):
265
  2. **(Optional) Describe your goal** for more accurate results.
266
  3. **Review the Plan.** The AI will propose a plan. If you didn't provide a description, you'll be asked to approve it.
267
  4. **Get Your Guide** with tools and illustrated step-by-step instructions.
268
- 5. **Follow the Steps** using the interactive checklist.
269
  """)
270
 
271
  if not st.session_state.app_state['prompt_sent']:
@@ -273,7 +350,7 @@ if not st.session_state.app_state['prompt_sent']:
273
  col1, col2 = st.columns([3, 1])
274
  with col1:
275
  uploaded_image = st.file_uploader("πŸ“· Upload a photo of your project", type=["jpg", "jpeg", "png"])
276
- context_text = st.text_area("✏️ Describe the issue or your goal (optional but recommended)", height=80, placeholder="e.g., 'My toaster won’t turn on,' or 'How do I build a desk like this?'")
277
  with col2:
278
  st.markdown("### Actions")
279
  if st.button("πŸš€ Get AI Guidance", type="primary", use_container_width=True):
 
6
  import re
7
  import time
8
  import os
9
+ import wave
10
+ import base64
11
 
12
  # Disable Streamlit analytics (prevents PermissionError in some environments)
13
  os.environ["STREAMLIT_ANALYTICS_ENABLED"] = "false"
 
36
  # 1.3 Constants (model IDs, exactly as in original code)
37
  CATEGORY_MODEL = "gemini-2.0-flash-exp"
38
  GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
39
+ TTS_MODEL = "gemini-2.5-flash-preview-tts"
40
 
41
  # 1.4 Helper to parse numbered steps out of Gemini text
42
  def parse_numbered_steps(text):
 
49
  steps = re.findall(r"\n\s*(\d+)\.\s*(.*)", text, re.MULTILINE)
50
  return [(int(num), desc.strip()) for num, desc in steps]
51
 
52
+ # 1.5 TTS Helper Functions
53
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM bytes to *filename* as a WAV file.

    The defaults (mono, 24 kHz, 16-bit samples) match the parameters this
    app uses for Gemini TTS audio.

    Args:
        filename: Destination path for the WAV file.
        pcm: Raw PCM sample bytes to store as the audio frames.
        channels: Number of audio channels.
        rate: Sample rate in Hz.
        sample_width: Bytes per sample.
    """
    writer = wave.open(filename, "wb")
    try:
        writer.setnchannels(channels)
        writer.setsampwidth(sample_width)
        writer.setframerate(rate)
        writer.writeframes(pcm)
    finally:
        # Ensure the header is finalized and the handle released even on error.
        writer.close()
60
+
61
def generate_speech(text, voice_name='Kore'):
    """Synthesize *text* to speech via the Gemini TTS model.

    Args:
        text: The text to narrate.
        voice_name: Name of the prebuilt Gemini voice to use.

    Returns:
        The raw audio bytes from the first response part, or ``None`` if
        generation failed (the error is surfaced via ``st.error``).
    """
    try:
        voice_cfg = types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(
                voice_name=voice_name,
            )
        )
        generation_cfg = types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(voice_config=voice_cfg),
        )
        response = client.models.generate_content(
            model=TTS_MODEL,
            contents=f"Say in a clear, helpful tone: {text}",
            config=generation_cfg,
        )
        # First candidate / first part carries the inline audio payload.
        return response.candidates[0].content.parts[0].inline_data.data
    except Exception as e:
        # Boundary handler: report the failure in the UI instead of crashing.
        st.error(f"TTS generation failed: {str(e)}")
        return None
84
+
85
def create_audio_player(audio_data, key):
    """Render an inline HTML audio player for generated narration.

    Args:
        audio_data: Audio bytes from ``generate_speech``. Gemini TTS returns
            raw 16-bit PCM at 24 kHz mono (the same parameters as the
            ``wave_file`` helper) — TODO confirm against the model's output
            spec for the deployed model version.
        key: Currently unused; kept for interface stability.
    """
    if not audio_data:
        return
    import io  # stdlib; local import keeps the top-of-file imports untouched

    if audio_data[:4] == b"RIFF":
        # Already a complete WAV container — embed as-is.
        wav_bytes = audio_data
    else:
        # Raw PCM has no header, so serving it directly as "data:audio/wav"
        # would not play in browsers. Wrap it in an in-memory WAV container
        # using the same parameters as the wave_file helper.
        buf = io.BytesIO()
        with wave.open(buf, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(24000)
            wf.writeframes(audio_data)
        wav_bytes = buf.getvalue()

    # Convert audio data to base64 for the HTML audio player
    audio_b64 = base64.b64encode(wav_bytes).decode()
    audio_html = f"""
    <audio controls style="width: 100%;">
        <source src="data:audio/wav;base64,{audio_b64}" type="audio/wav">
        Your browser does not support the audio element.
    </audio>
    """
    st.markdown(audio_html, unsafe_allow_html=True)
97
+
98
  # ─────────────────────────────────────────────────────────────────────────────
99
  # 2. SESSION STATE SETUP
100
  # ─────────────────────────────────────────────────────────────────────────────
 
105
  "done_flags": {}, "notes": {}, "timers": {}, "category": None,
106
  "prompt_sent": False, "timer_running": {}, "last_tick": {},
107
  "project_title": "", "project_description": "", "upcycling_options": [],
108
+ "plan_approved": False, "initial_plan": "", "user_image": None,
109
+ "audio_cache": {} # Cache for generated audio
110
  }
111
 
112
  # ─────────────────────────────────────────────────────────────────────────────
 
120
  "done_flags": {}, "notes": {}, "timers": {}, "category": None,
121
  "prompt_sent": False, "timer_running": {}, "last_tick": {},
122
  "project_title": "", "project_description": "", "upcycling_options": [],
123
+ "plan_approved": False, "initial_plan": "", "user_image": None,
124
+ "audio_cache": {}
125
  }
126
  st.success("βœ… Reset complete!")
127
  st.rerun()
 
275
def render_tools_list():
    """Render the required tools/materials list with a TTS narration button.

    Reads ``tools_list`` from session state. Generated narration is cached in
    ``audio_cache`` so repeated clicks do not re-call the TTS model; a failed
    generation (``None``) is NOT cached, so the user can retry.
    """
    state = st.session_state.app_state
    if not state['tools_list']:
        return
    with st.expander("πŸ”§ Required Tools & Materials", expanded=True):
        col1, col2 = st.columns([4, 1])
        with col1:
            for item in state['tools_list']:
                st.markdown(f"- {item}")
        with col2:
            if st.button("πŸ”Š Narrate Tools", key="narrate_tools"):
                tools_text = "Here are the required tools and materials: " + ", ".join(state['tools_list'])
                cache = state['audio_cache']
                if cache.get('tools_audio') is None:
                    with st.spinner("Generating narration..."):
                        audio = generate_speech(tools_text)
                    # Only cache successful generations; leaving failures
                    # uncached allows a retry on the next button click.
                    if audio is not None:
                        cache['tools_audio'] = audio
                if cache.get('tools_audio'):
                    create_audio_player(cache['tools_audio'], "tools_player")
 
293
  def render_step(idx, text):
294
  total = len(st.session_state.app_state['steps'])
295
  st.markdown(f"### Step {idx} of {total}")
296
+
297
+ # Add narration button for each step
298
+ col1, col2 = st.columns([4, 1])
299
+ with col1:
300
+ st.write(text)
301
+ with col2:
302
+ if st.button("πŸ”Š Narrate", key=f"narrate_step_{idx}"):
303
+ audio_key = f'step_{idx}_audio'
304
+ if audio_key not in st.session_state.app_state['audio_cache']:
305
+ with st.spinner("Generating narration..."):
306
+ step_text = f"Step {idx}: {text}"
307
+ st.session_state.app_state['audio_cache'][audio_key] = generate_speech(step_text)
308
+
309
+ if st.session_state.app_state['audio_cache'][audio_key]:
310
+ create_audio_player(st.session_state.app_state['audio_cache'][audio_key], f"step_{idx}_player")
311
 
312
  if idx in st.session_state.app_state['images']:
313
  st.image(
 
342
  2. **(Optional) Describe your goal** for more accurate results.
343
  3. **Review the Plan.** The AI will propose a plan. If you didn't provide a description, you'll be asked to approve it.
344
  4. **Get Your Guide** with tools and illustrated step-by-step instructions.
345
+ 5. **Follow the Steps** using the interactive checklist with audio narration.
346
  """)
347
 
348
  if not st.session_state.app_state['prompt_sent']:
 
350
  col1, col2 = st.columns([3, 1])
351
  with col1:
352
  uploaded_image = st.file_uploader("πŸ“· Upload a photo of your project", type=["jpg", "jpeg", "png"])
353
+ context_text = st.text_area("✏️ Describe the issue or your goal (optional but recommended)", height=80, placeholder="e.g., 'My toaster won't turn on,' or 'How do I build a desk like this?'")
354
  with col2:
355
  st.markdown("### Actions")
356
  if st.button("πŸš€ Get AI Guidance", type="primary", use_container_width=True):