NeoTest

Sleeping

App Files Files Community

rairo committed on Jun 6, 2025

Commit

899cd7e

verified ·

1 Parent(s): 8559392

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -72

app.py CHANGED Viewed

@@ -6,7 +6,6 @@ from google.genai import types
 import re
 import time
 import os
-import io
 import wave
 # Disable Streamlit analytics (prevents PermissionError in some environments)
@@ -26,39 +25,47 @@ if not API_KEY:
     st.error("Please set GOOGLE_API_KEY in your environment variables or Streamlit secrets")
     st.stop()
-# 1.2 Initialize the GenAI client (as per original code)
 try:
     client = genai.Client(api_key=API_KEY)
 except Exception as e:
     st.error(f"Failed to initialize GenAI Client: {e}")
     st.stop()
-# 1.3 Constants (model IDs, exactly as in original code)
-CATEGORY_MODEL   = "gemini-2.0-flash-exp"
 GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
-TTS_MODEL        = "gemini-2.5-flash-preview-tts"
-VOICE_NAME       = "Kore"
 # 1.4 Helper to parse numbered steps out of Gemini text
 def parse_numbered_steps(text):
-    """
-    Parses text with numbered steps into a list of tuples.
-    Example: "1. Do this.\n2. Do that." -> [(1, "Do this."), (2, "Do that.")]
-    """
-    # Add a leading newline to help regex find the first step
     text = "\n" + text
-    steps = re.findall(r"\n\s*(\d+)\.\s*(.*)", text, re.MULTILINE)
     return [(int(num), desc.strip()) for num, desc in steps]
-# 1.5 Helper to convert raw PCM into WAV bytes (for in-memory playback)
-def tts_wav_bytes(pcm, channels=1, rate=24000, sample_width=2):
-    buf = io.BytesIO()
-    with wave.open(buf, "wb") as wf:
-        wf.setnchannels(channels)
-        wf.setsampwidth(sample_width)
-        wf.setframerate(rate)
-        wf.writeframes(pcm)
-    return buf.getvalue()
 # ─────────────────────────────────────────────────────────────────────────────
 # 2. SESSION STATE SETUP
@@ -70,8 +77,7 @@ if "app_state" not in st.session_state:
         "done_flags": {}, "notes": {}, "timers": {}, "category": None,
         "prompt_sent": False, "timer_running": {}, "last_tick": {},
         "project_title": "", "project_description": "", "upcycling_options": [],
-        "plan_approved": False, "initial_plan": "", "user_image": None,
-        "tts": {}  # store TTS WAV bytes per step index
     }
 # ─────────────────────────────────────────────────────────────────────────────
@@ -85,8 +91,7 @@ def reset_state():
         "done_flags": {}, "notes": {}, "timers": {}, "category": None,
         "prompt_sent": False, "timer_running": {}, "last_tick": {},
         "project_title": "", "project_description": "", "upcycling_options": [],
-        "plan_approved": False, "initial_plan": "", "user_image": None,
-        "tts": {}
     }
     st.success("✅ Reset complete!")
     st.rerun()
@@ -117,8 +122,7 @@ def initial_analysis(uploaded_file, context_text):
             "Reply with ONLY the category name."
         )
         category = send_text_request(CATEGORY_MODEL, category_prompt, image)
-        if not category:
-            return
         st.session_state.app_state['category'] = category
         plan_prompt = f"""
@@ -137,8 +141,7 @@ def initial_analysis(uploaded_file, context_text):
         [Your plan or 3 options]
         """
         plan_response = send_text_request(GENERATION_MODEL, plan_prompt, image)
-        if not plan_response:
-            return
     try:
         st.session_state.app_state['project_title'] = re.search(r"TITLE:\s*(.*)", plan_response).group(1).strip()
@@ -165,8 +168,7 @@ def generate_detailed_guide_with_images(selected_option=None):
     """Generates the detailed guide with steps and illustrations."""
     image = st.session_state.app_state.get('user_image')
     if not image:
-        st.error("Image not found. Please start over.")
-        return
     context = f"The user has approved the plan for '{st.session_state.app_state['project_title']}'."
     if selected_option:
@@ -222,16 +224,13 @@ def generate_detailed_guide_with_images(selected_option=None):
                     st.session_state.app_state['timers'][idx] = val * (60 if "minute" in unit else 1)
                 else:
                     st.session_state.app_state['timers'][idx] = 0
-                # Initialize empty TTS slot (will be generated on demand)
-                st.session_state.app_state['tts'][idx] = None
         except Exception as e:
             st.error(f"Failed to generate or parse the illustrated guide: {str(e)}")
 def render_sidebar_navigation():
     st.sidebar.markdown("## Steps Navigation")
     steps = st.session_state.app_state['steps']
-    if not steps:
-        return
     total_steps = len(steps)
     completed = sum(1 for done in st.session_state.app_state['done_flags'].values() if done)
     st.sidebar.progress(completed / total_steps if total_steps > 0 else 0)
@@ -254,7 +253,13 @@ def render_step(idx, text):
     st.markdown(f"### Step {idx} of {total}")
     st.write(text)
-    # Display illustrative image if available
     if idx in st.session_state.app_state['images']:
         st.image(
             st.session_state.app_state['images'][idx],
@@ -262,35 +267,6 @@ def render_step(idx, text):
             use_container_width=True
         )
-    # TTS generation and playback
-    # If we haven't generated TTS for this step yet, do it now
-    if st.session_state.app_state['tts'].get(idx) is None:
-        try:
-            tts_response = client.models.generate_content(
-                model=TTS_MODEL,
-                contents=text,
-                config=types.GenerateContentConfig(
-                    response_modalities=["AUDIO"],
-                    speech_config=types.SpeechConfig(
-                        voice_config=types.VoiceConfig(
-                            prebuilt_voice_config=types.PrebuiltVoiceConfig(
-                                voice_name=VOICE_NAME,
-                            )
-                        )
-                    ),
-                )
-            )
-            pcm_data = tts_response.candidates[0].content.parts[0].inline_data.data
-            wav_bytes = tts_wav_bytes(pcm_data)
-            st.session_state.app_state['tts'][idx] = wav_bytes
-        except Exception as e:
-            st.error(f"Failed to generate TTS for step {idx}: {e}")
-    # If WAV bytes are available, show a play button
-    if st.session_state.app_state['tts'].get(idx):
-        st.audio(st.session_state.app_state['tts'][idx], format="audio/wav")
-    # Checkbox and notes
     done = st.checkbox("✅ Mark this step as completed", value=st.session_state.app_state['done_flags'].get(idx, False), key=f"done_{idx}")
     st.session_state.app_state['done_flags'][idx] = done
     notes = st.text_area("📝 Your notes for this step:", value=st.session_state.app_state['notes'].get(idx, ""), height=100, key=f"notes_{idx}")
@@ -313,12 +289,12 @@ st.title("🛠️ NeoFix AI-Powered DIY Assistant")
 with st.expander("ℹ️ How it works", expanded=False):
     st.write("""
-1.  **Upload a photo** of your project or the item you want to fix or build (appliance, car part, plant, craft project).
-2.  **(Optional) Describe your goal** for more accurate results.
-3.  **Review the Plan.** The AI will propose a plan. If you didn’t provide a description, you’ll be asked to approve it.
-4.  **Get Your Guide** with tools and illustrated step-by-step instructions.
-5.  **Follow the Steps** using the interactive checklist (with audio narration for each step).
-""")
 if not st.session_state.app_state['prompt_sent']:
     st.markdown("---")
@@ -374,7 +350,7 @@ else:
         if total_steps > 0:
             progress = done_count / total_steps
             st.progress(progress)
-            st.markdown(f"**Overall Progress:** {done_count} of {total_steps} steps completed ({progress:.0%})")
             if done_count == total_steps:
                 st.balloons()
                 st.success("🎉 Congratulations! You've completed all steps!")

 import re
 import time
 import os
 import wave
 # Disable Streamlit analytics (prevents PermissionError in some environments)
     st.error("Please set GOOGLE_API_KEY in your environment variables or Streamlit secrets")
     st.stop()
+# 1.2 Initialize the GenAI client
 try:
     client = genai.Client(api_key=API_KEY)
 except Exception as e:
     st.error(f"Failed to initialize GenAI Client: {e}")
     st.stop()
+# 1.3 Constants
+CATEGORY_MODEL = "gemini-2.0-flash-exp"
 GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
+TTS_MODEL = "gemini-2.5-flash-preview-tts"
 # 1.4 Helper to parse numbered steps out of Gemini text
 def parse_numbered_steps(text):
     """Extract numbered steps from model output.

     A step is a line of the form "<number>. <description>", optionally
     preceded by whitespace. Lines that merely start with a number but have
     no literal dot after it (e.g. "2023 was a good year") are ignored.

     Args:
         text: Raw text returned by the model; may be None or empty.

     Returns:
         A list of (step_number, description) tuples in order of appearance,
         e.g. "1. Do this.\\n2. Do that." -> [(1, "Do this."), (2, "Do that.")].
         An empty list is returned when there is no text or no steps.
     """
     if not text:
         return []
     # A leading newline lets the same pattern match a step on the first line.
     text = "\n" + text
     # The dot must be escaped: an unescaped "." matches any character, which
     # turns every line beginning with a digit into a bogus step.
     steps = re.findall(r"\n\s*(\d+)\.\s*(.*)", text, re.MULTILINE)
     return [(int(num), desc.strip()) for num, desc in steps]
+# 1.5 TTS Generation Function
+@st.cache_data
+def generate_tts_audio(_client, text_to_speak):
+    """Generates audio from text using Gemini TTS and returns the audio data."""
+    try:
+        response = _client.models.generate_content(
+            model=TTS_MODEL,
+            contents=f"Say clearly: {text_to_speak}",
+            config=types.GenerateContentConfig(
+                response_modalities=["AUDIO"],
+                speech_config=types.SpeechConfig(
+                    voice_config=types.VoiceConfig(
+                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
+                            voice_name='Kore',
+                        )
+                    )
+                ),
+            )
+        )
+        return response.candidates[0].content.parts[0].inline_data.data
+    except Exception as e:
+        st.error(f"Failed to generate narration: {e}")
+        return None
 # ─────────────────────────────────────────────────────────────────────────────
 # 2. SESSION STATE SETUP
         "done_flags": {}, "notes": {}, "timers": {}, "category": None,
         "prompt_sent": False, "timer_running": {}, "last_tick": {},
         "project_title": "", "project_description": "", "upcycling_options": [],
+        "plan_approved": False, "initial_plan": "", "user_image": None
     }
 # ─────────────────────────────────────────────────────────────────────────────
         "done_flags": {}, "notes": {}, "timers": {}, "category": None,
         "prompt_sent": False, "timer_running": {}, "last_tick": {},
         "project_title": "", "project_description": "", "upcycling_options": [],
+        "plan_approved": False, "initial_plan": "", "user_image": None
     }
     st.success("✅ Reset complete!")
     st.rerun()
             "Reply with ONLY the category name."
         )
         category = send_text_request(CATEGORY_MODEL, category_prompt, image)
+        if not category: return
         st.session_state.app_state['category'] = category
         plan_prompt = f"""
         [Your plan or 3 options]
         """
         plan_response = send_text_request(GENERATION_MODEL, plan_prompt, image)
+        if not plan_response: return
     try:
         st.session_state.app_state['project_title'] = re.search(r"TITLE:\s*(.*)", plan_response).group(1).strip()
     """Generates the detailed guide with steps and illustrations."""
     image = st.session_state.app_state.get('user_image')
     if not image:
+        st.error("Image not found. Please start over."); return
     context = f"The user has approved the plan for '{st.session_state.app_state['project_title']}'."
     if selected_option:
                     st.session_state.app_state['timers'][idx] = val * (60 if "minute" in unit else 1)
                 else:
                     st.session_state.app_state['timers'][idx] = 0
         except Exception as e:
             st.error(f"Failed to generate or parse the illustrated guide: {str(e)}")
 def render_sidebar_navigation():
     st.sidebar.markdown("## Steps Navigation")
     steps = st.session_state.app_state['steps']
+    if not steps: return
     total_steps = len(steps)
     completed = sum(1 for done in st.session_state.app_state['done_flags'].values() if done)
     st.sidebar.progress(completed / total_steps if total_steps > 0 else 0)
     st.markdown(f"### Step {idx} of {total}")
     st.write(text)
+    # TTS Integration
+    if st.button(f"🔊 Narrate Step {idx}", key=f"tts_{idx}"):
+        with st.spinner("Generating narration..."):
+            audio_data = generate_tts_audio(client, text)
+            if audio_data:
+                st.audio(audio_data, format="audio/wav")
     if idx in st.session_state.app_state['images']:
         st.image(
             st.session_state.app_state['images'][idx],
             use_container_width=True
         )
     done = st.checkbox("✅ Mark this step as completed", value=st.session_state.app_state['done_flags'].get(idx, False), key=f"done_{idx}")
     st.session_state.app_state['done_flags'][idx] = done
     notes = st.text_area("📝 Your notes for this step:", value=st.session_state.app_state['notes'].get(idx, ""), height=100, key=f"notes_{idx}")
 with st.expander("ℹ️ How it works", expanded=False):
     st.write("""
+    1.  **Upload a photo** of your project or the item you want to fix or build (appliance, car part, plant, craft project).
+    2.  **(Optional) Describe your goal** for more accurate results.
+    3.  **Review the Plan.** The AI will propose a plan. If you didn’t provide a description, you’ll be asked to approve it.
+    4.  **Get Your Guide** with tools and illustrated step-by-step instructions.
+    5.  **Follow the Steps** using the interactive checklist.
+    """)
 if not st.session_state.app_state['prompt_sent']:
     st.markdown("---")
         if total_steps > 0:
             progress = done_count / total_steps
             st.progress(progress)
+            st.markdown(f"**Overall Progress:** {done_count} of {total_steps} completed ({progress:.0%})")
             if done_count == total_steps:
                 st.balloons()
                 st.success("🎉 Congratulations! You've completed all steps!")