rairo committed on
Commit
899cd7e
·
verified ·
1 Parent(s): 8559392

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -72
app.py CHANGED
@@ -6,7 +6,6 @@ from google.genai import types
6
  import re
7
  import time
8
  import os
9
- import io
10
  import wave
11
 
12
  # Disable Streamlit analytics (prevents PermissionError in some environments)
@@ -26,39 +25,47 @@ if not API_KEY:
26
  st.error("Please set GOOGLE_API_KEY in your environment variables or Streamlit secrets")
27
  st.stop()
28
 
29
- # 1.2 Initialize the GenAI client (as per original code)
30
  try:
31
  client = genai.Client(api_key=API_KEY)
32
  except Exception as e:
33
  st.error(f"Failed to initialize GenAI Client: {e}")
34
  st.stop()
35
 
36
- # 1.3 Constants (model IDs, exactly as in original code)
37
- CATEGORY_MODEL = "gemini-2.0-flash-exp"
38
  GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
39
- TTS_MODEL = "gemini-2.5-flash-preview-tts"
40
- VOICE_NAME = "Kore"
41
 
42
  # 1.4 Helper to parse numbered steps out of Gemini text
43
  def parse_numbered_steps(text):
44
- """
45
- Parses text with numbered steps into a list of tuples.
46
- Example: "1. Do this.\n2. Do that." -> [(1, "Do this."), (2, "Do that.")]
47
- """
48
- # Add a leading newline to help regex find the first step
49
  text = "\n" + text
50
- steps = re.findall(r"\n\s*(\d+)\.\s*(.*)", text, re.MULTILINE)
51
  return [(int(num), desc.strip()) for num, desc in steps]
52
 
53
- # 1.5 Helper to convert raw PCM into WAV bytes (for in-memory playback)
54
- def tts_wav_bytes(pcm, channels=1, rate=24000, sample_width=2):
55
- buf = io.BytesIO()
56
- with wave.open(buf, "wb") as wf:
57
- wf.setnchannels(channels)
58
- wf.setsampwidth(sample_width)
59
- wf.setframerate(rate)
60
- wf.writeframes(pcm)
61
- return buf.getvalue()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  # ─────────────────────────────────────────────────────────────────────────────
64
  # 2. SESSION STATE SETUP
@@ -70,8 +77,7 @@ if "app_state" not in st.session_state:
70
  "done_flags": {}, "notes": {}, "timers": {}, "category": None,
71
  "prompt_sent": False, "timer_running": {}, "last_tick": {},
72
  "project_title": "", "project_description": "", "upcycling_options": [],
73
- "plan_approved": False, "initial_plan": "", "user_image": None,
74
- "tts": {} # store TTS WAV bytes per step index
75
  }
76
 
77
  # ─────────────────────────────────────────────────────────────────────────────
@@ -85,8 +91,7 @@ def reset_state():
85
  "done_flags": {}, "notes": {}, "timers": {}, "category": None,
86
  "prompt_sent": False, "timer_running": {}, "last_tick": {},
87
  "project_title": "", "project_description": "", "upcycling_options": [],
88
- "plan_approved": False, "initial_plan": "", "user_image": None,
89
- "tts": {}
90
  }
91
  st.success("✅ Reset complete!")
92
  st.rerun()
@@ -117,8 +122,7 @@ def initial_analysis(uploaded_file, context_text):
117
  "Reply with ONLY the category name."
118
  )
119
  category = send_text_request(CATEGORY_MODEL, category_prompt, image)
120
- if not category:
121
- return
122
  st.session_state.app_state['category'] = category
123
 
124
  plan_prompt = f"""
@@ -137,8 +141,7 @@ def initial_analysis(uploaded_file, context_text):
137
  [Your plan or 3 options]
138
  """
139
  plan_response = send_text_request(GENERATION_MODEL, plan_prompt, image)
140
- if not plan_response:
141
- return
142
 
143
  try:
144
  st.session_state.app_state['project_title'] = re.search(r"TITLE:\s*(.*)", plan_response).group(1).strip()
@@ -165,8 +168,7 @@ def generate_detailed_guide_with_images(selected_option=None):
165
  """Generates the detailed guide with steps and illustrations."""
166
  image = st.session_state.app_state.get('user_image')
167
  if not image:
168
- st.error("Image not found. Please start over.")
169
- return
170
 
171
  context = f"The user has approved the plan for '{st.session_state.app_state['project_title']}'."
172
  if selected_option:
@@ -222,16 +224,13 @@ def generate_detailed_guide_with_images(selected_option=None):
222
  st.session_state.app_state['timers'][idx] = val * (60 if "minute" in unit else 1)
223
  else:
224
  st.session_state.app_state['timers'][idx] = 0
225
- # Initialize empty TTS slot (will be generated on demand)
226
- st.session_state.app_state['tts'][idx] = None
227
  except Exception as e:
228
  st.error(f"Failed to generate or parse the illustrated guide: {str(e)}")
229
 
230
  def render_sidebar_navigation():
231
  st.sidebar.markdown("## Steps Navigation")
232
  steps = st.session_state.app_state['steps']
233
- if not steps:
234
- return
235
  total_steps = len(steps)
236
  completed = sum(1 for done in st.session_state.app_state['done_flags'].values() if done)
237
  st.sidebar.progress(completed / total_steps if total_steps > 0 else 0)
@@ -254,7 +253,13 @@ def render_step(idx, text):
254
  st.markdown(f"### Step {idx} of {total}")
255
  st.write(text)
256
 
257
- # Display illustrative image if available
 
 
 
 
 
 
258
  if idx in st.session_state.app_state['images']:
259
  st.image(
260
  st.session_state.app_state['images'][idx],
@@ -262,35 +267,6 @@ def render_step(idx, text):
262
  use_container_width=True
263
  )
264
 
265
- # TTS generation and playback
266
- # If we haven't generated TTS for this step yet, do it now
267
- if st.session_state.app_state['tts'].get(idx) is None:
268
- try:
269
- tts_response = client.models.generate_content(
270
- model=TTS_MODEL,
271
- contents=text,
272
- config=types.GenerateContentConfig(
273
- response_modalities=["AUDIO"],
274
- speech_config=types.SpeechConfig(
275
- voice_config=types.VoiceConfig(
276
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
277
- voice_name=VOICE_NAME,
278
- )
279
- )
280
- ),
281
- )
282
- )
283
- pcm_data = tts_response.candidates[0].content.parts[0].inline_data.data
284
- wav_bytes = tts_wav_bytes(pcm_data)
285
- st.session_state.app_state['tts'][idx] = wav_bytes
286
- except Exception as e:
287
- st.error(f"Failed to generate TTS for step {idx}: {e}")
288
-
289
- # If WAV bytes are available, show a play button
290
- if st.session_state.app_state['tts'].get(idx):
291
- st.audio(st.session_state.app_state['tts'][idx], format="audio/wav")
292
-
293
- # Checkbox and notes
294
  done = st.checkbox("✅ Mark this step as completed", value=st.session_state.app_state['done_flags'].get(idx, False), key=f"done_{idx}")
295
  st.session_state.app_state['done_flags'][idx] = done
296
  notes = st.text_area("📝 Your notes for this step:", value=st.session_state.app_state['notes'].get(idx, ""), height=100, key=f"notes_{idx}")
@@ -313,12 +289,12 @@ st.title("🛠️ NeoFix AI-Powered DIY Assistant")
313
 
314
  with st.expander("ℹ️ How it works", expanded=False):
315
  st.write("""
316
- 1. **Upload a photo** of your project or the item you want to fix or build (appliance, car part, plant, craft project).
317
- 2. **(Optional) Describe your goal** for more accurate results.
318
- 3. **Review the Plan.** The AI will propose a plan. If you didn’t provide a description, you’ll be asked to approve it.
319
- 4. **Get Your Guide** with tools and illustrated step-by-step instructions.
320
- 5. **Follow the Steps** using the interactive checklist (with audio narration for each step).
321
- """)
322
 
323
  if not st.session_state.app_state['prompt_sent']:
324
  st.markdown("---")
@@ -374,7 +350,7 @@ else:
374
  if total_steps > 0:
375
  progress = done_count / total_steps
376
  st.progress(progress)
377
- st.markdown(f"**Overall Progress:** {done_count} of {total_steps} steps completed ({progress:.0%})")
378
  if done_count == total_steps:
379
  st.balloons()
380
  st.success("🎉 Congratulations! You've completed all steps!")
 
6
  import re
7
  import time
8
  import os
 
9
  import wave
10
 
11
  # Disable Streamlit analytics (prevents PermissionError in some environments)
 
25
  st.error("Please set GOOGLE_API_KEY in your environment variables or Streamlit secrets")
26
  st.stop()
27
 
28
+ # 1.2 Initialize the GenAI client
29
  try:
30
  client = genai.Client(api_key=API_KEY)
31
  except Exception as e:
32
  st.error(f"Failed to initialize GenAI Client: {e}")
33
  st.stop()
34
 
35
+ # 1.3 Constants
36
+ CATEGORY_MODEL = "gemini-2.0-flash-exp"
37
  GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
38
+ TTS_MODEL = "gemini-2.5-flash-preview-tts"
 
39
 
40
  # 1.4 Helper to parse numbered steps out of Gemini text
41
  def parse_numbered_steps(text):
 
 
 
 
 
42
  text = "\n" + text
43
+ steps = re.findall(r"\n\s*(\d+).\s*(.*)", text, re.MULTILINE)
44
  return [(int(num), desc.strip()) for num, desc in steps]
45
 
46
+ # 1.5 TTS Generation Function
47
+ @st.cache_data
48
+ def generate_tts_audio(_client, text_to_speak):
49
+ """Generates audio from text using Gemini TTS and returns the audio data."""
50
+ try:
51
+ response = _client.models.generate_content(
52
+ model=TTS_MODEL,
53
+ contents=f"Say clearly: {text_to_speak}",
54
+ config=types.GenerateContentConfig(
55
+ response_modalities=["AUDIO"],
56
+ speech_config=types.SpeechConfig(
57
+ voice_config=types.VoiceConfig(
58
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
59
+ voice_name='Kore',
60
+ )
61
+ )
62
+ ),
63
+ )
64
+ )
65
+ return response.candidates[0].content.parts[0].inline_data.data
66
+ except Exception as e:
67
+ st.error(f"Failed to generate narration: {e}")
68
+ return None
69
 
70
  # ─────────────────────────────────────────────────────────────────────────────
71
  # 2. SESSION STATE SETUP
 
77
  "done_flags": {}, "notes": {}, "timers": {}, "category": None,
78
  "prompt_sent": False, "timer_running": {}, "last_tick": {},
79
  "project_title": "", "project_description": "", "upcycling_options": [],
80
+ "plan_approved": False, "initial_plan": "", "user_image": None
 
81
  }
82
 
83
  # ─────────────────────────────────────────────────────────────────────────────
 
91
  "done_flags": {}, "notes": {}, "timers": {}, "category": None,
92
  "prompt_sent": False, "timer_running": {}, "last_tick": {},
93
  "project_title": "", "project_description": "", "upcycling_options": [],
94
+ "plan_approved": False, "initial_plan": "", "user_image": None
 
95
  }
96
  st.success("✅ Reset complete!")
97
  st.rerun()
 
122
  "Reply with ONLY the category name."
123
  )
124
  category = send_text_request(CATEGORY_MODEL, category_prompt, image)
125
+ if not category: return
 
126
  st.session_state.app_state['category'] = category
127
 
128
  plan_prompt = f"""
 
141
  [Your plan or 3 options]
142
  """
143
  plan_response = send_text_request(GENERATION_MODEL, plan_prompt, image)
144
+ if not plan_response: return
 
145
 
146
  try:
147
  st.session_state.app_state['project_title'] = re.search(r"TITLE:\s*(.*)", plan_response).group(1).strip()
 
168
  """Generates the detailed guide with steps and illustrations."""
169
  image = st.session_state.app_state.get('user_image')
170
  if not image:
171
+ st.error("Image not found. Please start over."); return
 
172
 
173
  context = f"The user has approved the plan for '{st.session_state.app_state['project_title']}'."
174
  if selected_option:
 
224
  st.session_state.app_state['timers'][idx] = val * (60 if "minute" in unit else 1)
225
  else:
226
  st.session_state.app_state['timers'][idx] = 0
 
 
227
  except Exception as e:
228
  st.error(f"Failed to generate or parse the illustrated guide: {str(e)}")
229
 
230
  def render_sidebar_navigation():
231
  st.sidebar.markdown("## Steps Navigation")
232
  steps = st.session_state.app_state['steps']
233
+ if not steps: return
 
234
  total_steps = len(steps)
235
  completed = sum(1 for done in st.session_state.app_state['done_flags'].values() if done)
236
  st.sidebar.progress(completed / total_steps if total_steps > 0 else 0)
 
253
  st.markdown(f"### Step {idx} of {total}")
254
  st.write(text)
255
 
256
+ # TTS Integration
257
+ if st.button(f"🔊 Narrate Step {idx}", key=f"tts_{idx}"):
258
+ with st.spinner("Generating narration..."):
259
+ audio_data = generate_tts_audio(client, text)
260
+ if audio_data:
261
+ st.audio(audio_data, format="audio/wav")
262
+
263
  if idx in st.session_state.app_state['images']:
264
  st.image(
265
  st.session_state.app_state['images'][idx],
 
267
  use_container_width=True
268
  )
269
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  done = st.checkbox("✅ Mark this step as completed", value=st.session_state.app_state['done_flags'].get(idx, False), key=f"done_{idx}")
271
  st.session_state.app_state['done_flags'][idx] = done
272
  notes = st.text_area("📝 Your notes for this step:", value=st.session_state.app_state['notes'].get(idx, ""), height=100, key=f"notes_{idx}")
 
289
 
290
  with st.expander("ℹ️ How it works", expanded=False):
291
  st.write("""
292
+ 1. **Upload a photo** of your project or the item you want to fix or build (appliance, car part, plant, craft project).
293
+ 2. **(Optional) Describe your goal** for more accurate results.
294
+ 3. **Review the Plan.** The AI will propose a plan. If you didn’t provide a description, you’ll be asked to approve it.
295
+ 4. **Get Your Guide** with tools and illustrated step-by-step instructions.
296
+ 5. **Follow the Steps** using the interactive checklist.
297
+ """)
298
 
299
  if not st.session_state.app_state['prompt_sent']:
300
  st.markdown("---")
 
350
  if total_steps > 0:
351
  progress = done_count / total_steps
352
  st.progress(progress)
353
+ st.markdown(f"**Overall Progress:** {done_count} of {total_steps} completed ({progress:.0%})")
354
  if done_count == total_steps:
355
  st.balloons()
356
  st.success("🎉 Congratulations! You've completed all steps!")