Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,6 @@ from google.genai import types
|
|
| 6 |
import re
|
| 7 |
import time
|
| 8 |
import os
|
| 9 |
-
import io
|
| 10 |
import wave
|
| 11 |
|
| 12 |
# Disable Streamlit analytics (prevents PermissionError in some environments)
|
|
@@ -26,39 +25,47 @@ if not API_KEY:
|
|
| 26 |
st.error("Please set GOOGLE_API_KEY in your environment variables or Streamlit secrets")
|
| 27 |
st.stop()
|
| 28 |
|
| 29 |
-
# 1.2 Initialize the GenAI client
|
| 30 |
try:
|
| 31 |
client = genai.Client(api_key=API_KEY)
|
| 32 |
except Exception as e:
|
| 33 |
st.error(f"Failed to initialize GenAI Client: {e}")
|
| 34 |
st.stop()
|
| 35 |
|
| 36 |
-
# 1.3 Constants
|
| 37 |
-
CATEGORY_MODEL
|
| 38 |
GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
|
| 39 |
-
TTS_MODEL
|
| 40 |
-
VOICE_NAME = "Kore"
|
| 41 |
|
| 42 |
# 1.4 Helper to parse numbered steps out of Gemini text
|
| 43 |
def parse_numbered_steps(text):
|
| 44 |
-
"""
|
| 45 |
-
Parses text with numbered steps into a list of tuples.
|
| 46 |
-
Example: "1. Do this.\n2. Do that." -> [(1, "Do this."), (2, "Do that.")]
|
| 47 |
-
"""
|
| 48 |
-
# Add a leading newline to help regex find the first step
|
| 49 |
text = "\n" + text
|
| 50 |
-
steps = re.findall(r"\n\s*(\d+)
|
| 51 |
return [(int(num), desc.strip()) for num, desc in steps]
|
| 52 |
|
| 53 |
-
# 1.5
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 64 |
# 2. SESSION STATE SETUP
|
|
@@ -70,8 +77,7 @@ if "app_state" not in st.session_state:
|
|
| 70 |
"done_flags": {}, "notes": {}, "timers": {}, "category": None,
|
| 71 |
"prompt_sent": False, "timer_running": {}, "last_tick": {},
|
| 72 |
"project_title": "", "project_description": "", "upcycling_options": [],
|
| 73 |
-
"plan_approved": False, "initial_plan": "", "user_image": None
|
| 74 |
-
"tts": {} # store TTS WAV bytes per step index
|
| 75 |
}
|
| 76 |
|
| 77 |
# ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -85,8 +91,7 @@ def reset_state():
|
|
| 85 |
"done_flags": {}, "notes": {}, "timers": {}, "category": None,
|
| 86 |
"prompt_sent": False, "timer_running": {}, "last_tick": {},
|
| 87 |
"project_title": "", "project_description": "", "upcycling_options": [],
|
| 88 |
-
"plan_approved": False, "initial_plan": "", "user_image": None
|
| 89 |
-
"tts": {}
|
| 90 |
}
|
| 91 |
st.success("✅ Reset complete!")
|
| 92 |
st.rerun()
|
|
@@ -117,8 +122,7 @@ def initial_analysis(uploaded_file, context_text):
|
|
| 117 |
"Reply with ONLY the category name."
|
| 118 |
)
|
| 119 |
category = send_text_request(CATEGORY_MODEL, category_prompt, image)
|
| 120 |
-
if not category:
|
| 121 |
-
return
|
| 122 |
st.session_state.app_state['category'] = category
|
| 123 |
|
| 124 |
plan_prompt = f"""
|
|
@@ -137,8 +141,7 @@ def initial_analysis(uploaded_file, context_text):
|
|
| 137 |
[Your plan or 3 options]
|
| 138 |
"""
|
| 139 |
plan_response = send_text_request(GENERATION_MODEL, plan_prompt, image)
|
| 140 |
-
if not plan_response:
|
| 141 |
-
return
|
| 142 |
|
| 143 |
try:
|
| 144 |
st.session_state.app_state['project_title'] = re.search(r"TITLE:\s*(.*)", plan_response).group(1).strip()
|
|
@@ -165,8 +168,7 @@ def generate_detailed_guide_with_images(selected_option=None):
|
|
| 165 |
"""Generates the detailed guide with steps and illustrations."""
|
| 166 |
image = st.session_state.app_state.get('user_image')
|
| 167 |
if not image:
|
| 168 |
-
st.error("Image not found. Please start over.")
|
| 169 |
-
return
|
| 170 |
|
| 171 |
context = f"The user has approved the plan for '{st.session_state.app_state['project_title']}'."
|
| 172 |
if selected_option:
|
|
@@ -222,16 +224,13 @@ def generate_detailed_guide_with_images(selected_option=None):
|
|
| 222 |
st.session_state.app_state['timers'][idx] = val * (60 if "minute" in unit else 1)
|
| 223 |
else:
|
| 224 |
st.session_state.app_state['timers'][idx] = 0
|
| 225 |
-
# Initialize empty TTS slot (will be generated on demand)
|
| 226 |
-
st.session_state.app_state['tts'][idx] = None
|
| 227 |
except Exception as e:
|
| 228 |
st.error(f"Failed to generate or parse the illustrated guide: {str(e)}")
|
| 229 |
|
| 230 |
def render_sidebar_navigation():
|
| 231 |
st.sidebar.markdown("## Steps Navigation")
|
| 232 |
steps = st.session_state.app_state['steps']
|
| 233 |
-
if not steps:
|
| 234 |
-
return
|
| 235 |
total_steps = len(steps)
|
| 236 |
completed = sum(1 for done in st.session_state.app_state['done_flags'].values() if done)
|
| 237 |
st.sidebar.progress(completed / total_steps if total_steps > 0 else 0)
|
|
@@ -254,7 +253,13 @@ def render_step(idx, text):
|
|
| 254 |
st.markdown(f"### Step {idx} of {total}")
|
| 255 |
st.write(text)
|
| 256 |
|
| 257 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
if idx in st.session_state.app_state['images']:
|
| 259 |
st.image(
|
| 260 |
st.session_state.app_state['images'][idx],
|
|
@@ -262,35 +267,6 @@ def render_step(idx, text):
|
|
| 262 |
use_container_width=True
|
| 263 |
)
|
| 264 |
|
| 265 |
-
# TTS generation and playback
|
| 266 |
-
# If we haven't generated TTS for this step yet, do it now
|
| 267 |
-
if st.session_state.app_state['tts'].get(idx) is None:
|
| 268 |
-
try:
|
| 269 |
-
tts_response = client.models.generate_content(
|
| 270 |
-
model=TTS_MODEL,
|
| 271 |
-
contents=text,
|
| 272 |
-
config=types.GenerateContentConfig(
|
| 273 |
-
response_modalities=["AUDIO"],
|
| 274 |
-
speech_config=types.SpeechConfig(
|
| 275 |
-
voice_config=types.VoiceConfig(
|
| 276 |
-
prebuilt_voice_config=types.PrebuiltVoiceConfig(
|
| 277 |
-
voice_name=VOICE_NAME,
|
| 278 |
-
)
|
| 279 |
-
)
|
| 280 |
-
),
|
| 281 |
-
)
|
| 282 |
-
)
|
| 283 |
-
pcm_data = tts_response.candidates[0].content.parts[0].inline_data.data
|
| 284 |
-
wav_bytes = tts_wav_bytes(pcm_data)
|
| 285 |
-
st.session_state.app_state['tts'][idx] = wav_bytes
|
| 286 |
-
except Exception as e:
|
| 287 |
-
st.error(f"Failed to generate TTS for step {idx}: {e}")
|
| 288 |
-
|
| 289 |
-
# If WAV bytes are available, show a play button
|
| 290 |
-
if st.session_state.app_state['tts'].get(idx):
|
| 291 |
-
st.audio(st.session_state.app_state['tts'][idx], format="audio/wav")
|
| 292 |
-
|
| 293 |
-
# Checkbox and notes
|
| 294 |
done = st.checkbox("✅ Mark this step as completed", value=st.session_state.app_state['done_flags'].get(idx, False), key=f"done_{idx}")
|
| 295 |
st.session_state.app_state['done_flags'][idx] = done
|
| 296 |
notes = st.text_area("π Your notes for this step:", value=st.session_state.app_state['notes'].get(idx, ""), height=100, key=f"notes_{idx}")
|
|
@@ -313,12 +289,12 @@ st.title("π οΈ NeoFix AI-Powered DIY Assistant")
|
|
| 313 |
|
| 314 |
with st.expander("βΉοΈ How it works", expanded=False):
|
| 315 |
st.write("""
|
| 316 |
-
1. **Upload a photo** of your project or the item you want to fix or build (appliance, car part, plant, craft project).
|
| 317 |
-
2. **(Optional) Describe your goal** for more accurate results.
|
| 318 |
-
3. **Review the Plan.** The AI will propose a plan. If you didn’t provide a description, you’ll be asked to approve it.
|
| 319 |
-
4. **Get Your Guide** with tools and illustrated step-by-step instructions.
|
| 320 |
-
5. **Follow the Steps** using the interactive checklist
|
| 321 |
-
""")
|
| 322 |
|
| 323 |
if not st.session_state.app_state['prompt_sent']:
|
| 324 |
st.markdown("---")
|
|
@@ -374,7 +350,7 @@ else:
|
|
| 374 |
if total_steps > 0:
|
| 375 |
progress = done_count / total_steps
|
| 376 |
st.progress(progress)
|
| 377 |
-
st.markdown(f"**Overall Progress:** {done_count} of {total_steps}
|
| 378 |
if done_count == total_steps:
|
| 379 |
st.balloons()
|
| 380 |
st.success("π Congratulations! You've completed all steps!")
|
|
|
|
| 6 |
import re
|
| 7 |
import time
|
| 8 |
import os
|
|
|
|
| 9 |
import wave
|
| 10 |
|
| 11 |
# Disable Streamlit analytics (prevents PermissionError in some environments)
|
|
|
|
| 25 |
st.error("Please set GOOGLE_API_KEY in your environment variables or Streamlit secrets")
|
| 26 |
st.stop()
|
| 27 |
|
| 28 |
+
# 1.2 Initialize the GenAI client
|
| 29 |
try:
|
| 30 |
client = genai.Client(api_key=API_KEY)
|
| 31 |
except Exception as e:
|
| 32 |
st.error(f"Failed to initialize GenAI Client: {e}")
|
| 33 |
st.stop()
|
| 34 |
|
| 35 |
+
# 1.3 Constants
|
| 36 |
+
CATEGORY_MODEL = "gemini-2.0-flash-exp"
|
| 37 |
GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
|
| 38 |
+
TTS_MODEL = "gemini-2.5-flash-preview-tts"
|
|
|
|
| 39 |
|
| 40 |
# 1.4 Helper to parse numbered steps out of Gemini text
|
| 41 |
def parse_numbered_steps(text):
    """Extract numbered steps ("1. Do this") from free-form model output.

    A step is a line that starts (after optional whitespace) with one or
    more digits followed by a literal period. Lines that merely begin with
    a number, such as "10 apples", are not treated as steps.

    Args:
        text: Raw text that may contain a numbered list.

    Returns:
        A list of ``(step_number, description)`` tuples in order of
        appearance, with the description stripped of surrounding
        whitespace. Empty input yields an empty list.

    Example:
        "1. Do this.\\n2. Do that." -> [(1, "Do this."), (2, "Do that.")]
    """
    # The period must be escaped: an unescaped "." matches any character,
    # which turned lines like "10 apples" into a bogus step 10.
    # "^" with re.MULTILINE anchors each candidate at the start of a line,
    # so no leading newline needs to be prepended for the first step.
    step_pattern = re.compile(r"^\s*(\d+)\.\s*(.*)", re.MULTILINE)
    return [
        (int(match.group(1)), match.group(2).strip())
        for match in step_pattern.finditer(text)
    ]
|
| 45 |
|
| 46 |
+
# 1.5 TTS Generation Function
|
| 47 |
+
def _pcm_to_wav(pcm_bytes, channels=1, sample_rate=24000, sample_width=2):
    """Wrap raw PCM samples in a WAV (RIFF) container and return the bytes.

    The defaults (mono, 24 kHz, 16-bit) follow the audio format used in the
    Gemini speech-generation examples -- NOTE(review): confirm against the
    model actually configured in TTS_MODEL.
    """
    # Imported locally so this block does not depend on file-level imports.
    import io
    import wave

    # Already a WAV payload: pass it through untouched.
    if bytes(pcm_bytes[:4]) == b"RIFF":
        return bytes(pcm_bytes)

    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(sample_width)
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(pcm_bytes)
    return buffer.getvalue()


@st.cache_data
def _synthesize_speech(_client, text_to_speak):
    """Call the TTS model and return WAV bytes; raises on any failure.

    Errors are deliberately NOT caught here: st.cache_data stores return
    values but not exceptions, so a failed request is retried on the next
    call instead of a cached ``None`` being replayed forever.
    The leading underscore on ``_client`` tells Streamlit not to hash it.
    """
    response = _client.models.generate_content(
        model=TTS_MODEL,
        contents=f"Say clearly: {text_to_speak}",
        config=types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name='Kore',
                    )
                )
            ),
        )
    )
    pcm_data = response.candidates[0].content.parts[0].inline_data.data
    if not pcm_data:
        raise ValueError("TTS response contained no audio data")
    return _pcm_to_wav(pcm_data)


def generate_tts_audio(_client, text_to_speak):
    """Generate narration audio for ``text_to_speak`` using Gemini TTS.

    Args:
        _client: An initialized GenAI client exposing
            ``models.generate_content``.
        text_to_speak: The text to narrate.

    Returns:
        WAV-encoded audio bytes suitable for ``st.audio(..., format="audio/wav")``,
        or ``None`` if generation failed (an error is shown in the UI).
        Successful results are cached per text; failures are not.
    """
    try:
        return _synthesize_speech(_client, text_to_speak)
    except Exception as e:
        # UI boundary: report the problem and let the user try again.
        st.error(f"Failed to generate narration: {e}")
        return None
|
| 69 |
|
| 70 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 71 |
# 2. SESSION STATE SETUP
|
|
|
|
| 77 |
"done_flags": {}, "notes": {}, "timers": {}, "category": None,
|
| 78 |
"prompt_sent": False, "timer_running": {}, "last_tick": {},
|
| 79 |
"project_title": "", "project_description": "", "upcycling_options": [],
|
| 80 |
+
"plan_approved": False, "initial_plan": "", "user_image": None
|
|
|
|
| 81 |
}
|
| 82 |
|
| 83 |
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
| 91 |
"done_flags": {}, "notes": {}, "timers": {}, "category": None,
|
| 92 |
"prompt_sent": False, "timer_running": {}, "last_tick": {},
|
| 93 |
"project_title": "", "project_description": "", "upcycling_options": [],
|
| 94 |
+
"plan_approved": False, "initial_plan": "", "user_image": None
|
|
|
|
| 95 |
}
|
| 96 |
st.success("✅ Reset complete!")
|
| 97 |
st.rerun()
|
|
|
|
| 122 |
"Reply with ONLY the category name."
|
| 123 |
)
|
| 124 |
category = send_text_request(CATEGORY_MODEL, category_prompt, image)
|
| 125 |
+
if not category: return
|
|
|
|
| 126 |
st.session_state.app_state['category'] = category
|
| 127 |
|
| 128 |
plan_prompt = f"""
|
|
|
|
| 141 |
[Your plan or 3 options]
|
| 142 |
"""
|
| 143 |
plan_response = send_text_request(GENERATION_MODEL, plan_prompt, image)
|
| 144 |
+
if not plan_response: return
|
|
|
|
| 145 |
|
| 146 |
try:
|
| 147 |
st.session_state.app_state['project_title'] = re.search(r"TITLE:\s*(.*)", plan_response).group(1).strip()
|
|
|
|
| 168 |
"""Generates the detailed guide with steps and illustrations."""
|
| 169 |
image = st.session_state.app_state.get('user_image')
|
| 170 |
if not image:
|
| 171 |
+
st.error("Image not found. Please start over."); return
|
|
|
|
| 172 |
|
| 173 |
context = f"The user has approved the plan for '{st.session_state.app_state['project_title']}'."
|
| 174 |
if selected_option:
|
|
|
|
| 224 |
st.session_state.app_state['timers'][idx] = val * (60 if "minute" in unit else 1)
|
| 225 |
else:
|
| 226 |
st.session_state.app_state['timers'][idx] = 0
|
|
|
|
|
|
|
| 227 |
except Exception as e:
|
| 228 |
st.error(f"Failed to generate or parse the illustrated guide: {str(e)}")
|
| 229 |
|
| 230 |
def render_sidebar_navigation():
|
| 231 |
st.sidebar.markdown("## Steps Navigation")
|
| 232 |
steps = st.session_state.app_state['steps']
|
| 233 |
+
if not steps: return
|
|
|
|
| 234 |
total_steps = len(steps)
|
| 235 |
completed = sum(1 for done in st.session_state.app_state['done_flags'].values() if done)
|
| 236 |
st.sidebar.progress(completed / total_steps if total_steps > 0 else 0)
|
|
|
|
| 253 |
st.markdown(f"### Step {idx} of {total}")
|
| 254 |
st.write(text)
|
| 255 |
|
| 256 |
+
# TTS Integration
|
| 257 |
+
if st.button(f"π Narrate Step {idx}", key=f"tts_{idx}"):
|
| 258 |
+
with st.spinner("Generating narration..."):
|
| 259 |
+
audio_data = generate_tts_audio(client, text)
|
| 260 |
+
if audio_data:
|
| 261 |
+
st.audio(audio_data, format="audio/wav")
|
| 262 |
+
|
| 263 |
if idx in st.session_state.app_state['images']:
|
| 264 |
st.image(
|
| 265 |
st.session_state.app_state['images'][idx],
|
|
|
|
| 267 |
use_container_width=True
|
| 268 |
)
|
| 269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
done = st.checkbox("✅ Mark this step as completed", value=st.session_state.app_state['done_flags'].get(idx, False), key=f"done_{idx}")
|
| 271 |
st.session_state.app_state['done_flags'][idx] = done
|
| 272 |
notes = st.text_area("π Your notes for this step:", value=st.session_state.app_state['notes'].get(idx, ""), height=100, key=f"notes_{idx}")
|
|
|
|
| 289 |
|
| 290 |
with st.expander("βΉοΈ How it works", expanded=False):
|
| 291 |
st.write("""
|
| 292 |
+
1. **Upload a photo** of your project or the item you want to fix or build (appliance, car part, plant, craft project).
|
| 293 |
+
2. **(Optional) Describe your goal** for more accurate results.
|
| 294 |
+
3. **Review the Plan.** The AI will propose a plan. If you didn’t provide a description, you’ll be asked to approve it.
|
| 295 |
+
4. **Get Your Guide** with tools and illustrated step-by-step instructions.
|
| 296 |
+
5. **Follow the Steps** using the interactive checklist.
|
| 297 |
+
""")
|
| 298 |
|
| 299 |
if not st.session_state.app_state['prompt_sent']:
|
| 300 |
st.markdown("---")
|
|
|
|
| 350 |
if total_steps > 0:
|
| 351 |
progress = done_count / total_steps
|
| 352 |
st.progress(progress)
|
| 353 |
+
st.markdown(f"**Overall Progress:** {done_count} of {total_steps} completed ({progress:.0%})")
|
| 354 |
if done_count == total_steps:
|
| 355 |
st.balloons()
|
| 356 |
st.success("π Congratulations! You've completed all steps!")
|