rairo committed on
Commit
d26d92a
·
verified ·
1 Parent(s): a1027dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +261 -537
app.py CHANGED
@@ -1,573 +1,297 @@
 
 
 
 
 
 
1
  import streamlit as st
2
- from PIL import Image
3
- from io import BytesIO
 
 
 
 
 
 
4
  from google import genai
5
  from google.genai import types
6
- import re
7
- import time
8
- import os
9
- import wave
10
- import io
11
- import tempfile
12
- import base64
13
-
14
- # Disable Streamlit analytics (prevents PermissionError in some environments)
15
- os.environ["STREAMLIT_ANALYTICS_ENABLED"] = "false"
16
 
17
  # ─────────────────────────────────────────────────────────────────────────────
18
- # 1. CONFIGURATION
19
  # ─────────────────────────────────────────────────────────────────────────────
 
 
 
20
 
21
- # 1.1 Load your Google API key from environment or Streamlit secrets
22
- try:
23
- API_KEY = st.secrets["GOOGLE_API_KEY"]
24
- except (AttributeError, KeyError):
25
- API_KEY = os.environ.get("GOOGLE_API_KEY")
 
 
 
 
 
26
 
27
  if not API_KEY:
28
- st.error("Please set GOOGLE_API_KEY in your environment variables or Streamlit secrets")
29
- st.stop()
30
 
31
- # 1.2 Initialize the GenAI client
32
  try:
33
- client = genai.Client(api_key=API_KEY)
34
  except Exception as e:
35
- st.error(f"Failed to initialize GenAI Client: {e}")
36
- st.stop()
37
-
38
- # 1.3 Constants
39
- CATEGORY_MODEL = "gemini-2.0-flash-exp"
40
- GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
41
- TTS_MODEL = "gemini-2.5-flash-preview-tts"
42
-
43
- # 1.4 Helper to parse numbered steps out of Gemini text
44
- def parse_numbered_steps(text):
45
- text = "\n" + text
46
- steps = re.findall(r"\n\s*(\d+).\s*(.*)", text, re.MULTILINE)
47
- return [(int(num), desc.strip()) for num, desc in steps]
48
-
49
- # 1.5 FIXED File Upload Handler
50
- def handle_uploaded_file(uploaded_file):
51
- """Enhanced file handler with better error handling and validation for Hugging Face Spaces."""
52
- if uploaded_file is None:
53
- return None, "No file uploaded"
54
-
55
- try:
56
- # Get file info
57
- file_details = {
58
- "filename": uploaded_file.name,
59
- "filetype": uploaded_file.type,
60
- "filesize": uploaded_file.size
61
- }
62
-
63
- # Validate file size (limit to 5MB for better performance in HF Spaces)
64
- max_size = 5 * 1024 * 1024 # 5MB
65
- if uploaded_file.size > max_size:
66
- return None, f"File size ({uploaded_file.size / 1024 / 1024:.1f}MB) exceeds limit (5MB)"
67
-
68
- # Validate file type more strictly
69
- allowed_types = ['image/jpeg', 'image/jpg', 'image/png', 'image/bmp', 'image/gif']
70
- if uploaded_file.type not in allowed_types:
71
- return None, f"Unsupported file type: {uploaded_file.type}. Allowed: JPG, PNG, BMP, GIF"
72
-
73
- # Read file bytes with error handling
74
- try:
75
- file_bytes = uploaded_file.read()
76
- if len(file_bytes) == 0:
77
- return None, "File appears to be empty"
78
- except Exception as read_error:
79
- return None, f"Error reading file: {str(read_error)}"
80
-
81
- # Reset file pointer for PIL
82
- uploaded_file.seek(0)
83
-
84
- # Try to open and validate the image
85
- try:
86
- image = Image.open(BytesIO(file_bytes))
87
-
88
- # Verify image is valid
89
- image.verify()
90
-
91
- # Reopen for actual use (verify() closes the image)
92
- image = Image.open(BytesIO(file_bytes))
93
-
94
- # Convert to RGB if necessary (handles RGBA, P mode, etc.)
95
- if image.mode not in ('RGB', 'L'):
96
- image = image.convert('RGB')
97
-
98
- # Resize if too large (helps with memory in HF Spaces)
99
- max_dimension = 1024
100
- if max(image.size) > max_dimension:
101
- image.thumbnail((max_dimension, max_dimension), Image.Resampling.LANCZOS)
102
-
103
- return image, "Success"
104
-
105
- except Exception as img_error:
106
- return None, f"Invalid or corrupted image: {str(img_error)}"
107
-
108
- except Exception as e:
109
- return None, f"Unexpected error processing file: {str(e)}"
110
 
111
- # 1.6 TTS Generation Function with better error handling
112
- @st.cache_data
113
- def generate_tts_audio(_client, text_to_speak):
114
- """Generates audio from text using Gemini TTS and returns the audio data and its mime type."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  try:
116
- # Limit text length to prevent timeout
117
- if len(text_to_speak) > 500:
118
- text_to_speak = text_to_speak[:500] + "..."
119
-
120
- response = _client.models.generate_content(
121
  model=TTS_MODEL,
122
- contents=f"Say clearly: {text_to_speak}",
123
  config=types.GenerateContentConfig(
124
  response_modalities=["AUDIO"],
125
  speech_config=types.SpeechConfig(
126
  voice_config=types.VoiceConfig(
127
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
128
- voice_name='Kore',
129
- )
130
  )
131
  ),
132
  )
133
  )
134
- audio_part = response.candidates[0].content.parts[0]
135
- return audio_part.inline_data.data, audio_part.inline_data.mime_type
136
- except Exception as e:
137
- st.error(f"Failed to generate narration: {e}")
138
  return None, None
139
 
140
- # 1.7 NEW HELPER FUNCTION TO CREATE A WAV FILE IN MEMORY
141
- def _convert_pcm_to_wav(pcm_data, sample_rate=24000, channels=1, sample_width=2):
142
- """Wraps raw PCM audio data in a WAV container in memory."""
143
- audio_buffer = io.BytesIO()
144
- with wave.open(audio_buffer, 'wb') as wf:
145
- wf.setnchannels(channels)
146
- wf.setsampwidth(sample_width)
147
- wf.setframerate(sample_rate)
148
- wf.writeframes(pcm_data)
149
- audio_buffer.seek(0)
150
- return audio_buffer.getvalue()
151
 
152
- # ─────────────────────────────────────────────────────────────────────────────
153
- # 2. SESSION STATE SETUP
154
- # ─────────────────────────────────────────────────────────────────────────────
155
 
156
- if "app_state" not in st.session_state:
157
- st.session_state.app_state = {
158
- "steps": [], "images": {}, "tools_list": [], "current_step": 1,
159
- "done_flags": {}, "notes": {}, "timers": {}, "category": None,
160
- "prompt_sent": False, "timer_running": {}, "last_tick": {},
161
- "project_title": "", "project_description": "", "upcycling_options": [],
162
- "plan_approved": False, "initial_plan": "", "user_image": None,
163
- "upload_error": None, "upload_attempts": 0, "last_uploaded_file": None
164
- }
165
 
166
  # ─────────────────────────────────────────────────────────────────────────────
167
- # 3. LAYOUT & FUNCTIONS
168
  # ─────────────────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
- def reset_state():
171
- """Clear out all session state so user can start fresh."""
172
- st.session_state.app_state = {
173
- "steps": [], "images": {}, "tools_list": [], "current_step": 1,
174
- "done_flags": {}, "notes": {}, "timers": {}, "category": None,
175
- "prompt_sent": False, "timer_running": {}, "last_tick": {},
176
- "project_title": "", "project_description": "", "upcycling_options": [],
177
- "plan_approved": False, "initial_plan": "", "user_image": None,
178
- "upload_error": None, "upload_attempts": 0, "last_uploaded_file": None
179
- }
180
- st.success("βœ… Reset complete!")
181
- st.rerun()
182
-
183
- def send_text_request(model_name, prompt, image):
184
- """Helper to send requests that expect only a text response."""
185
- try:
186
- chat = client.chats.create(model=model_name)
187
- response = chat.send_message([prompt, image])
188
- response_text = "".join(part.text for part in response.candidates[0].content.parts if part.text)
189
- return response_text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  except Exception as e:
191
- st.error(f"Error with model {model_name}: {str(e)}")
192
- return None
193
-
194
- def initial_analysis(image, context_text):
195
- """First pass with AI: get category, then title, description, and initial plan."""
196
- if image is None:
197
- st.error("No valid image provided for analysis")
198
- return
199
-
200
- st.session_state.app_state['user_image'] = image
201
-
202
- with st.spinner("πŸ€– Analyzing your project and preparing a plan..."):
203
- category_prompt = (
204
- "You are an expert DIY assistant. Analyze the user's image and context. "
205
- f"Context: '{context_text}'. "
206
- "Categorize the project into ONE of the following: "
207
- "Home Appliance Repair, Automotive Maintenance, Gardening & Urban Farming, "
208
- "Upcycling & Sustainable Crafts, or DIY Project Creation. "
209
- "Reply with ONLY the category name."
210
- )
211
- category = send_text_request(CATEGORY_MODEL, category_prompt, image)
212
- if not category: return
213
- st.session_state.app_state['category'] = category
214
-
215
- plan_prompt = f"""
216
- You are an expert DIY assistant in the category: {category}.
217
- User Context: "{context_text if context_text else 'No context provided.'}"
218
- Based on the image and context, perform the following:
219
- 1. **Title:** Create a short, clear title for this project.
220
- 2. **Description:** Write a brief, one-paragraph description of the goal.
221
- 3. **Initial Plan:**
222
- - If 'Upcycling & Sustainable Crafts' AND no specific project is mentioned, propose three distinct project options as a numbered list under "UPCYCLING OPTIONS:".
223
- - For all other cases, briefly outline the main stages of the proposed solution.
224
- Structure your response EXACTLY like this:
225
- TITLE: [Your title]
226
- DESCRIPTION: [Your description]
227
- INITIAL PLAN:
228
- [Your plan or 3 options]
229
- """
230
- plan_response = send_text_request(GENERATION_MODEL, plan_prompt, image)
231
- if not plan_response: return
232
-
233
- try:
234
- st.session_state.app_state['project_title'] = re.search(r"TITLE:\s*(.*)", plan_response).group(1).strip()
235
- st.session_state.app_state['project_description'] = re.search(r"DESCRIPTION:\s*(.*)", plan_response, re.DOTALL).group(1).strip()
236
- initial_plan_text = re.search(r"INITIAL PLAN:\s*(.*)", plan_response, re.DOTALL).group(1).strip()
237
-
238
- if "UPCYCLING OPTIONS:" in initial_plan_text:
239
- options = re.findall(r"^\s*\d+\.\s*(.*)", initial_plan_text, re.MULTILINE)
240
- st.session_state.app_state['upcycling_options'] = options
241
- else:
242
- st.session_state.app_state['initial_plan'] = initial_plan_text
243
-
244
- st.session_state.app_state['prompt_sent'] = True
245
- if context_text:
246
- st.session_state.app_state['plan_approved'] = True
247
- generate_detailed_guide_with_images()
248
- else:
249
- st.session_state.app_state['plan_approved'] = False
250
- except AttributeError:
251
- st.error("The AI response was not in the expected format. Please try again.")
252
- st.session_state.app_state['prompt_sent'] = False
253
-
254
- def generate_detailed_guide_with_images(selected_option=None):
255
- """Generates the detailed guide with steps and illustrations."""
256
- image = st.session_state.app_state.get('user_image')
257
- if not image:
258
- st.error("Image not found. Please start over."); return
259
-
260
- context = f"The user has approved the plan for '{st.session_state.app_state['project_title']}'."
261
- if selected_option:
262
- context = f"The user chose the upcycling project: '{selected_option}'."
263
-
264
- detailed_prompt = f"""
265
- You are a DIY expert. The user wants to proceed with the project titled "{st.session_state.app_state['project_title']}".
266
- {context}
267
- Provide a detailed guide. For each step, you MUST provide a simple, clear illustrative image.
268
- Format your response EXACTLY like this:
269
- TOOLS AND MATERIALS:
270
- - Tool A
271
- - Material B
272
- STEPS(Maximum 7 steps):
273
- 1. First step instructions.
274
- 2. Second step instructions...
275
- """
276
- with st.spinner("πŸ› οΈ Generating your detailed guide with illustrations..."):
277
- try:
278
- chat = client.chats.create(
279
- model=GENERATION_MODEL,
280
- config=types.GenerateContentConfig(response_modalities=["Text", "Image"])
281
- )
282
- full_resp = chat.send_message([detailed_prompt, image])
283
- gen_parts = full_resp.candidates[0].content.parts
284
-
285
- combined_text = ""
286
- inline_images = []
287
- for part in gen_parts:
288
- if part.text is not None:
289
- combined_text += part.text + "\n"
290
- if part.inline_data is not None:
291
- img = Image.open(BytesIO(part.inline_data.data))
292
- inline_images.append(img)
293
- combined_text = combined_text.strip()
294
-
295
- tools_section = re.search(r"TOOLS AND MATERIALS:\s*(.*?)\s*STEPS:", combined_text, re.DOTALL).group(1).strip()
296
- steps_section = re.search(r"STEPS:\s*(.*)", combined_text, re.DOTALL).group(1).strip()
297
- parsed_steps = parse_numbered_steps(steps_section)
298
-
299
- st.session_state.app_state['tools_list'] = [line.strip("- ").strip() for line in tools_section.split('\n') if line.strip()]
300
- st.session_state.app_state['steps'] = parsed_steps
301
- st.session_state.app_state['images'] = {idx: inline_images[idx - 1] for idx, _ in parsed_steps if idx - 1 < len(inline_images)}
302
-
303
- for idx, step_text in parsed_steps:
304
- st.session_state.app_state['done_flags'][idx] = False
305
- st.session_state.app_state['notes'][idx] = ""
306
- timer_match = re.search(r"wait\s+for\s+(\d+)\s+(seconds?|minutes?)", step_text.lower())
307
- if timer_match:
308
- val, unit = int(timer_match.group(1)), timer_match.group(2)
309
- st.session_state.app_state['timers'][idx] = val * (60 if "minute" in unit else 1)
310
- else:
311
- st.session_state.app_state['timers'][idx] = 0
312
- except Exception as e:
313
- st.error(f"Failed to generate or parse the illustrated guide: {str(e)}")
314
-
315
- def render_sidebar_navigation():
316
- st.sidebar.markdown("## Steps Navigation")
317
- steps = st.session_state.app_state['steps']
318
- if not steps: return
319
- total_steps = len(steps)
320
- completed = sum(1 for done in st.session_state.app_state['done_flags'].values() if done)
321
- st.sidebar.progress(completed / total_steps if total_steps > 0 else 0)
322
- st.sidebar.write(f"Progress: {completed}/{total_steps} steps")
323
- for (idx, _) in steps:
324
- is_done = st.session_state.app_state['done_flags'].get(idx, False)
325
- label = f"{'βœ“' if is_done else 'Β·'} Step {idx}"
326
- if st.sidebar.button(label, key=f"nav_{idx}"):
327
- st.session_state.app_state['current_step'] = idx
328
- st.rerun()
329
-
330
- def render_tools_list():
331
- if st.session_state.app_state['tools_list']:
332
- with st.expander("πŸ”§ Required Tools & Materials", expanded=True):
333
- for item in st.session_state.app_state['tools_list']:
334
- st.markdown(f"- {item}")
335
-
336
- def render_step(idx, text):
337
- total = len(st.session_state.app_state['steps'])
338
- st.markdown(f"### Step {idx} of {total}")
339
- st.write(text)
340
-
341
- # FINALIZED TTS Integration
342
- if st.button(f"πŸ”Š Narrate Step {idx}", key=f"tts_{idx}"):
343
- with st.spinner("Generating narration..."):
344
- audio_data, mime_type = generate_tts_audio(client, text)
345
-
346
- if audio_data:
347
- # Check if the audio is raw PCM data
348
- if 'L16' in mime_type or 'pcm' in mime_type:
349
- st.info("Raw audio format detected. Converting to WAV for playback...")
350
- # Convert the raw PCM data to a playable WAV format
351
- wav_data = _convert_pcm_to_wav(audio_data)
352
- st.audio(wav_data, format="audio/wav")
353
- else:
354
- # If it's already in a standard format (like mp3, ogg), play it directly
355
- st.audio(audio_data, format=mime_type)
356
- else:
357
- st.error("Could not generate audio.")
358
-
359
- if idx in st.session_state.app_state['images']:
360
- st.image(
361
- st.session_state.app_state['images'][idx],
362
- caption=f"Illustration for step {idx}",
363
- use_container_width=True
364
- )
365
-
366
- done = st.checkbox("βœ… Mark this step as completed", value=st.session_state.app_state['done_flags'].get(idx, False), key=f"done_{idx}")
367
- st.session_state.app_state['done_flags'][idx] = done
368
- notes = st.text_area("πŸ“ Your notes for this step:", value=st.session_state.app_state['notes'].get(idx, ""), height=100, key=f"notes_{idx}")
369
- st.session_state.app_state['notes'][idx] = notes
370
- st.markdown("---")
371
- col1, col2, col3 = st.columns([1, 2, 1])
372
- if idx > 1 and col1.button("⬅️ Previous", key=f"prev_{idx}"):
373
- st.session_state.app_state['current_step'] -= 1
374
- st.rerun()
375
- if idx < total and col3.button("Next ➑️", key=f"next_{idx}"):
376
- st.session_state.app_state['current_step'] += 1
377
- st.rerun()
378
 
379
  # ─────────────────────────────────────────────────────────────────────────────
380
- # 4. APP LAYOUT - FIXED UPLOAD SECTION
381
  # ─────────────────────────────────────────────────────────────────────────────
382
-
383
- st.set_page_config(page_title="NeoFix DIY Assistant", page_icon="πŸ› οΈ", layout="wide")
384
- st.title("πŸ› οΈ NeoFix AI-Powered DIY Assistant")
385
-
386
- with st.expander("ℹ️ How it works", expanded=False):
387
- st.write("""
388
- 1. **Upload a photo** of your project or the item you want to fix or build (appliance, car part, plant, craft project).
389
- 2. **(Optional) Describe your goal** for more accurate results.
390
- 3. **Review the Plan.** The AI will propose a plan. If you didn't provide a description, you'll be asked to approve it.
391
- 4. **Get Your Guide** with tools and illustrated step-by-step instructions.
392
- 5. **Follow the Steps** using the interactive checklist.
393
- """)
394
-
395
- if not st.session_state.app_state['prompt_sent']:
396
- st.markdown("---")
397
- col1, col2 = st.columns([3, 1])
398
-
399
- with col1:
400
- st.markdown("### πŸ“· Upload Project Image")
401
-
402
- # Show upload status
403
- if st.session_state.app_state.get('upload_error'):
404
- st.error(f"Upload Error: {st.session_state.app_state['upload_error']}")
405
-
406
- if st.session_state.app_state.get('upload_attempts', 0) > 0:
407
- st.info(f"Upload attempts: {st.session_state.app_state['upload_attempts']}")
408
-
409
- # IMPROVED File uploader with unique key to force refresh
410
- upload_key = f"file_upload_{st.session_state.app_state.get('upload_attempts', 0)}"
411
- uploaded_image = st.file_uploader(
412
- "Choose an image file",
413
- type=["jpg", "jpeg", "png", "bmp", "gif"],
414
- accept_multiple_files=False,
415
- key=upload_key,
416
- help="Supported: JPG, PNG, BMP, GIF (max 5MB)"
417
- )
418
-
419
- # Process uploaded image immediately
420
- processed_image = None
421
- upload_status = ""
422
-
423
- if uploaded_image is not None:
424
- # Check if this is a new file upload
425
- current_file_id = f"{uploaded_image.name}_{uploaded_image.size}"
426
- if current_file_id != st.session_state.app_state.get('last_uploaded_file'):
427
- st.session_state.app_state['last_uploaded_file'] = current_file_id
428
-
429
- with st.spinner("Processing uploaded image..."):
430
- processed_image, upload_status = handle_uploaded_file(uploaded_image)
431
-
432
- if processed_image is not None:
433
- st.session_state.app_state['upload_error'] = None
434
- st.success("βœ… Image uploaded and processed successfully!")
435
- st.image(processed_image, caption="Uploaded image preview", use_container_width=True)
436
- else:
437
- st.session_state.app_state['upload_error'] = upload_status
438
- st.session_state.app_state['upload_attempts'] += 1
439
- st.error(f"❌ {upload_status}")
440
- else:
441
- # File already processed, show cached result
442
- if st.session_state.app_state.get('upload_error') is None:
443
- processed_image, _ = handle_uploaded_file(uploaded_image)
444
- if processed_image:
445
- st.success("βœ… Image ready for analysis!")
446
- st.image(processed_image, caption="Uploaded image preview", use_container_width=True)
447
-
448
- # Alternative camera input
449
- st.markdown("##### Alternative: Take a photo")
450
- camera_image = st.camera_input("Take a picture", key=f"camera_{st.session_state.app_state.get('upload_attempts', 0)}")
451
- if camera_image and not uploaded_image:
452
- with st.spinner("Processing camera image..."):
453
- processed_image, upload_status = handle_uploaded_file(camera_image)
454
- if processed_image is not None:
455
- st.session_state.app_state['upload_error'] = None
456
- st.success("βœ… Photo captured and processed!")
457
- st.image(processed_image, caption="Camera photo preview", use_container_width=True)
458
- else:
459
- st.error(f"❌ {upload_status}")
460
-
461
- context_text = st.text_area(
462
- "✏️ Describe the issue or your goal (optional but recommended)",
463
- height=80,
464
- placeholder="e.g., 'My toaster won't turn on,' or 'How do I build a desk like this?'"
465
- )
466
-
467
- with col2:
468
- st.markdown("### Actions")
469
-
470
- # Get AI Guidance button - only enabled when image is ready
471
- has_valid_image = (uploaded_image is not None or camera_image is not None) and st.session_state.app_state.get('upload_error') is None
472
-
473
- if st.button(
474
- "πŸš€ Get AI Guidance",
475
- type="primary",
476
- use_container_width=True,
477
- disabled=not has_valid_image
478
- ):
479
- image_to_analyze = None
480
-
481
- # Determine which image to use
482
- if uploaded_image:
483
- image_to_analyze, status = handle_uploaded_file(uploaded_image)
484
- elif camera_image:
485
- image_to_analyze, status = handle_uploaded_file(camera_image)
486
-
487
- if image_to_analyze is not None:
488
- initial_analysis(image_to_analyze, context_text)
489
- st.rerun()
490
- else:
491
- st.error(f"❌ Image processing failed: {status}")
492
-
493
- # Status message for button
494
- if not has_valid_image:
495
- if uploaded_image is None and camera_image is None:
496
- st.warning("⚠️ Please upload an image first!")
497
- elif st.session_state.app_state.get('upload_error'):
498
- st.warning("⚠️ Fix upload error first!")
499
-
500
- # Troubleshooting section
501
- with st.expander("πŸ”§ Upload Troubleshooting"):
502
- st.markdown("""
503
- **Common fixes:**
504
- 1. **Refresh upload**: Click button below
505
- 2. **Check file size**: Max 5MB
506
- 3. **Try different format**: JPG works best
507
- 4. **Use camera**: If file upload fails
508
- 5. **Clear browser cache**: Ctrl+Shift+Delete
509
- """)
510
-
511
- if st.button("πŸ”„ Reset Upload", use_container_width=True):
512
- st.session_state.app_state['upload_attempts'] = 0
513
- st.session_state.app_state['upload_error'] = None
514
- st.session_state.app_state['last_uploaded_file'] = None
515
- st.rerun()
516
-
517
- # Debug info
518
- if st.checkbox("Show debug info"):
519
- st.json({
520
- "upload_attempts": st.session_state.app_state.get('upload_attempts', 0),
521
- "upload_error": st.session_state.app_state.get('upload_error'),
522
- "last_file": st.session_state.app_state.get('last_uploaded_file'),
523
- "has_uploaded_file": uploaded_image is not None,
524
- "has_camera_image": camera_image is not None
525
- })
526
-
527
- if st.button("πŸ”„ Start Over", use_container_width=True):
528
- reset_state()
529
- else:
530
- render_sidebar_navigation()
531
- st.markdown("---")
532
- st.markdown(f"### {st.session_state.app_state.get('project_title', 'Your Project')}")
533
- st.markdown(f"**Category:** `{st.session_state.app_state.get('category', 'N/A')}`")
534
- st.info(f"**Description:** {st.session_state.app_state.get('project_description', 'N/A')}")
535
- st.markdown("---")
536
-
537
- if not st.session_state.app_state['steps']:
538
- if st.session_state.app_state['upcycling_options']:
539
- st.markdown("#### The AI has suggested a few projects. Please choose one:")
540
- for i, option in enumerate(st.session_state.app_state['upcycling_options']):
541
- if st.button(option, key=f"option_{i}"):
542
- generate_detailed_guide_with_images(selected_option=option)
543
- st.rerun()
544
- elif not st.session_state.app_state['plan_approved']:
545
- st.markdown("#### The AI has proposed the following plan:")
546
- st.success(st.session_state.app_state['initial_plan'])
547
- if st.button("βœ… Looks good, proceed with this plan", type="primary"):
548
- st.session_state.app_state['plan_approved'] = True
549
- generate_detailed_guide_with_images()
550
- st.rerun()
551
  else:
552
- render_tools_list()
553
- st.markdown("---")
554
- current_step_index = st.session_state.app_state['current_step']
555
- try:
556
- step_num, step_text = st.session_state.app_state['steps'][current_step_index - 1]
557
- render_step(step_num, step_text)
558
- except IndexError:
559
- st.session_state.app_state['current_step'] = 1
560
- st.rerun()
561
-
562
- total_steps = len(st.session_state.app_state['steps'])
563
- done_count = sum(1 for d in st.session_state.app_state['done_flags'].values() if d)
564
- if total_steps > 0:
565
- progress = done_count / total_steps
566
- st.progress(progress)
567
- st.markdown(f"**Overall Progress:** {done_count} of {total_steps} completed ({progress:.0%})")
568
- if done_count == total_steps:
569
- st.balloons()
570
- st.success("πŸŽ‰ Congratulations! You've completed all steps!")
571
-
572
- if st.button("πŸ”„ Start Over"):
573
- reset_state()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ###############################################################################
2
+ # Sozo Business Studio Β· AI transforms business data into compelling narratives
3
+ ###############################################################################
4
+ import os, re, json, hashlib, uuid, asyncio, base64, io, tempfile, wave
5
+ from pathlib import Path
6
+
7
  import streamlit as st
8
+ import pandas as pd
9
+ import matplotlib
10
+ matplotlib.use("Agg")
11
+ import matplotlib.pyplot as plt
12
+ from fpdf import FPDF, HTMLMixin
13
+ from markdown_it import MarkdownIt
14
+ from pptx import Presentation
15
+ from pptx.util import Inches, Pt
16
  from google import genai
17
  from google.genai import types
18
+ from google.adk.agents import LlmAgent, SequentialAgent
19
+ from google.adk.sessions import InMemorySessionService
20
+ from google.adk.runners import Runner
21
+ from langchain_experimental.agents import create_pandas_dataframe_agent
22
+ from langchain_google_genai import ChatGoogleGenerativeAI
 
 
 
 
 
23
 
24
  # ─────────────────────────────────────────────────────────────────────────────
25
+ # PAGE CONFIG
26
  # ─────────────────────────────────────────────────────────────────────────────
27
+ st.set_page_config(page_title="Sozo Business Studio", layout="wide")
28
+ st.title("πŸ“Š Sozo Business Studio")
29
+ st.caption("AI transforms business data into compelling narratives.")
30
 
31
+ # ─────────────────────────────────────────────────────────────────────────────
32
+ # CONSTANTS
33
+ # ─────────────────────────────────────────────────────────────────────────────
34
+ FONT_DIR = Path(__file__).parent if "__file__" in globals() else Path(".")
35
+ FONT_REG = FONT_DIR / "NotoSans-Regular.ttf"
36
+ FONT_BLD = FONT_DIR / "NotoSans-Bold.ttf"
37
+ FONT_FAM = "NotoSans"
38
+ SLIDES = 7
39
+ TTS_MODEL = "gemini-2.5-flash-preview-tts"
40
+ API_KEY = os.getenv("GEMINI_API_KEY")
41
 
42
  if not API_KEY:
43
+ st.error("GEMINI_API_KEY not set"); st.stop()
 
44
 
 
45
  try:
46
+ GEM = genai.Client(api_key=API_KEY)
47
  except Exception as e:
48
+ st.error(f"GenAI init failed: {e}"); st.stop()
49
+
50
+ # ─────────────────────────────────────────────────────────────────────────────
51
+ # SESSION STATE
52
+ # ─────────────────────────────────────────────────────────────────────────────
53
+ st.session_state.setdefault("bundles", {})
54
+ st.session_state.setdefault("slide_idx", 0)
55
+
56
+ # ─────────────────────────────────────────────────────────────────────────────
57
+ # HELPERS
58
+ # ─────────────────────────────────────────────────────────────────────────────
59
+ sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
+ def fix_bullet(text: str) -> str:
62
+ subs = {
63
+ "\x95": "β€’", "\x96": "-", "\x97": "β€”",
64
+ "\x91": "'", "\x92": "'", "\x93": '"', "\x94": '"'
65
+ }
66
+ for bad, good in subs.items():
67
+ text = text.replace(bad, good)
68
+ return re.sub(r'[\x80-\x9f]', '', text)
69
+
70
+ def convert_pcm_to_wav(pcm: bytes, rate=24_000, ch=1, width=2) -> bytes:
71
+ buf = io.BytesIO()
72
+ with wave.open(buf, "wb") as wf:
73
+ wf.setnchannels(ch); wf.setsampwidth(width); wf.setframerate(rate); wf.writeframes(pcm)
74
+ buf.seek(0); return buf.getvalue()
75
+
76
+ @st.cache_data(show_spinner=False)
77
+ def generate_tts_audio(_client, txt: str):
78
+ txt = re.sub(r'[^\w\s\.,!?;:-]', '', txt)[:500]
79
+ if not txt: return None, None
80
  try:
81
+ resp = _client.models.generate_content(
 
 
 
 
82
  model=TTS_MODEL,
83
+ contents=f"Say clearly: {txt}",
84
  config=types.GenerateContentConfig(
85
  response_modalities=["AUDIO"],
86
  speech_config=types.SpeechConfig(
87
  voice_config=types.VoiceConfig(
88
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Kore")
 
 
89
  )
90
  ),
91
  )
92
  )
93
+ part = resp.candidates[0].content.parts[0]
94
+ return part.inline_data.data, part.inline_data.mime_type
95
+ except Exception:
 
96
  return None, None
97
 
98
+ # robust, single-pass regex – matches <> or [] and optional quotes/spaces
99
+ TAG_RE = re.compile(
100
+ r'[<\[]\s*generate_?chart\s*[:=]?\s*["\']?\s*([^>\]"\']+?)\s*["\']?\s*[>\]]',
101
+ flags=re.IGNORECASE
102
+ )
 
 
 
 
 
 
103
 
104
+ def extract_chart_tags(text: str) -> list[str]:
105
+ return list(dict.fromkeys(TAG_RE.findall(text))) # de-dupe while preserving order
 
106
 
107
+ def replace_chart_tags(text: str, cmap: dict[str, str], repl):
108
+ return TAG_RE.sub(lambda m: repl(cmap[m.group(1)]) if m.group(1) in cmap else m.group(0), text)
 
 
 
 
 
 
 
109
 
110
  # ─────────────────────────────────────────────────────────────────────────────
111
+ # PDF / PPTX BUILDERS
112
  # ─────────────────────────────────────────────────────────────────────────────
113
+ class _PDF(FPDF, HTMLMixin): pass
114
+
115
+ def build_pdf(markdown: str, cmap: dict[str, str]) -> bytes:
116
+ markdown = fix_bullet(markdown).replace("β€’", "*")
117
+ markdown = replace_chart_tags(markdown, cmap, lambda p: f'<img src="{p}">')
118
+ html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(markdown)
119
+
120
+ pdf = _PDF(); pdf.set_auto_page_break(True, 15)
121
+ fonts_ok = False
122
+ for s, ttf in [("", FONT_REG), ("B", FONT_BLD)]:
123
+ if ttf.exists():
124
+ try: pdf.add_font(FONT_FAM, s, str(ttf), uni=True); fonts_ok = True
125
+ except: pass
126
+ if fonts_ok: pdf.set_fallback_fonts([FONT_FAM])
127
+
128
+ pdf.add_page()
129
+ pdf.set_font(FONT_FAM if fonts_ok else "Arial", "B", 18)
130
+ pdf.cell(0, 12, "AI-Generated Business Report", ln=True); pdf.ln(3)
131
+ pdf.set_font(FONT_FAM if fonts_ok else "Arial", "", 11)
132
+
133
+ # shrink big tables / images
134
+ html = re.sub(r'(<table[^>]*>)', r'\1<font size="8">', html)
135
+ pdf.write_html(html)
136
+ return pdf.output(dest="S").encode("latin-1", "ignore")
137
+
138
+ def build_pptx(slides: tuple[str, ...], cmap: dict[str, str]) -> bytes:
139
+ prs = Presentation(); layout = prs.slide_layouts[1]
140
+ for raw in slides:
141
+ if not raw.strip(): continue
142
+ raw = fix_bullet(raw)
143
+ chart_here = extract_chart_tags(raw)
144
+ title, *body_lines = [l.strip(" -β€’") for l in raw.splitlines() if l.strip()]
145
+
146
+ slide = prs.slides.add_slide(layout)
147
+ slide.shapes.title.text = title or "Slide"
148
+
149
+ # body text
150
+ tf = slide.shapes.placeholders[1].text_frame; tf.clear(); tf.word_wrap = True
151
+ for line in body_lines:
152
+ if "generate_chart" in line.lower(): continue
153
+ p = tf.add_paragraph(); p.text = line; p.font.size = Pt(20)
154
+
155
+ # first matching chart
156
+ for tag in chart_here:
157
+ if tag in cmap:
158
+ slide.shapes.add_picture(cmap[tag], Inches(1), Inches(3.5), width=Inches(8))
159
+ break
160
+ buf = io.BytesIO(); prs.save(buf); return buf.getvalue()
161
 
162
+ # ───────────────────────���─────────────────────────────────────────────────────
163
+ # MAIN GENERATION (unchanged business logic, but sturdier asyncio)
164
+ # ─────────────────────────────────────────────────────────────────────────────
165
@st.cache_data(show_spinner=False)  # keyed on all args, incl. the content hash `key`
def generate_assets(key, file_bytes, filename, mode, ctx):
    """Run the agent pipeline over an uploaded dataset and build the outputs.

    Args:
        key: content hash of (file bytes, mode, context) — forces a cache miss
            whenever the inputs change.
        file_bytes: raw uploaded file contents (CSV or XLSX).
        filename: original upload name; only the extension is used to pick
            the pandas reader.
        mode: "Report", "Presentation", or "Both".
        ctx: optional free-text business context from the user.

    Returns:
        dict with keys preview_md / pdf / slides / pptx / chart_count / key,
        or None on failure (an st.error has already been shown).
    """
    # read file — extension decides the parser; anything not .xlsx is read as CSV
    df = pd.read_excel(io.BytesIO(file_bytes)) if filename.lower().endswith(".xlsx") else pd.read_csv(io.BytesIO(file_bytes))

    # prompts — the model is instructed to emit <generate_chart: "..."> tags
    # that are later extracted and turned into matplotlib images
    report_prompt = """You are a senior business analyst … <generate_chart: "bar chart of sales by region"> …"""
    pres_prompt = f"""Create exactly {SLIDES} concise slides … one chart tag per slide …"""

    # One LLM agent per requested output; they run sequentially under one root.
    agents = []
    if mode in ("Report", "Both"):
        agents.append(LlmAgent("ReportAgent", "gemini-2.5-flash", report_prompt))
    if mode in ("Presentation", "Both"):
        agents.append(LlmAgent("PresentationAgent", "gemini-2.5-flash", pres_prompt))
    root = SequentialAgent("Pipeline", sub_agents=agents)

    async def _go():
        # Fresh in-memory session per call; sid is throwaway.
        svc = InMemorySessionService(); sid = str(uuid.uuid4())
        await svc.create_session(app_name="studio", user_id="u", session_id=sid)
        runner = Runner(root, "studio", svc)
        # The agents receive a compact JSON sketch of the dataframe, not the
        # full data: shape, columns, dtypes, a 3-row sample, and user context.
        ctx_payload = types.Content(role="user", parts=[types.Part(text=json.dumps({
            "shape": df.shape, "columns": list(df.columns), "types": df.dtypes.astype(str).to_dict(),
            "sample": df.head(3).to_dict(), "user_ctx": ctx or "None"
        }, indent=2))])
        out = {}
        # Collect each agent's final response, keyed by agent name.
        async for ev in runner.run_async(user_id="u", session_id=sid, new_message=ctx_payload):
            if ev.is_final_response():
                out[ev.author] = ev.content.parts[0].text
        return out

    # NOTE(review): asyncio.run() raises if a loop is already running in this
    # thread — assumed safe under Streamlit's script thread; confirm.
    try: out = asyncio.run(_go())
    except Exception as e:
        st.error(f"Agent run failed: {e}"); return None

    if not out: st.error("No output"); return None

    # charts — every tag found in any agent output gets one render attempt
    chart_tags = extract_chart_tags("\n".join(out.values()))
    cmap = {}
    if chart_tags:
        chart_agent = create_pandas_dataframe_agent(
            llm=ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.1),
            df=df, verbose=False, allow_dangerous_code=True)
        for tag in chart_tags:
            try:
                # The agent draws onto matplotlib's current figure; we then
                # harvest it via plt.gcf() rather than a returned object.
                chart_agent.run(f"Make a {tag} with matplotlib and save it to 'plt'.")
                fig = plt.gcf()
                if fig.get_axes():  # only save if something was actually plotted
                    p = Path(tempfile.gettempdir())/f"ch_{uuid.uuid4()}.png"
                    fig.savefig(p, dpi=300, bbox_inches="tight", facecolor="white")
                    cmap[tag] = str(p)
                plt.close('all')  # reset global state before the next chart
            except Exception: pass  # best-effort: a failed chart is simply omitted

    # outputs — build only what the chosen mode requested
    md_preview = pdf_bytes = pptx_bytes = None; slides = []
    if "ReportAgent" in out:
        md_raw = fix_bullet(out["ReportAgent"])
        pdf_bytes = build_pdf(md_raw, cmap)
        # Preview embeds charts as base64 data URIs so st.markdown can show
        # them without serving the temp files.
        md_preview = replace_chart_tags(md_raw, cmap, lambda p: f'<img src="data:image/png;base64,{base64.b64encode(open(p,"rb").read()).decode()}">')

    if "PresentationAgent" in out:
        raw = fix_bullet(out["PresentationAgent"])
        # Split on "Slide N" headings (case-insensitive, lookahead keeps the heading).
        parts = re.split(r'(?i)(?=^\s*slide\s+\d+)', raw, flags=re.MULTILINE)
        slides = [p.strip() for p in parts if p.strip()]
        if slides: pptx_bytes = build_pptx(tuple(slides), cmap)

    return {"preview_md": md_preview, "pdf": pdf_bytes, "slides": slides,
            "pptx": pptx_bytes, "chart_count": len(cmap), "key": key}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
# ─────────────────────────────────────────────────────────────────────────────
# UI
# ─────────────────────────────────────────────────────────────────────────────
mode = st.radio("Choose output format:", ["Report", "Presentation", "Both"], horizontal=True, index=2)
upl = st.file_uploader("Upload business data", ["csv", "xlsx"])
ctx = st.text_area("Business context (optional)")

if st.button("πŸš€ Generate Narrative", type="primary"):
    if not upl:
        st.warning("Please upload a file"); st.stop()
    key = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode()]))
    bundle = generate_assets(key, upl.getvalue(), upl.name, mode, ctx)
    if not bundle: st.stop()
    # BUG FIX: previously all rendering lived inside this `if st.button(...)`
    # branch. Any widget interaction (Narrate / Prev / Next) triggers a rerun
    # in which st.button() returns False, so the whole output vanished and the
    # slide navigation could never work. Persist the result in session_state
    # and render from there on every run instead.
    st.session_state["bundle"] = bundle
    st.session_state["bundle_mode"] = mode
    # reset slide index only when a genuinely new bundle arrives
    if st.session_state.get("bundle_key") != key:
        st.session_state["bundle_key"] = key
        st.session_state["slide_idx"] = 0

bundle = st.session_state.get("bundle")
if bundle:
    view_mode = st.session_state.get("bundle_mode", mode)
    if bundle["chart_count"]:
        st.success(f"βœ… Generated {bundle['chart_count']} charts")

    # Tabs — one per requested artifact
    if view_mode == "Both":
        tab_rep, tab_pre = st.tabs(["πŸ“„ Report", "πŸ“‘ Slides"])
    elif view_mode == "Report":
        tab_rep = st.container(); tab_pre = None
    else:
        tab_pre = st.container(); tab_rep = None

    # Report tab
    if tab_rep:
        with tab_rep:
            st.subheader("Generated Report")
            st.markdown(bundle["preview_md"] or "_no report_", unsafe_allow_html=True)
            if bundle["pdf"]:
                st.download_button("⬇️ PDF", bundle["pdf"], "business_report.pdf", "application/pdf")

    # Slides tab
    if tab_pre:
        with tab_pre:
            slides = bundle["slides"]; n = len(slides)
            if not slides:
                st.warning("No slides created")
            else:
                # clamp: a stale index from a previous (longer) deck must not IndexError
                idx = min(st.session_state.get("slide_idx", 0), n - 1)
                st.markdown(f"##### Slide {idx+1}/{n}")
                text = replace_chart_tags(slides[idx], {}, lambda _: "")  # strip chart tags from display text
                st.markdown(text)

                # Narrate the current slide via TTS
                if st.button("πŸ”Š Narrate", key=f"tts_{idx}"):
                    audio, mtype = generate_tts_audio(GEM, re.sub(r'\n+', '. ', text))
                    if audio:
                        # raw PCM/L16 must be wrapped in a WAV container for st.audio
                        if 'pcm' in (mtype or '').lower() or 'l16' in (mtype or '').lower():
                            audio = convert_pcm_to_wav(audio); mtype = "audio/wav"
                        st.audio(audio, format=mtype)

                # Nav buttons — on_click mutates session_state, the rerun re-renders
                c1, c2, c3 = st.columns([1, 2, 1])
                with c1:
                    st.button("⬅️ Prev", on_click=lambda: st.session_state.update(slide_idx=max(idx-1, 0)), disabled=idx == 0)
                with c3:
                    st.button("Next ➑️", on_click=lambda: st.session_state.update(slide_idx=min(idx+1, n-1)), disabled=idx == n-1)

                if bundle["pptx"]:
                    st.download_button("⬇️ PowerPoint", bundle["pptx"],
                                       "business_presentation.pptx",
                                       "application/vnd.openxmlformats-officedocument.presentationml.presentation")