"""NeoFix: a Streamlit DIY assistant backed by Google Gemini.

The user uploads (or photographs) an item and optionally describes a goal.
Gemini categorises the project, proposes a plan, and then produces an
illustrated step-by-step guide that is rendered as an interactive checklist
with optional text-to-speech narration for each step.
"""

import base64
import io
import os
import re
import tempfile
import time
import wave
from io import BytesIO

import streamlit as st
from google import genai
from google.genai import types
from PIL import Image

# Disable Streamlit analytics (prevents PermissionError in some environments)
os.environ["STREAMLIT_ANALYTICS_ENABLED"] = "false"

# Page configuration is issued before any other Streamlit command so that the
# error messages emitted by the configuration section below cannot precede it.
st.set_page_config(page_title="NeoFix DIY Assistant", page_icon="🛠️", layout="wide")

# ─────────────────────────────────────────────────────────────────────────────
# 1. CONFIGURATION
# ─────────────────────────────────────────────────────────────────────────────


# 1.1 Load your Google API key from environment or Streamlit secrets
def _load_api_key():
    """Return the Google API key from Streamlit secrets, else the environment.

    Returns:
        The key string, or None when neither source provides one.
    """
    try:
        return st.secrets["GOOGLE_API_KEY"]
    except (AttributeError, KeyError, FileNotFoundError):
        # FileNotFoundError covers a missing secrets file; without it the
        # environment-variable fallback below would never be reached.
        return os.environ.get("GOOGLE_API_KEY")


API_KEY = _load_api_key()

if not API_KEY:
    st.error("Please set GOOGLE_API_KEY in your environment variables or Streamlit secrets")
    st.stop()

# 1.2 Initialize the GenAI client
try:
    client = genai.Client(api_key=API_KEY)
except Exception as e:
    st.error(f"Failed to initialize GenAI Client: {e}")
    st.stop()

# 1.3 Constants
CATEGORY_MODEL = "gemini-2.0-flash-exp"
GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
TTS_MODEL = "gemini-2.5-flash-preview-tts"

MAX_UPLOAD_BYTES = 5 * 1024 * 1024  # 5MB upload limit
ALLOWED_IMAGE_TYPES = ('image/jpeg', 'image/jpg', 'image/png', 'image/bmp', 'image/gif')
MAX_IMAGE_DIMENSION = 1024  # longest side, in pixels, after downscaling
TTS_MAX_CHARS = 500  # narration text is truncated to this length
DEFAULT_PCM_SAMPLE_RATE = 24000  # used when the audio mime type names no rate

# A step line is "<number>." or "<number>)" followed by the step text.
_STEP_LINE_RE = re.compile(r"^\s*(\d+)[.)]\s*(.*)", re.MULTILINE)
# "STEPS" header, tolerating a parenthetical such as "STEPS (max 7):".
_STEPS_HEADER = r"STEPS[^:\n]*:"

HOW_IT_WORKS_TEXT = """
1. **Upload a photo** of your project or the item you want to fix or build (appliance, car part, plant, craft project).
2. **(Optional) Describe your goal** for more accurate results.
3. **Review the Plan.** The AI will propose a plan. If you didn't provide a description, you'll be asked to approve it.
4. **Get Your Guide** with tools and illustrated step-by-step instructions.
5. **Follow the Steps** using the interactive checklist.
"""

UPLOAD_TROUBLESHOOTING_TEXT = """
**Common fixes:**
1. **Refresh upload**: Click button below
2. **Check file size**: Max 5MB
3. **Try different format**: JPG works best
4. **Use camera**: If file upload fails
5. **Clear browser cache**: Ctrl+Shift+Delete
"""


# 1.4 Helper to parse numbered steps out of Gemini text
def parse_numbered_steps(text):
    """Extract numbered list items from free-form text.

    Args:
        text: Text containing lines such as "1. Do this" or "2) Do that".

    Returns:
        A list of (number, description) tuples in order of appearance.
        Empty or None input yields an empty list.
    """
    if not text:
        return []
    return [(int(num), desc.strip()) for num, desc in _STEP_LINE_RE.findall(text)]


# 1.5 File Upload Handler
def handle_uploaded_file(uploaded_file):
    """Validate an uploaded file and turn it into a PIL image.

    Args:
        uploaded_file: The object returned by st.file_uploader or
            st.camera_input (needs .size, .type, .read() and .seek()),
            or None.

    Returns:
        (image, "Success") on success, otherwise (None, error_message).
        The image is converted to RGB unless it is already RGB or L, and is
        downscaled so that its longest side is at most MAX_IMAGE_DIMENSION.
    """
    if uploaded_file is None:
        return None, "No file uploaded"

    try:
        # Validate file size
        if uploaded_file.size > MAX_UPLOAD_BYTES:
            return None, f"File size ({uploaded_file.size / 1024 / 1024:.1f}MB) exceeds limit (5MB)"

        # Validate file type
        if uploaded_file.type not in ALLOWED_IMAGE_TYPES:
            return None, f"Unsupported file type: {uploaded_file.type}. Allowed: JPG, PNG, BMP, GIF"

        # Read file bytes; rewind first because this handler runs again on
        # every script rerun against the same file object.
        try:
            uploaded_file.seek(0)
            file_bytes = uploaded_file.read()
            uploaded_file.seek(0)
        except Exception as read_error:
            return None, f"Error reading file: {str(read_error)}"
        if len(file_bytes) == 0:
            return None, "File appears to be empty"

        try:
            # verify() leaves the image unusable, so it gets its own handle.
            with Image.open(BytesIO(file_bytes)) as probe:
                probe.verify()

            image = Image.open(BytesIO(file_bytes))
            # Decode now so that truncated data is reported here rather than
            # later when the image is displayed or sent to the model.
            image.load()

            # Convert to RGB if necessary (handles RGBA, P mode, etc.)
            if image.mode not in ('RGB', 'L'):
                image = image.convert('RGB')

            # Resize if too large
            if max(image.size) > MAX_IMAGE_DIMENSION:
                image.thumbnail((MAX_IMAGE_DIMENSION, MAX_IMAGE_DIMENSION), Image.Resampling.LANCZOS)

            return image, "Success"
        except Exception as img_error:
            return None, f"Invalid or corrupted image: {str(img_error)}"

    except Exception as e:
        return None, f"Unexpected error processing file: {str(e)}"


# 1.6 TTS Generation
@st.cache_data
def _request_tts_audio(_client, text_to_speak):
    """Call the TTS model and return (audio_bytes, mime_type).

    Raises on any failure so that only successful results are cached.
    The leading underscore on _client keeps it out of the cache key.
    """
    response = _client.models.generate_content(
        model=TTS_MODEL,
        contents=f"Say clearly: {text_to_speak}",
        config=types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name='Kore',
                    )
                )
            ),
        )
    )
    parts = response.candidates[0].content.parts or []
    for part in parts:
        inline = part.inline_data
        if inline is not None and inline.data:
            return inline.data, inline.mime_type
    raise ValueError("The TTS response contained no audio data")


def generate_tts_audio(_client, text_to_speak):
    """Generate narration audio for a piece of text.

    Args:
        _client: The GenAI client used for the request.
        text_to_speak: Text to narrate; truncated to TTS_MAX_CHARS characters.

    Returns:
        (audio_bytes, mime_type) on success, or (None, None) after showing
        an error message in the app.
    """
    # Limit text length to prevent timeout
    if len(text_to_speak) > TTS_MAX_CHARS:
        text_to_speak = text_to_speak[:TTS_MAX_CHARS] + "..."
    try:
        return _request_tts_audio(_client, text_to_speak)
    except Exception as e:
        st.error(f"Failed to generate narration: {e}")
        return None, None


# 1.7 Helpers to wrap raw PCM audio in a WAV container
def _convert_pcm_to_wav(pcm_data, sample_rate=24000, channels=1, sample_width=2):
    """Wrap raw PCM audio data in a WAV container in memory.

    Args:
        pcm_data: Raw PCM frames as bytes.
        sample_rate: Frames per second written to the WAV header.
        channels: Number of audio channels.
        sample_width: Bytes per sample.

    Returns:
        The complete WAV file as bytes.
    """
    audio_buffer = io.BytesIO()
    with wave.open(audio_buffer, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(pcm_data)
    return audio_buffer.getvalue()


def _pcm_sample_rate(mime_type, default=DEFAULT_PCM_SAMPLE_RATE):
    """Read a "rate=<n>" parameter from an audio mime type, else the default."""
    match = re.search(r"rate=(\d+)", mime_type or "")
    return int(match.group(1)) if match else default


# ─────────────────────────────────────────────────────────────────────────────
# 2. SESSION STATE SETUP
# ─────────────────────────────────────────────────────────────────────────────
def _default_app_state():
    """Return a fresh copy of the initial per-session application state."""
    return {
        "steps": [],
        "images": {},
        "tools_list": [],
        "current_step": 1,
        "done_flags": {},
        "notes": {},
        "timers": {},
        "category": None,
        "prompt_sent": False,
        "timer_running": {},
        "last_tick": {},
        "project_title": "",
        "project_description": "",
        "upcycling_options": [],
        "plan_approved": False,
        "initial_plan": "",
        "user_image": None,
        "upload_error": None,
        "upload_attempts": 0,
        "last_uploaded_file": None,
    }


if "app_state" not in st.session_state:
    st.session_state.app_state = _default_app_state()


# ─────────────────────────────────────────────────────────────────────────────
# 3. LAYOUT & FUNCTIONS
# ─────────────────────────────────────────────────────────────────────────────
def reset_state():
    """Clear out all session state so user can start fresh, then rerun."""
    st.session_state.app_state = _default_app_state()
    st.success("✅ Reset complete!")
    st.rerun()


def send_text_request(model_name, prompt, image):
    """Send a prompt plus image to a model and return its text reply.

    Args:
        model_name: Name of the Gemini model to use.
        prompt: The text prompt.
        image: The PIL image sent alongside the prompt.

    Returns:
        The concatenated, stripped text of the first candidate, or None after
        showing an error message when the request fails.
    """
    try:
        chat = client.chats.create(model=model_name)
        response = chat.send_message([prompt, image])
        parts = response.candidates[0].content.parts or []
        return "".join(part.text for part in parts if part.text).strip()
    except Exception as e:
        st.error(f"Error with model {model_name}: {str(e)}")
        return None


def _parse_plan_response(plan_response):
    """Split a plan response into (title, description, initial_plan_text).

    Raises:
        ValueError: If any of the three labelled sections is missing.
    """
    title = re.search(r"TITLE:\s*(.*)", plan_response)
    # Stop at the next header; a greedy DOTALL match would swallow the plan.
    description = re.search(r"DESCRIPTION:\s*(.*?)\s*INITIAL PLAN:", plan_response, re.DOTALL)
    plan = re.search(r"INITIAL PLAN:\s*(.*)", plan_response, re.DOTALL)
    if title is None or description is None or plan is None:
        raise ValueError("plan response is missing a required section")
    return title.group(1).strip(), description.group(1).strip(), plan.group(1).strip()


def initial_analysis(image, context_text):
    """First pass with AI: get category, then title, description, and initial plan.

    Results are written to st.session_state.app_state. "prompt_sent" is set
    to True only when the plan response could be parsed. When the user gave
    a description and no upcycling options were offered, the detailed guide
    is generated straight away.

    Args:
        image: The validated PIL image of the project.
        context_text: The user's optional description of the goal.
    """
    if image is None:
        st.error("No valid image provided for analysis")
        return

    state = st.session_state.app_state
    state['user_image'] = image

    with st.spinner("🤖 Analyzing your project and preparing a plan..."):
        category_prompt = (
            "You are an expert DIY assistant. Analyze the user's image and context. "
            f"Context: '{context_text}'. "
            "Categorize the project into ONE of the following: "
            "Home Appliance Repair, Automotive Maintenance, Gardening & Urban Farming, "
            "Upcycling & Sustainable Crafts, or DIY Project Creation. "
            "Reply with ONLY the category name."
        )
        category = send_text_request(CATEGORY_MODEL, category_prompt, image)
        if not category:
            return
        state['category'] = category

        context_display = context_text if context_text else 'No context provided.'
        plan_prompt = (
            f'You are an expert DIY assistant in the category: {category}.\n'
            f'User Context: "{context_display}"\n'
            'Based on the image and context, perform the following:\n'
            '1. **Title:** Create a short, clear title for this project.\n'
            '2. **Description:** Write a brief, one-paragraph description of the goal.\n'
            '3. **Initial Plan:**\n'
            "- If 'Upcycling & Sustainable Crafts' AND no specific project is mentioned, "
            'propose three distinct project options as a numbered list under "UPCYCLING OPTIONS:".\n'
            '- For all other cases, briefly outline the main stages of the proposed solution.\n'
            'Structure your response EXACTLY like this:\n'
            'TITLE: [Your title]\n'
            'DESCRIPTION: [Your description]\n'
            'INITIAL PLAN: [Your plan or 3 options]\n'
        )
        plan_response = send_text_request(GENERATION_MODEL, plan_prompt, image)
        if not plan_response:
            return

        try:
            title, description, initial_plan_text = _parse_plan_response(plan_response)
        except ValueError:
            st.error("The AI response was not in the expected format. Please try again.")
            state['prompt_sent'] = False
            return

        state['project_title'] = title
        state['project_description'] = description

        options = []
        if "UPCYCLING OPTIONS:" in initial_plan_text:
            options = [
                opt.strip()
                for opt in re.findall(r"^\s*\d+\.\s*(.*)", initial_plan_text, re.MULTILINE)
                if opt.strip()
            ]
        if options:
            state['upcycling_options'] = options
        else:
            # Also the fallback when the marker is present but no numbered
            # options could be extracted, so the plan view is never empty.
            state['initial_plan'] = initial_plan_text

        state['prompt_sent'] = True

        # When options were offered the user has to pick one first; otherwise
        # a supplied description counts as approval of the plan.
        if context_text and not options:
            state['plan_approved'] = True
            generate_detailed_guide_with_images()
        else:
            state['plan_approved'] = False


def generate_detailed_guide_with_images(selected_option=None):
    """Generate the detailed guide with steps and illustrations.

    On success the tools list, steps, per-step images, done flags, notes and
    timers in st.session_state.app_state are replaced. On failure an error
    message is shown and the stored steps are left untouched.

    Args:
        selected_option: The upcycling project chosen by the user, if any.
    """
    state = st.session_state.app_state
    image = state.get('user_image')
    if image is None:
        st.error("Image not found. Please start over.")
        return

    context = f"The user has approved the plan for '{state['project_title']}'."
    if selected_option:
        context = f"The user chose the upcycling project: '{selected_option}'."

    # The section headers named here are the ones the parser below looks for.
    detailed_prompt = (
        'You are a DIY expert. The user wants to proceed with the project titled '
        f'"{state["project_title"]}".\n'
        f'{context}\n'
        'Provide a detailed guide with a maximum of 7 steps. '
        'For each step, you MUST provide a simple, clear illustrative image.\n'
        'Format your response EXACTLY like this:\n'
        'TOOLS AND MATERIALS:\n'
        '- Tool A\n'
        '- Material B\n'
        'STEPS:\n'
        '1. First step instructions.\n'
        '2. Second step instructions...\n'
    )

    with st.spinner("🛠️ Generating your detailed guide with illustrations..."):
        try:
            chat = client.chats.create(
                model=GENERATION_MODEL,
                config=types.GenerateContentConfig(response_modalities=["Text", "Image"])
            )
            full_resp = chat.send_message([detailed_prompt, image])
            gen_parts = full_resp.candidates[0].content.parts or []

            text_chunks = []
            inline_images = []
            for part in gen_parts:
                if part.text is not None:
                    text_chunks.append(part.text)
                if part.inline_data is not None:
                    inline_images.append(Image.open(BytesIO(part.inline_data.data)))
            combined_text = "\n".join(text_chunks).strip()

            tools_match = re.search(
                r"TOOLS AND MATERIALS:\s*(.*?)\s*" + _STEPS_HEADER, combined_text, re.DOTALL
            )
            steps_match = re.search(_STEPS_HEADER + r"\s*(.*)", combined_text, re.DOTALL)
            tools_section = tools_match.group(1).strip() if tools_match else ""
            # Without a STEPS header, fall back to scanning the whole reply.
            steps_section = steps_match.group(1).strip() if steps_match else combined_text

            # Renumber 1..n: navigation uses the step number as a list position.
            parsed_steps = [
                (position, desc)
                for position, (_, desc) in enumerate(parse_numbered_steps(steps_section), start=1)
            ]
            if not parsed_steps:
                raise ValueError("no numbered steps found in the model response")

            tools = [line.strip().strip("-*• \t") for line in tools_section.split('\n')]
            state['tools_list'] = [tool for tool in tools if tool]
            state['steps'] = parsed_steps
            # Images are paired with steps in the order they were returned.
            state['images'] = {idx: img for (idx, _), img in zip(parsed_steps, inline_images)}
            state['current_step'] = 1

            done_flags, notes, timers = {}, {}, {}
            for idx, step_text in parsed_steps:
                done_flags[idx] = False
                notes[idx] = ""
                timer_match = re.search(
                    r"wait\s+for\s+(\d+)\s+(seconds?|minutes?)", step_text.lower()
                )
                if timer_match:
                    val, unit = int(timer_match.group(1)), timer_match.group(2)
                    timers[idx] = val * (60 if "minute" in unit else 1)
                else:
                    timers[idx] = 0
            state['done_flags'] = done_flags
            state['notes'] = notes
            state['timers'] = timers
        except Exception as e:
            st.error(f"Failed to generate or parse the illustrated guide: {str(e)}")


def render_sidebar_navigation():
    """Render the sidebar progress bar and one jump button per step."""
    st.sidebar.markdown("## Steps Navigation")
    state = st.session_state.app_state
    steps = state['steps']
    if not steps:
        return

    total_steps = len(steps)
    completed = sum(1 for done in state['done_flags'].values() if done)
    st.sidebar.progress(completed / total_steps if total_steps > 0 else 0)
    st.sidebar.write(f"Progress: {completed}/{total_steps} steps")

    for idx, _ in steps:
        is_done = state['done_flags'].get(idx, False)
        label = f"{'✓' if is_done else '·'} Step {idx}"
        if st.sidebar.button(label, key=f"nav_{idx}"):
            state['current_step'] = idx
            st.rerun()


def render_tools_list():
    """Render the tools and materials list in an expander, if there is one."""
    tools = st.session_state.app_state['tools_list']
    if tools:
        with st.expander("🔧 Required Tools & Materials", expanded=True):
            for item in tools:
                st.markdown(f"- {item}")


def render_step(idx, text):
    """Render one step: text, narration button, image, checklist and notes.

    Args:
        idx: 1-based step number, also used as the key into the per-step
            dictionaries in the session state.
        text: The step's instruction text.
    """
    state = st.session_state.app_state
    total = len(state['steps'])
    st.markdown(f"### Step {idx} of {total}")
    st.write(text)

    # TTS narration
    if st.button(f"🔊 Narrate Step {idx}", key=f"tts_{idx}"):
        with st.spinner("Generating narration..."):
            audio_data, mime_type = generate_tts_audio(client, text)
        if audio_data:
            mime_lower = (mime_type or "").lower()
            # Check if the audio is raw PCM data
            if 'l16' in mime_lower or 'pcm' in mime_lower:
                st.info("Raw audio format detected. Converting to WAV for playback...")
                wav_data = _convert_pcm_to_wav(
                    audio_data, sample_rate=_pcm_sample_rate(mime_type)
                )
                st.audio(wav_data, format="audio/wav")
            else:
                # If it's already in a standard format (like mp3, ogg), play it directly
                st.audio(audio_data, format=mime_type or "audio/wav")
        else:
            st.error("Could not generate audio.")

    if idx in state['images']:
        st.image(
            state['images'][idx],
            caption=f"Illustration for step {idx}",
            use_container_width=True
        )

    done = st.checkbox(
        "✅ Mark this step as completed",
        value=state['done_flags'].get(idx, False),
        key=f"done_{idx}"
    )
    state['done_flags'][idx] = done

    notes = st.text_area(
        "📝 Your notes for this step:",
        value=state['notes'].get(idx, ""),
        height=100,
        key=f"notes_{idx}"
    )
    state['notes'][idx] = notes

    st.markdown("---")
    col1, col2, col3 = st.columns([1, 2, 1])
    if idx > 1 and col1.button("⬅️ Previous", key=f"prev_{idx}"):
        state['current_step'] = idx - 1
        st.rerun()
    if idx < total and col3.button("Next ➡️", key=f"next_{idx}"):
        state['current_step'] = idx + 1
        st.rerun()


# ─────────────────────────────────────────────────────────────────────────────
# 4. APP LAYOUT
# ─────────────────────────────────────────────────────────────────────────────
st.title("🛠️ NeoFix AI-Powered DIY Assistant")

with st.expander("ℹ️ How it works", expanded=False):
    st.write(HOW_IT_WORKS_TEXT)

app_state = st.session_state.app_state

if not app_state['prompt_sent']:
    st.markdown("---")
    col1, col2 = st.columns([3, 1])

    with col1:
        st.markdown("### 📷 Upload Project Image")

        # Show upload status
        if app_state.get('upload_error'):
            st.error(f"Upload Error: {app_state['upload_error']}")
        if app_state.get('upload_attempts', 0) > 0:
            st.info(f"Upload attempts: {app_state['upload_attempts']}")

        # The key changes after every failed attempt, which gives the user a
        # fresh, empty uploader widget.
        upload_key = f"file_upload_{app_state.get('upload_attempts', 0)}"
        uploaded_image = st.file_uploader(
            "Choose an image file",
            type=["jpg", "jpeg", "png", "bmp", "gif"],
            accept_multiple_files=False,
            key=upload_key,
            help="Supported: JPG, PNG, BMP, GIF (max 5MB)"
        )

        # Process uploaded image immediately
        processed_image = None
        upload_status = ""

        if uploaded_image is not None:
            # Check if this is a new file upload
            current_file_id = f"{uploaded_image.name}_{uploaded_image.size}"
            if current_file_id != app_state.get('last_uploaded_file'):
                app_state['last_uploaded_file'] = current_file_id
                with st.spinner("Processing uploaded image..."):
                    processed_image, upload_status = handle_uploaded_file(uploaded_image)
                if processed_image is not None:
                    app_state['upload_error'] = None
                    st.success("✅ Image uploaded and processed successfully!")
                    st.image(processed_image, caption="Uploaded image preview", use_container_width=True)
                else:
                    app_state['upload_error'] = upload_status
                    app_state['upload_attempts'] += 1
                    st.error(f"❌ {upload_status}")
            elif app_state.get('upload_error') is None:
                # Same file as on the previous run: rebuild the preview.
                processed_image, upload_status = handle_uploaded_file(uploaded_image)
                if processed_image is not None:
                    st.success("✅ Image ready for analysis!")
                    st.image(processed_image, caption="Uploaded image preview", use_container_width=True)

        # Alternative camera input
        st.markdown("##### Alternative: Take a photo")
        camera_image = st.camera_input(
            "Take a picture",
            key=f"camera_{app_state.get('upload_attempts', 0)}"
        )

        if camera_image is not None and uploaded_image is None:
            with st.spinner("Processing camera image..."):
                processed_image, upload_status = handle_uploaded_file(camera_image)
            if processed_image is not None:
                app_state['upload_error'] = None
                st.success("✅ Photo captured and processed!")
                st.image(processed_image, caption="Camera photo preview", use_container_width=True)
            else:
                st.error(f"❌ {upload_status}")

        context_text = st.text_area(
            "✏️ Describe the issue or your goal (optional but recommended)",
            height=80,
            placeholder="e.g., 'My toaster won't turn on,' or 'How do I build a desk like this?'"
        )

    with col2:
        st.markdown("### Actions")

        # Get AI Guidance button - only enabled when image is ready
        has_valid_image = (
            (uploaded_image is not None or camera_image is not None)
            and app_state.get('upload_error') is None
        )

        if st.button(
            "🚀 Get AI Guidance",
            type="primary",
            use_container_width=True,
            disabled=not has_valid_image
        ):
            # Reuse the image processed above; reprocess only if it is missing.
            image_to_analyze, status = processed_image, upload_status
            if image_to_analyze is None:
                source = uploaded_image if uploaded_image is not None else camera_image
                image_to_analyze, status = handle_uploaded_file(source)

            if image_to_analyze is not None:
                initial_analysis(image_to_analyze, context_text)
                # Rerun only on success; rerunning after a failure would wipe
                # the error message before the user could read it.
                if app_state['prompt_sent']:
                    st.rerun()
            else:
                st.error(f"❌ Image processing failed: {status}")

        # Status message for button
        if not has_valid_image:
            if uploaded_image is None and camera_image is None:
                st.warning("⚠️ Please upload an image first!")
            elif app_state.get('upload_error'):
                st.warning("⚠️ Fix upload error first!")

        # Troubleshooting section
        with st.expander("🔧 Upload Troubleshooting"):
            st.markdown(UPLOAD_TROUBLESHOOTING_TEXT)

            if st.button("🔄 Reset Upload", use_container_width=True):
                app_state['upload_attempts'] = 0
                app_state['upload_error'] = None
                app_state['last_uploaded_file'] = None
                st.rerun()

            # Debug info
            if st.checkbox("Show debug info"):
                st.json({
                    "upload_attempts": app_state.get('upload_attempts', 0),
                    "upload_error": app_state.get('upload_error'),
                    "last_file": app_state.get('last_uploaded_file'),
                    "has_uploaded_file": uploaded_image is not None,
                    "has_camera_image": camera_image is not None
                })

        if st.button("🔄 Start Over", use_container_width=True):
            reset_state()

else:
    render_sidebar_navigation()

    st.markdown("---")
    st.markdown(f"### {app_state.get('project_title') or 'Your Project'}")
    st.markdown(f"**Category:** `{app_state.get('category') or 'N/A'}`")
    st.info(f"**Description:** {app_state.get('project_description') or 'N/A'}")
    st.markdown("---")

    if not app_state['steps']:
        if app_state['upcycling_options']:
            st.markdown("#### The AI has suggested a few projects. Please choose one:")
            for i, option in enumerate(app_state['upcycling_options']):
                if st.button(option, key=f"option_{i}"):
                    generate_detailed_guide_with_images(selected_option=option)
                    # Keep the error visible when generation failed.
                    if app_state['steps']:
                        st.rerun()
        elif not app_state['plan_approved']:
            st.markdown("#### The AI has proposed the following plan:")
            st.success(app_state['initial_plan'])
            if st.button("✅ Looks good, proceed with this plan", type="primary"):
                app_state['plan_approved'] = True
                generate_detailed_guide_with_images()
                if app_state['steps']:
                    st.rerun()
        else:
            # The plan was approved but no guide was produced; without this
            # branch the page would be empty with no way forward.
            st.warning("The detailed guide could not be generated.")
            if st.button("🔁 Retry guide generation"):
                generate_detailed_guide_with_images()
                if app_state['steps']:
                    st.rerun()
    else:
        render_tools_list()
        st.markdown("---")

        steps = app_state['steps']
        total_steps = len(steps)
        current_step_index = app_state['current_step']
        if not 1 <= current_step_index <= total_steps:
            # Out-of-range position: fall back to the first step.
            app_state['current_step'] = 1
            st.rerun()

        step_num, step_text = steps[current_step_index - 1]
        render_step(step_num, step_text)

        done_count = sum(1 for d in app_state['done_flags'].values() if d)
        if total_steps > 0:
            progress = done_count / total_steps
            st.progress(progress)
            st.markdown(f"**Overall Progress:** {done_count} of {total_steps} completed ({progress:.0%})")
            if done_count == total_steps:
                st.balloons()
                st.success("🎉 Congratulations! You've completed all steps!")

    if st.button("🔄 Start Over"):
        reset_state()