# NeoTest / app.py
# rairo's picture
# Update app.py
# 135e487 verified
import streamlit as st
from PIL import Image
from io import BytesIO
from google import genai
from google.genai import types
import re
import time
import os
import wave
import io
import tempfile
import base64
# Disable Streamlit analytics (prevents PermissionError in some environments)
os.environ["STREAMLIT_ANALYTICS_ENABLED"] = "false"

# ─────────────────────────────────────────────────────────────────────────────
# 1. CONFIGURATION
# ─────────────────────────────────────────────────────────────────────────────
# 1.1 Load your Google API key from Streamlit secrets, falling back to the
#     GOOGLE_API_KEY environment variable.
try:
    API_KEY = st.secrets["GOOGLE_API_KEY"]
except Exception:
    # Reading secrets can fail in several ways (missing key, no secrets file at
    # all, unreadable file). None of these should prevent the environment
    # variable fallback, so every failure is treated as "no secret configured".
    API_KEY = os.environ.get("GOOGLE_API_KEY")

if not API_KEY:
    st.error("Please set GOOGLE_API_KEY in your environment variables or Streamlit secrets")
    st.stop()

# 1.2 Initialize the GenAI client
try:
    client = genai.Client(api_key=API_KEY)
except Exception as e:
    st.error(f"Failed to initialize GenAI Client: {e}")
    st.stop()

# 1.3 Constants: model names used for categorisation, guide generation and TTS
CATEGORY_MODEL = "gemini-2.0-flash-exp"
GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
TTS_MODEL = "gemini-2.5-flash-preview-tts"
# 1.4 Helper to parse numbered steps out of Gemini text
def parse_numbered_steps(text):
    """Extract numbered steps from model output.

    A step is a line that starts (after optional whitespace) with an integer
    followed by a literal ``.`` or ``)``, e.g. ``1. Sand the surface``.

    Args:
        text: Raw text containing the numbered list; ``None`` or an empty
            string yields an empty list.

    Returns:
        A list of ``(step_number, description)`` tuples in order of appearance,
        with descriptions stripped of surrounding whitespace.
    """
    if not text:
        return []
    # Prepend a newline so the first step is matched by the same pattern as
    # every later one.
    text = "\n" + text
    # The separator is an explicit "." or ")". An unescaped "." would match any
    # character and turn lines such as "20 minutes later..." into bogus steps.
    steps = re.findall(r"\n\s*(\d+)[.)]\s*(.*)", text)
    return [(int(num), desc.strip()) for num, desc in steps]
# 1.5 FIXED File Upload Handler
def handle_uploaded_file(uploaded_file):
    """Validate an uploaded (or camera-captured) file and load it as an image.

    The file must be at most 5MB, have one of the accepted image MIME types,
    be non-empty and decode as a valid image. Images in modes other than RGB
    or L are converted to RGB, and anything larger than 1024 pixels on its
    longest side is shrunk in place to fit.

    Args:
        uploaded_file: File-like object exposing ``size``, ``type``, ``read()``
            and ``seek()``, or ``None``.

    Returns:
        ``(image, "Success")`` on success, otherwise ``(None, error_message)``.
        This function never raises; all failures are reported via the message.
    """
    if uploaded_file is None:
        return None, "No file uploaded"
    try:
        # Validate file size (limit to 5MB for better performance in HF Spaces)
        max_size = 5 * 1024 * 1024  # 5MB
        if uploaded_file.size > max_size:
            return None, f"File size ({uploaded_file.size / 1024 / 1024:.1f}MB) exceeds limit (5MB)"

        # Validate file type more strictly
        allowed_types = ['image/jpeg', 'image/jpg', 'image/png', 'image/bmp', 'image/gif']
        if uploaded_file.type not in allowed_types:
            return None, f"Unsupported file type: {uploaded_file.type}. Allowed: JPG, PNG, BMP, GIF"

        # Read file bytes with error handling. The same upload object is passed
        # to this function several times, so rewind BEFORE reading: a stream
        # that was already consumed must not be mistaken for an empty file.
        try:
            uploaded_file.seek(0)
            file_bytes = uploaded_file.read()
        except Exception as read_error:
            return None, f"Error reading file: {str(read_error)}"
        if not file_bytes:
            return None, "File appears to be empty"

        # Leave the stream rewound for any later consumer.
        uploaded_file.seek(0)

        # Try to open and validate the image
        try:
            # verify() invalidates the image object, so it runs on a throwaway
            # instance and the image is reopened for actual use.
            Image.open(BytesIO(file_bytes)).verify()
            image = Image.open(BytesIO(file_bytes))

            # Convert to RGB if necessary (handles RGBA, P mode, etc.)
            if image.mode not in ('RGB', 'L'):
                image = image.convert('RGB')

            # Resize if too large (helps with memory in HF Spaces)
            max_dimension = 1024
            if max(image.size) > max_dimension:
                image.thumbnail((max_dimension, max_dimension), Image.Resampling.LANCZOS)
            return image, "Success"
        except Exception as img_error:
            return None, f"Invalid or corrupted image: {str(img_error)}"
    except Exception as e:
        return None, f"Unexpected error processing file: {str(e)}"
# 1.6 TTS Generation Function with better error handling
@st.cache_data
def _generate_tts_audio_cached(_client, text_to_speak):
    """Call the TTS model and return ``(audio_bytes, mime_type)``.

    Raises on any failure so that only successful results are stored in the
    Streamlit cache. The leading underscore on ``_client`` tells Streamlit not
    to hash the client object, so the cache key is the text alone.
    """
    response = _client.models.generate_content(
        model=TTS_MODEL,
        contents=f"Say clearly: {text_to_speak}",
        config=types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name='Kore',
                    )
                )
            ),
        )
    )
    audio_part = response.candidates[0].content.parts[0]
    audio_bytes = audio_part.inline_data.data
    if not audio_bytes:
        raise ValueError("TTS response contained no audio data")
    return audio_bytes, audio_part.inline_data.mime_type


def generate_tts_audio(_client, text_to_speak):
    """Generates audio from text using Gemini TTS and returns the audio data and its mime type.

    Args:
        _client: GenAI client used to call the TTS model.
        text_to_speak: Text to narrate; anything beyond 500 characters is
            truncated to keep the request short.

    Returns:
        ``(audio_bytes, mime_type)`` on success, ``(None, None)`` on failure
        (an error is also shown in the UI). Failures are deliberately NOT
        cached, so a transient API error does not block later retries.
    """
    try:
        # Limit text length to prevent timeout
        if len(text_to_speak) > 500:
            text_to_speak = text_to_speak[:500] + "..."
        return _generate_tts_audio_cached(_client, text_to_speak)
    except Exception as e:
        st.error(f"Failed to generate narration: {e}")
        return None, None
# 1.7 NEW HELPER FUNCTION TO CREATE A WAV FILE IN MEMORY
def _convert_pcm_to_wav(pcm_data, sample_rate=24000, channels=1, sample_width=2):
"""Wraps raw PCM audio data in a WAV container in memory."""
audio_buffer = io.BytesIO()
with wave.open(audio_buffer, 'wb') as wf:
wf.setnchannels(channels)
wf.setsampwidth(sample_width)
wf.setframerate(sample_rate)
wf.writeframes(pcm_data)
audio_buffer.seek(0)
return audio_buffer.getvalue()
# ─────────────────────────────────────────────────────────────────────────────
# 2. SESSION STATE SETUP
# ─────────────────────────────────────────────────────────────────────────────
def _default_app_state():
    """Return a brand-new dict holding the default per-session app state.

    A fresh dict (with fresh nested containers) is built on every call so the
    initial setup and ``reset_state`` can never drift apart or share mutable
    objects.
    """
    return {
        "steps": [], "images": {}, "tools_list": [], "current_step": 1,
        "done_flags": {}, "notes": {}, "timers": {}, "category": None,
        "prompt_sent": False, "timer_running": {}, "last_tick": {},
        "project_title": "", "project_description": "", "upcycling_options": [],
        "plan_approved": False, "initial_plan": "", "user_image": None,
        "upload_error": None, "upload_attempts": 0, "last_uploaded_file": None,
    }


if "app_state" not in st.session_state:
    st.session_state.app_state = _default_app_state()

# ─────────────────────────────────────────────────────────────────────────────
# 3. LAYOUT & FUNCTIONS
# ─────────────────────────────────────────────────────────────────────────────
def reset_state():
    """Clear out all session state so user can start fresh."""
    st.session_state.app_state = _default_app_state()
    # NOTE: the rerun below restarts the script immediately, so this message is
    # visible only briefly.
    st.success("βœ… Reset complete!")
    st.rerun()
def send_text_request(model_name, prompt, image):
    """Send a prompt plus image to *model_name* and return the text reply.

    Only the text parts of the first candidate are used; they are joined and
    stripped. On any exception an error is shown in the UI and ``None`` is
    returned.
    """
    try:
        session = client.chats.create(model=model_name)
        reply = session.send_message([prompt, image])
        reply_parts = reply.candidates[0].content.parts
        text_chunks = [piece.text for piece in reply_parts if piece.text]
        return "".join(text_chunks).strip()
    except Exception as e:
        st.error(f"Error with model {model_name}: {str(e)}")
        return None
def initial_analysis(image, context_text):
    """First pass with AI: get category, then title, description, and initial plan.

    Stores the image, category, title, description and either the upcycling
    options or the initial plan in ``st.session_state.app_state``. When the
    user supplied context text the plan is treated as approved and the
    detailed guide is generated immediately; otherwise approval is left to the
    user.

    Args:
        image: Validated image to analyse; ``None`` aborts with a UI error.
        context_text: Optional free-text description of the user's goal.

    Returns:
        None. Progress is signalled through ``app_state['prompt_sent']``,
        which is set to True only when the model reply was parsed.
    """
    if image is None:
        st.error("No valid image provided for analysis")
        return
    st.session_state.app_state['user_image'] = image
    with st.spinner("πŸ€– Analyzing your project and preparing a plan..."):
        category_prompt = (
            "You are an expert DIY assistant. Analyze the user's image and context. "
            f"Context: '{context_text}'. "
            "Categorize the project into ONE of the following: "
            "Home Appliance Repair, Automotive Maintenance, Gardening & Urban Farming, "
            "Upcycling & Sustainable Crafts, or DIY Project Creation. "
            "Reply with ONLY the category name."
        )
        category = send_text_request(CATEGORY_MODEL, category_prompt, image)
        if not category:
            return
        st.session_state.app_state['category'] = category
        plan_prompt = f"""
You are an expert DIY assistant in the category: {category}.
User Context: "{context_text if context_text else 'No context provided.'}"
Based on the image and context, perform the following:
1. **Title:** Create a short, clear title for this project.
2. **Description:** Write a brief, one-paragraph description of the goal.
3. **Initial Plan:**
- If 'Upcycling & Sustainable Crafts' AND no specific project is mentioned, propose three distinct project options as a numbered list under "UPCYCLING OPTIONS:".
- For all other cases, briefly outline the main stages of the proposed solution.
Structure your response EXACTLY like this:
TITLE: [Your title]
DESCRIPTION: [Your description]
INITIAL PLAN:
[Your plan or 3 options]
"""
        plan_response = send_text_request(GENERATION_MODEL, plan_prompt, image)
        if not plan_response:
            return

        title_match = re.search(r"TITLE:\s*(.*)", plan_response)
        # The description stops at the "INITIAL PLAN:" header. A greedy DOTALL
        # match to the end of the reply would swallow the whole plan into it.
        description_match = re.search(
            r"DESCRIPTION:\s*(.*?)\s*INITIAL PLAN:", plan_response, re.DOTALL
        )
        plan_match = re.search(r"INITIAL PLAN:\s*(.*)", plan_response, re.DOTALL)
        if not (title_match and description_match and plan_match):
            # Validate all three sections before writing any of them, so a
            # malformed reply never leaves half-filled state behind.
            st.error("The AI response was not in the expected format. Please try again.")
            st.session_state.app_state['prompt_sent'] = False
            return

        st.session_state.app_state['project_title'] = title_match.group(1).strip()
        st.session_state.app_state['project_description'] = description_match.group(1).strip()
        initial_plan_text = plan_match.group(1).strip()
        if "UPCYCLING OPTIONS:" in initial_plan_text:
            options = re.findall(r"^\s*\d+\.\s*(.*)", initial_plan_text, re.MULTILINE)
            st.session_state.app_state['upcycling_options'] = options
        else:
            st.session_state.app_state['initial_plan'] = initial_plan_text
        st.session_state.app_state['prompt_sent'] = True
        if context_text:
            # The user already stated the goal, so skip the approval step.
            st.session_state.app_state['plan_approved'] = True
            generate_detailed_guide_with_images()
        else:
            st.session_state.app_state['plan_approved'] = False
def generate_detailed_guide_with_images(selected_option=None):
    """Generates the detailed guide with steps and illustrations.

    Sends the stored user image and a formatting prompt to the generation
    model, then parses the reply into a tools list, numbered steps, one
    illustration per step (matched by position) and per-step timers taken
    from phrases like "wait for 5 minutes". Results are written to
    ``st.session_state.app_state``.

    Args:
        selected_option: Upcycling project chosen by the user, if any; it
            replaces the default "plan approved" context in the prompt.

    Returns:
        None. On failure an error is shown and ``app_state['steps']`` is left
        unchanged.
    """
    image = st.session_state.app_state.get('user_image')
    if not image:
        st.error("Image not found. Please start over.")
        return
    context = f"The user has approved the plan for '{st.session_state.app_state['project_title']}'."
    if selected_option:
        context = f"The user chose the upcycling project: '{selected_option}'."
    detailed_prompt = f"""
You are a DIY expert. The user wants to proceed with the project titled "{st.session_state.app_state['project_title']}".
{context}
Provide a detailed guide. For each step, you MUST provide a simple, clear illustrative image.
Format your response EXACTLY like this:
TOOLS AND MATERIALS:
- Tool A
- Material B
STEPS(Maximum 7 steps):
1. First step instructions.
2. Second step instructions...
"""
    # The prompt shows the header as "STEPS(Maximum 7 steps):", so accept an
    # optional parenthesised suffix in addition to the plain "STEPS:".
    steps_header = r"STEPS\s*(?:\([^)]*\))?\s*:"
    with st.spinner("πŸ› οΈ Generating your detailed guide with illustrations..."):
        try:
            chat = client.chats.create(
                model=GENERATION_MODEL,
                config=types.GenerateContentConfig(response_modalities=["Text", "Image"])
            )
            full_resp = chat.send_message([detailed_prompt, image])
            gen_parts = full_resp.candidates[0].content.parts

            # Split the reply into its text and its inline illustrations.
            text_chunks = []
            inline_images = []
            for part in gen_parts:
                if part.text is not None:
                    text_chunks.append(part.text + "\n")
                if part.inline_data is not None:
                    inline_images.append(Image.open(BytesIO(part.inline_data.data)))
            combined_text = "".join(text_chunks).strip()

            tools_match = re.search(
                rf"TOOLS AND MATERIALS:\s*(.*?)\s*{steps_header}", combined_text, re.DOTALL
            )
            steps_match = re.search(rf"{steps_header}\s*(.*)", combined_text, re.DOTALL)
            if tools_match is None or steps_match is None:
                # Report the real cause instead of an opaque NoneType error.
                raise ValueError(
                    "the AI response is missing the TOOLS AND MATERIALS or STEPS section"
                )
            tools_section = tools_match.group(1).strip()
            steps_section = steps_match.group(1).strip()
            parsed_steps = parse_numbered_steps(steps_section)

            st.session_state.app_state['tools_list'] = [
                line.strip("- ").strip() for line in tools_section.split('\n') if line.strip()
            ]
            st.session_state.app_state['steps'] = parsed_steps
            # Illustrations are matched to steps by position: image k belongs
            # to step number k when that many images were returned.
            st.session_state.app_state['images'] = {
                idx: inline_images[idx - 1]
                for idx, _ in parsed_steps
                if idx - 1 < len(inline_images)
            }
            for idx, step_text in parsed_steps:
                st.session_state.app_state['done_flags'][idx] = False
                st.session_state.app_state['notes'][idx] = ""
                timer_match = re.search(r"wait\s+for\s+(\d+)\s+(seconds?|minutes?)", step_text.lower())
                if timer_match:
                    val, unit = int(timer_match.group(1)), timer_match.group(2)
                    # Timers are stored in seconds.
                    st.session_state.app_state['timers'][idx] = val * (60 if "minute" in unit else 1)
                else:
                    st.session_state.app_state['timers'][idx] = 0
        except Exception as e:
            st.error(f"Failed to generate or parse the illustrated guide: {str(e)}")
def render_sidebar_navigation():
    """Draw the sidebar: a progress bar plus one jump button per step.

    Nothing beyond the heading is drawn while no steps exist. Clicking a step
    button stores that step number as the current step and reruns the script.
    """
    st.sidebar.markdown("## Steps Navigation")
    step_list = st.session_state.app_state['steps']
    if not step_list:
        return

    total_steps = len(step_list)
    flags = st.session_state.app_state['done_flags']
    completed = len([flag for flag in flags.values() if flag])
    fraction = completed / total_steps if total_steps > 0 else 0
    st.sidebar.progress(fraction)
    st.sidebar.write(f"Progress: {completed}/{total_steps} steps")

    for step_no, _text in step_list:
        finished = st.session_state.app_state['done_flags'].get(step_no, False)
        marker = 'βœ“' if finished else 'Β·'
        clicked = st.sidebar.button(f"{marker} Step {step_no}", key=f"nav_{step_no}")
        if clicked:
            st.session_state.app_state['current_step'] = step_no
            st.rerun()
def render_tools_list():
    """Show the required tools and materials in an expanded expander.

    Draws nothing when the tools list is empty.
    """
    if not st.session_state.app_state['tools_list']:
        return
    with st.expander("πŸ”§ Required Tools & Materials", expanded=True):
        for entry in st.session_state.app_state['tools_list']:
            st.markdown(f"- {entry}")
def render_step(idx, text):
    """Render one step: text, narration, illustration, checklist and navigation.

    Args:
        idx: Step number, used for widget keys and per-step state lookups.
        text: Instruction text of the step.

    The completion checkbox and the notes box write their values back into
    ``st.session_state.app_state`` on every run. Previous/Next change
    ``current_step`` by one and rerun the script.
    """
    total = len(st.session_state.app_state['steps'])
    st.markdown(f"### Step {idx} of {total}")
    st.write(text)

    # FINALIZED TTS Integration
    if st.button(f"πŸ”Š Narrate Step {idx}", key=f"tts_{idx}"):
        with st.spinner("Generating narration..."):
            audio_data, mime_type = generate_tts_audio(client, text)
        if audio_data:
            # The MIME type may be absent; normalise it so the checks below
            # cannot fail on None.
            mime = (mime_type or "").lower()
            # Raw PCM must be wrapped in a WAV container before it can be
            # played. A missing MIME type is treated as raw PCM as well
            # (assumption: that is what the TTS model returns; TODO confirm).
            if not mime or 'l16' in mime or 'pcm' in mime:
                st.info("Raw audio format detected. Converting to WAV for playback...")
                # Use the sample rate advertised in the MIME type (e.g.
                # "...;rate=24000") and fall back to 24000 when it is absent.
                rate_match = re.search(r"rate=(\d+)", mime)
                sample_rate = int(rate_match.group(1)) if rate_match else 24000
                wav_data = _convert_pcm_to_wav(audio_data, sample_rate=sample_rate)
                st.audio(wav_data, format="audio/wav")
            else:
                # If it's already in a standard format (like mp3, ogg), play it directly
                st.audio(audio_data, format=mime_type)
        else:
            st.error("Could not generate audio.")

    if idx in st.session_state.app_state['images']:
        st.image(
            st.session_state.app_state['images'][idx],
            caption=f"Illustration for step {idx}",
            use_container_width=True
        )

    done = st.checkbox("βœ… Mark this step as completed", value=st.session_state.app_state['done_flags'].get(idx, False), key=f"done_{idx}")
    st.session_state.app_state['done_flags'][idx] = done
    notes = st.text_area("πŸ“ Your notes for this step:", value=st.session_state.app_state['notes'].get(idx, ""), height=100, key=f"notes_{idx}")
    st.session_state.app_state['notes'][idx] = notes

    st.markdown("---")
    col1, col2, col3 = st.columns([1, 2, 1])
    # The button is only created when the bound check passes (short-circuit).
    if idx > 1 and col1.button("⬅️ Previous", key=f"prev_{idx}"):
        st.session_state.app_state['current_step'] -= 1
        st.rerun()
    if idx < total and col3.button("Next ➑️", key=f"next_{idx}"):
        st.session_state.app_state['current_step'] += 1
        st.rerun()
# ─────────────────────────────────────────────────────────────────────────────
# 4. APP LAYOUT - FIXED UPLOAD SECTION
# ─────────────────────────────────────────────────────────────────────────────
# Page-level setup: browser tab title/icon, wide layout, main heading and a
# collapsed help section describing the workflow.
st.set_page_config(
    page_title="NeoFix DIY Assistant",
    page_icon="πŸ› οΈ",
    layout="wide",
)
st.title("πŸ› οΈ NeoFix AI-Powered DIY Assistant")

how_it_works_box = st.expander("ℹ️ How it works", expanded=False)
with how_it_works_box:
    st.write("""
1. **Upload a photo** of your project or the item you want to fix or build (appliance, car part, plant, craft project).
2. **(Optional) Describe your goal** for more accurate results.
3. **Review the Plan.** The AI will propose a plan. If you didn't provide a description, you'll be asked to approve it.
4. **Get Your Guide** with tools and illustrated step-by-step instructions.
5. **Follow the Steps** using the interactive checklist.
""")
# Main page flow. Before a prompt has been sent the upload/input view is
# shown; afterwards the plan-approval view or the step-by-step guide.
if not st.session_state.app_state['prompt_sent']:
    st.markdown("---")
    col1, col2 = st.columns([3, 1])
    with col1:
        st.markdown("### πŸ“· Upload Project Image")
        # Show upload status
        if st.session_state.app_state.get('upload_error'):
            st.error(f"Upload Error: {st.session_state.app_state['upload_error']}")
        if st.session_state.app_state.get('upload_attempts', 0) > 0:
            st.info(f"Upload attempts: {st.session_state.app_state['upload_attempts']}")

        # IMPROVED File uploader with unique key to force refresh: the key
        # changes after every failed attempt, which resets the widget.
        upload_key = f"file_upload_{st.session_state.app_state.get('upload_attempts', 0)}"
        uploaded_image = st.file_uploader(
            "Choose an image file",
            type=["jpg", "jpeg", "png", "bmp", "gif"],
            accept_multiple_files=False,
            key=upload_key,
            help="Supported: JPG, PNG, BMP, GIF (max 5MB)"
        )

        # Process uploaded image immediately
        processed_image = None
        upload_status = ""
        if uploaded_image is not None:
            # Check if this is a new file upload
            current_file_id = f"{uploaded_image.name}_{uploaded_image.size}"
            if current_file_id != st.session_state.app_state.get('last_uploaded_file'):
                st.session_state.app_state['last_uploaded_file'] = current_file_id
                with st.spinner("Processing uploaded image..."):
                    processed_image, upload_status = handle_uploaded_file(uploaded_image)
                if processed_image is not None:
                    st.session_state.app_state['upload_error'] = None
                    st.success("βœ… Image uploaded and processed successfully!")
                    st.image(processed_image, caption="Uploaded image preview", use_container_width=True)
                else:
                    st.session_state.app_state['upload_error'] = upload_status
                    st.session_state.app_state['upload_attempts'] += 1
                    st.error(f"❌ {upload_status}")
            else:
                # File already seen on an earlier run: re-decode it for the preview
                if st.session_state.app_state.get('upload_error') is None:
                    processed_image, _ = handle_uploaded_file(uploaded_image)
                    if processed_image:
                        st.success("βœ… Image ready for analysis!")
                        st.image(processed_image, caption="Uploaded image preview", use_container_width=True)

        # Alternative camera input
        st.markdown("##### Alternative: Take a photo")
        camera_image = st.camera_input("Take a picture", key=f"camera_{st.session_state.app_state.get('upload_attempts', 0)}")
        # An uploaded file takes precedence over a camera photo.
        if camera_image and not uploaded_image:
            with st.spinner("Processing camera image..."):
                processed_image, upload_status = handle_uploaded_file(camera_image)
            if processed_image is not None:
                st.session_state.app_state['upload_error'] = None
                st.success("βœ… Photo captured and processed!")
                st.image(processed_image, caption="Camera photo preview", use_container_width=True)
            else:
                st.error(f"❌ {upload_status}")

        context_text = st.text_area(
            "✏️ Describe the issue or your goal (optional but recommended)",
            height=80,
            placeholder="e.g., 'My toaster won't turn on,' or 'How do I build a desk like this?'"
        )

    with col2:
        st.markdown("### Actions")
        # Get AI Guidance button - only enabled when image is ready
        has_valid_image = (uploaded_image is not None or camera_image is not None) and st.session_state.app_state.get('upload_error') is None
        if st.button(
            "πŸš€ Get AI Guidance",
            type="primary",
            use_container_width=True,
            disabled=not has_valid_image
        ):
            image_to_analyze = None
            # Defined up front so the error message below can never hit an
            # unbound name.
            status = "No image provided"
            # Determine which image to use
            if uploaded_image:
                image_to_analyze, status = handle_uploaded_file(uploaded_image)
            elif camera_image:
                image_to_analyze, status = handle_uploaded_file(camera_image)
            if image_to_analyze is not None:
                initial_analysis(image_to_analyze, context_text)
                guide_failed = (
                    st.session_state.app_state['prompt_sent']
                    and st.session_state.app_state['plan_approved']
                    and not st.session_state.app_state['steps']
                )
                if guide_failed:
                    # The plan was parsed but guide generation failed. Do not
                    # rerun (that would wipe the error message), and un-approve
                    # the plan so the next view offers the proceed button again.
                    st.session_state.app_state['plan_approved'] = False
                elif st.session_state.app_state['prompt_sent']:
                    # Only switch views on success; on failure the error shown
                    # by initial_analysis must stay visible.
                    st.rerun()
            else:
                st.error(f"❌ Image processing failed: {status}")

        # Status message for button
        if not has_valid_image:
            if uploaded_image is None and camera_image is None:
                st.warning("⚠️ Please upload an image first!")
            elif st.session_state.app_state.get('upload_error'):
                st.warning("⚠️ Fix upload error first!")

        # Troubleshooting section
        with st.expander("πŸ”§ Upload Troubleshooting"):
            st.markdown("""
**Common fixes:**
1. **Refresh upload**: Click button below
2. **Check file size**: Max 5MB
3. **Try different format**: JPG works best
4. **Use camera**: If file upload fails
5. **Clear browser cache**: Ctrl+Shift+Delete
""")
            if st.button("πŸ”„ Reset Upload", use_container_width=True):
                st.session_state.app_state['upload_attempts'] = 0
                st.session_state.app_state['upload_error'] = None
                st.session_state.app_state['last_uploaded_file'] = None
                st.rerun()
            # Debug info
            if st.checkbox("Show debug info"):
                st.json({
                    "upload_attempts": st.session_state.app_state.get('upload_attempts', 0),
                    "upload_error": st.session_state.app_state.get('upload_error'),
                    "last_file": st.session_state.app_state.get('last_uploaded_file'),
                    "has_uploaded_file": uploaded_image is not None,
                    "has_camera_image": camera_image is not None
                })

        if st.button("πŸ”„ Start Over", use_container_width=True):
            reset_state()
else:
    render_sidebar_navigation()
    st.markdown("---")
    st.markdown(f"### {st.session_state.app_state.get('project_title', 'Your Project')}")
    st.markdown(f"**Category:** `{st.session_state.app_state.get('category', 'N/A')}`")
    st.info(f"**Description:** {st.session_state.app_state.get('project_description', 'N/A')}")
    st.markdown("---")

    if not st.session_state.app_state['steps']:
        if st.session_state.app_state['upcycling_options']:
            st.markdown("#### The AI has suggested a few projects. Please choose one:")
            for i, option in enumerate(st.session_state.app_state['upcycling_options']):
                if st.button(option, key=f"option_{i}"):
                    generate_detailed_guide_with_images(selected_option=option)
                    # Rerun only when a guide was produced; otherwise keep the
                    # error message on screen.
                    if st.session_state.app_state['steps']:
                        st.rerun()
        elif not st.session_state.app_state['plan_approved']:
            st.markdown("#### The AI has proposed the following plan:")
            st.success(st.session_state.app_state['initial_plan'])
            if st.button("βœ… Looks good, proceed with this plan", type="primary"):
                st.session_state.app_state['plan_approved'] = True
                generate_detailed_guide_with_images()
                if st.session_state.app_state['steps']:
                    st.rerun()
                else:
                    # Generation failed: keep the error visible and let the
                    # user press the button again.
                    st.session_state.app_state['plan_approved'] = False
    else:
        render_tools_list()
        st.markdown("---")
        current_step_index = st.session_state.app_state['current_step']
        try:
            step_num, step_text = st.session_state.app_state['steps'][current_step_index - 1]
            render_step(step_num, step_text)
        except IndexError:
            # current_step points past the list: fall back to the first step.
            st.session_state.app_state['current_step'] = 1
            st.rerun()

        total_steps = len(st.session_state.app_state['steps'])
        done_count = sum(1 for d in st.session_state.app_state['done_flags'].values() if d)
        if total_steps > 0:
            progress = done_count / total_steps
            st.progress(progress)
            st.markdown(f"**Overall Progress:** {done_count} of {total_steps} completed ({progress:.0%})")
            if done_count == total_steps:
                st.balloons()
                st.success("πŸŽ‰ Congratulations! You've completed all steps!")

    # Offered in every guide-phase view so a failed generation never leaves
    # the user without a way back to the upload page.
    if st.button("πŸ”„ Start Over"):
        reset_state()