NeoTest

Sleeping

App Files Files Community

NeoTest / app.py

rairo

Update app.py

135e487 verified 8 months ago

raw

history blame contribute delete

27.4 kB

	import streamlit as st
	from PIL import Image
	from io import BytesIO
	from google import genai
	from google.genai import types
	import re
	import time
	import os
	import wave
	import io
	import tempfile
	import base64

	# Disable Streamlit analytics (prevents PermissionError in some environments)
	# NOTE(review): this is set after `import streamlit`; confirm the variable is
	# still honoured at that point.
	os.environ["STREAMLIT_ANALYTICS_ENABLED"] = "false"

	# ─────────────────────────────────────────────────────────────────────────────
	# 1. CONFIGURATION
	# ─────────────────────────────────────────────────────────────────────────────

	# 1.1 Load your Google API key from Streamlit secrets, falling back to the
	#     GOOGLE_API_KEY environment variable.
	try:
	    API_KEY = st.secrets["GOOGLE_API_KEY"]
	except (AttributeError, KeyError, FileNotFoundError):
	    # KeyError: a secrets file exists but has no GOOGLE_API_KEY entry.
	    # FileNotFoundError: no secrets file exists at all (Streamlit's
	    # "secret not found" error is expected to derive from it — confirm for the
	    # pinned Streamlit version). Without this the env fallback is unreachable.
	    API_KEY = os.environ.get("GOOGLE_API_KEY")

	if not API_KEY:
	    st.error("Please set GOOGLE_API_KEY in your environment variables or Streamlit secrets")
	    st.stop()

	# 1.2 Initialize the GenAI client; stop the script if it cannot be created,
	#     because every later section needs `client`.
	try:
	    client = genai.Client(api_key=API_KEY)
	except Exception as e:
	    st.error(f"Failed to initialize GenAI Client: {e}")
	    st.stop()

	# 1.3 Constants: model names used for categorisation, guide generation and TTS
	CATEGORY_MODEL = "gemini-2.0-flash-exp"
	GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
	TTS_MODEL = "gemini-2.5-flash-preview-tts"

	# 1.4 Helper to parse numbered steps out of Gemini text
	def parse_numbered_steps(text):
	    """Extract ``N. description`` lines from *text*.

	    A step is any line that starts (after optional spaces/tabs) with an
	    integer followed by a literal dot. Lines that do not match are ignored.

	    Args:
	        text: The raw text containing a numbered list.

	    Returns:
	        A list of ``(step_number, description)`` tuples in order of
	        appearance, with the description stripped of surrounding whitespace.
	    """
	    # Anchor on line starts rather than on "\n" plus exactly one whitespace
	    # character, and escape the dot so "10 apples" is not read as step 1.
	    # [ \t]* (not \s*) keeps a bare "3." from swallowing the following line.
	    step_pattern = r"^[ \t]*(\d+)\.[ \t]*(.*)$"
	    return [
	        (int(num), desc.strip())
	        for num, desc in re.findall(step_pattern, text, re.MULTILINE)
	    ]

	# 1.5 FIXED File Upload Handler
	def handle_uploaded_file(uploaded_file):
	    """Validate an uploaded file and convert it into a PIL image.

	    Args:
	        uploaded_file: A file-like object exposing ``size``, ``type``,
	            ``seek()`` and ``read()`` (e.g. what ``st.file_uploader`` /
	            ``st.camera_input`` return), or ``None``.

	    Returns:
	        ``(image, "Success")`` on success, otherwise ``(None, message)`` where
	        *message* describes why the file was rejected. Never raises.
	    """
	    if uploaded_file is None:
	        return None, "No file uploaded"

	    try:
	        # Validate file size (limit to 5MB for better performance in HF Spaces)
	        max_size = 5 * 1024 * 1024  # 5MB
	        if uploaded_file.size > max_size:
	            return None, f"File size ({uploaded_file.size / 1024 / 1024:.1f}MB) exceeds limit (5MB)"

	        # Validate the reported MIME type
	        allowed_types = ['image/jpeg', 'image/jpg', 'image/png', 'image/bmp', 'image/gif']
	        if uploaded_file.type not in allowed_types:
	            return None, f"Unsupported file type: {uploaded_file.type}. Allowed: JPG, PNG, BMP, GIF"

	        # Rewind first: if the stream was already consumed, read() would
	        # return b"" and a valid file would be reported as empty.
	        try:
	            uploaded_file.seek(0)
	            file_bytes = uploaded_file.read()
	        except Exception as read_error:
	            return None, f"Error reading file: {str(read_error)}"
	        if not file_bytes:
	            return None, "File appears to be empty"

	        # Leave the pointer at the start for any later consumer of the object
	        uploaded_file.seek(0)

	        try:
	            # verify() leaves the image object unusable, so it runs on a
	            # throwaway instance and the image is reopened for real use.
	            Image.open(BytesIO(file_bytes)).verify()
	            image = Image.open(BytesIO(file_bytes))

	            # Convert to RGB if necessary (handles RGBA, P mode, etc.)
	            if image.mode not in ('RGB', 'L'):
	                image = image.convert('RGB')

	            # Resize if too large (helps with memory in HF Spaces)
	            max_dimension = 1024
	            if max(image.size) > max_dimension:
	                image.thumbnail((max_dimension, max_dimension), Image.Resampling.LANCZOS)

	            return image, "Success"
	        except Exception as img_error:
	            return None, f"Invalid or corrupted image: {str(img_error)}"

	    except Exception as e:
	        return None, f"Unexpected error processing file: {str(e)}"

	# 1.6 TTS Generation Function with better error handling
	@st.cache_data
	def _synthesize_speech(_client, text_to_speak):
	    """Call the TTS model and return ``(audio_bytes, mime_type)``.

	    Cached per ``text_to_speak``; ``_client`` has a leading underscore so
	    Streamlit does not hash it. Any error propagates to the caller, so a
	    failed call is never stored in the cache.
	    """
	    response = _client.models.generate_content(
	        model=TTS_MODEL,
	        contents=f"Say clearly: {text_to_speak}",
	        config=types.GenerateContentConfig(
	            response_modalities=["AUDIO"],
	            speech_config=types.SpeechConfig(
	                voice_config=types.VoiceConfig(
	                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
	                        voice_name='Kore',
	                    )
	                )
	            ),
	        )
	    )
	    audio_part = response.candidates[0].content.parts[0]
	    return audio_part.inline_data.data, audio_part.inline_data.mime_type


	def generate_tts_audio(_client, text_to_speak):
	    """Generates audio from text using Gemini TTS and returns the audio data and its mime type.

	    Args:
	        _client: The GenAI client used for the request.
	        text_to_speak: Text to narrate; anything beyond 500 characters is cut
	            off and replaced by "...".

	    Returns:
	        ``(audio_bytes, mime_type)`` on success, ``(None, None)`` on failure
	        (the error is also shown with ``st.error``).
	    """
	    try:
	        # Limit text length to prevent timeout
	        if len(text_to_speak) > 500:
	            text_to_speak = text_to_speak[:500] + "..."
	        # Failures are handled here, outside the cached helper: previously the
	        # (None, None) result was cached, so one transient error stuck for
	        # every later request with the same text.
	        return _synthesize_speech(_client, text_to_speak)
	    except Exception as e:
	        st.error(f"Failed to generate narration: {e}")
	        return None, None

	# 1.7 NEW HELPER FUNCTION TO CREATE A WAV FILE IN MEMORY
	def _convert_pcm_to_wav(pcm_data, sample_rate=24000, channels=1, sample_width=2):
	    """Return *pcm_data* wrapped in an in-memory WAV container.

	    Args:
	        pcm_data: Raw PCM frames as bytes.
	        sample_rate: Frames per second written to the header.
	        channels: Number of audio channels.
	        sample_width: Bytes per sample.

	    Returns:
	        The complete WAV file contents as bytes.
	    """
	    container = io.BytesIO()
	    writer = wave.open(container, 'wb')
	    try:
	        writer.setnchannels(channels)
	        writer.setsampwidth(sample_width)
	        writer.setframerate(sample_rate)
	        writer.writeframes(pcm_data)
	    finally:
	        # Closing finalises the header sizes; the BytesIO itself stays open.
	        writer.close()
	    return container.getvalue()

	# ─────────────────────────────────────────────────────────────────────────────
	# 2. SESSION STATE SETUP
	# ─────────────────────────────────────────────────────────────────────────────

	def _default_app_state():
	    """Return a brand-new dict holding the initial per-session app state.

	    Single source of truth for the state layout, used both for first-time
	    initialisation and for resetting. A new dict (with new nested containers)
	    is built on every call so sessions never share mutable state.
	    """
	    return {
	        "steps": [], "images": {}, "tools_list": [], "current_step": 1,
	        "done_flags": {}, "notes": {}, "timers": {}, "category": None,
	        "prompt_sent": False, "timer_running": {}, "last_tick": {},
	        "project_title": "", "project_description": "", "upcycling_options": [],
	        "plan_approved": False, "initial_plan": "", "user_image": None,
	        "upload_error": None, "upload_attempts": 0, "last_uploaded_file": None
	    }


	if "app_state" not in st.session_state:
	    st.session_state.app_state = _default_app_state()

	# ─────────────────────────────────────────────────────────────────────────────
	# 3. LAYOUT & FUNCTIONS
	# ─────────────────────────────────────────────────────────────────────────────

	def reset_state():
	    """Clear out all session state so user can start fresh."""
	    st.session_state.app_state = _default_app_state()
	    # The rerun restarts the script right away, so this message is only
	    # visible until the page is redrawn.
	    st.success("✅ Reset complete!")
	    st.rerun()

	def send_text_request(model_name, prompt, image):
	    """Send *prompt* plus *image* to *model_name* and return the text reply.

	    Returns the concatenation of all non-empty text parts of the first
	    candidate, stripped of surrounding whitespace. On any exception the
	    error is shown with ``st.error`` and ``None`` is returned.
	    """
	    try:
	        reply = client.chats.create(model=model_name).send_message([prompt, image])
	        fragments = []
	        for piece in reply.candidates[0].content.parts:
	            # Parts without text (e.g. inline data) are skipped.
	            if piece.text:
	                fragments.append(piece.text)
	        return "".join(fragments).strip()
	    except Exception as exc:
	        st.error(f"Error with model {model_name}: {exc!s}")
	        return None

	def _parse_plan_response(plan_response):
	    """Split the model's plan reply into ``(title, description, initial_plan)``.

	    Raises:
	        AttributeError: if one of the TITLE / DESCRIPTION / INITIAL PLAN
	            sections is missing (``re.search`` returned ``None``).
	    """
	    # Each capture needs a quantifier: a bare "(.)" keeps one character only.
	    # The title is a single line; the description runs up to the plan header.
	    title = re.search(r"TITLE:\s*(.*)", plan_response).group(1).strip()
	    description = re.search(
	        r"DESCRIPTION:\s*(.*?)\s*INITIAL PLAN:", plan_response, re.DOTALL
	    ).group(1).strip()
	    initial_plan_text = re.search(
	        r"INITIAL PLAN:\s*(.*)", plan_response, re.DOTALL
	    ).group(1).strip()
	    return title, description, initial_plan_text


	def initial_analysis(image, context_text):
	    """First pass with AI: get category, then title, description, and initial plan.

	    Args:
	        image: The image to analyse; ``None`` aborts with an error message.
	        context_text: Optional user description. When non-empty the plan is
	            auto-approved and the detailed guide is generated immediately.

	    Side effects:
	        Writes ``user_image``, ``category``, ``project_title``,
	        ``project_description``, ``upcycling_options`` / ``initial_plan``,
	        ``prompt_sent`` and ``plan_approved`` in ``st.session_state.app_state``.
	        ``prompt_sent`` is only set to True when the reply could be parsed.
	    """
	    if image is None:
	        st.error("No valid image provided for analysis")
	        return

	    app_state = st.session_state.app_state
	    app_state['user_image'] = image

	    with st.spinner("🤖 Analyzing your project and preparing a plan..."):
	        category_prompt = (
	            "You are an expert DIY assistant. Analyze the user's image and context. "
	            f"Context: '{context_text}'. "
	            "Categorize the project into ONE of the following: "
	            "Home Appliance Repair, Automotive Maintenance, Gardening & Urban Farming, "
	            "Upcycling & Sustainable Crafts, or DIY Project Creation. "
	            "Reply with ONLY the category name."
	        )
	        category = send_text_request(CATEGORY_MODEL, category_prompt, image)
	        if not category:
	            return
	        app_state['category'] = category

	        plan_prompt = f"""
	You are an expert DIY assistant in the category: {category}.
	User Context: "{context_text if context_text else 'No context provided.'}"
	Based on the image and context, perform the following:
	1. Title: Create a short, clear title for this project.
	2. Description: Write a brief, one-paragraph description of the goal.
	3. Initial Plan:
	- If 'Upcycling & Sustainable Crafts' AND no specific project is mentioned, propose three distinct project options as a numbered list under "UPCYCLING OPTIONS:".
	- For all other cases, briefly outline the main stages of the proposed solution.
	Structure your response EXACTLY like this:
	TITLE: [Your title]
	DESCRIPTION: [Your description]
	INITIAL PLAN:
	[Your plan or 3 options]
	"""
	        plan_response = send_text_request(GENERATION_MODEL, plan_prompt, image)
	        if not plan_response:
	            return

	        try:
	            title, description, initial_plan_text = _parse_plan_response(plan_response)
	        except AttributeError:
	            st.error("The AI response was not in the expected format. Please try again.")
	            app_state['prompt_sent'] = False
	            return

	        app_state['project_title'] = title
	        app_state['project_description'] = description

	        options = []
	        if "UPCYCLING OPTIONS:" in initial_plan_text:
	            options = re.findall(r"^[ \t]*\d+\.[ \t]*(.*)", initial_plan_text, re.MULTILINE)
	        if options:
	            app_state['upcycling_options'] = options
	        else:
	            # Also covers an options header with no parsable numbered lines,
	            # so the approval screen never shows an empty plan.
	            app_state['initial_plan'] = initial_plan_text

	        app_state['prompt_sent'] = True
	        if context_text:
	            app_state['plan_approved'] = True
	            generate_detailed_guide_with_images()
	        else:
	            app_state['plan_approved'] = False

	def generate_detailed_guide_with_images(selected_option=None):
	    """Generates the detailed guide with steps and illustrations.

	    Args:
	        selected_option: Optional upcycling project the user picked; when
	            given it replaces the "plan approved" context sent to the model.

	    Side effects:
	        On success fills ``tools_list``, ``steps``, ``images``, ``done_flags``,
	        ``notes`` and ``timers`` in ``st.session_state.app_state``. On any
	        failure an error is shown with ``st.error``.
	    """
	    app_state = st.session_state.app_state
	    image = app_state.get('user_image')
	    if image is None:
	        st.error("Image not found. Please start over.")
	        return

	    context = f"The user has approved the plan for '{app_state['project_title']}'."
	    if selected_option:
	        context = f"The user chose the upcycling project: '{selected_option}'."

	    detailed_prompt = f"""
	You are a DIY expert. The user wants to proceed with the project titled "{app_state['project_title']}".
	{context}
	Provide a detailed guide. For each step, you MUST provide a simple, clear illustrative image.
	Format your response EXACTLY like this:
	TOOLS AND MATERIALS:
	- Tool A
	- Material B
	STEPS(Maximum 7 steps):
	1. First step instructions.
	2. Second step instructions...
	"""
	    with st.spinner("🛠️ Generating your detailed guide with illustrations..."):
	        try:
	            chat = client.chats.create(
	                model=GENERATION_MODEL,
	                config=types.GenerateContentConfig(response_modalities=["Text", "Image"])
	            )
	            full_resp = chat.send_message([detailed_prompt, image])
	            gen_parts = full_resp.candidates[0].content.parts

	            text_chunks = []
	            inline_images = []
	            for part in gen_parts:
	                if part.text is not None:
	                    text_chunks.append(part.text)
	                if part.inline_data is not None:
	                    inline_images.append(Image.open(BytesIO(part.inline_data.data)))
	            combined_text = "\n".join(text_chunks).strip()

	            # The prompt shows the header as "STEPS(Maximum 7 steps):", so
	            # accept anything between "STEPS" and the colon on that line.
	            steps_match = re.search(r"STEPS[^:\n]*:\s*(.*)", combined_text, re.DOTALL)
	            if steps_match is None:
	                raise ValueError("the response has no STEPS section")
	            parsed_steps = parse_numbered_steps(steps_match.group(1).strip())
	            if not parsed_steps:
	                raise ValueError("no numbered steps were found in the response")

	            # A missing tools section is tolerated; the guide is still usable.
	            tools_match = re.search(
	                r"TOOLS AND MATERIALS:\s*(.*?)\s*STEPS[^:\n]*:", combined_text, re.DOTALL
	            )
	            tools_section = tools_match.group(1).strip() if tools_match else ""
	            tools = [line.strip("- ").strip() for line in tools_section.split('\n')]

	            app_state['tools_list'] = [tool for tool in tools if tool]
	            app_state['steps'] = parsed_steps
	            # Images are matched to steps by position; the lower bound keeps
	            # a step numbered 0 from wrapping around to the last image.
	            app_state['images'] = {
	                idx: inline_images[idx - 1]
	                for idx, _ in parsed_steps
	                if 0 < idx <= len(inline_images)
	            }

	            for idx, step_text in parsed_steps:
	                app_state['done_flags'][idx] = False
	                app_state['notes'][idx] = ""
	                # "|" must be unescaped to act as alternation.
	                timer_match = re.search(r"wait\s+for\s+(\d+)\s+(seconds?|minutes?)", step_text.lower())
	                if timer_match:
	                    val, unit = int(timer_match.group(1)), timer_match.group(2)
	                    app_state['timers'][idx] = val * (60 if "minute" in unit else 1)
	                else:
	                    app_state['timers'][idx] = 0
	        except Exception as e:
	            st.error(f"Failed to generate or parse the illustrated guide: {str(e)}")

	def render_sidebar_navigation():
	    """Draw the sidebar: heading, progress bar and one jump button per step.

	    Nothing beyond the heading is drawn while there are no steps. Clicking a
	    step button stores that step number as ``current_step`` and reruns.
	    """
	    sidebar = st.sidebar
	    sidebar.markdown("## Steps Navigation")

	    step_list = st.session_state.app_state['steps']
	    if not step_list:
	        return

	    flags = st.session_state.app_state['done_flags']
	    total_steps = len(step_list)
	    completed = len([flag for flag in flags.values() if flag])

	    # total_steps is at least 1 here because of the guard above.
	    sidebar.progress(completed / total_steps)
	    sidebar.write(f"Progress: {completed}/{total_steps} steps")

	    for idx, _unused_text in step_list:
	        marker = '✓' if flags.get(idx, False) else '·'
	        if sidebar.button(f"{marker} Step {idx}", key=f"nav_{idx}"):
	            st.session_state.app_state['current_step'] = idx
	            st.rerun()

	def render_tools_list():
	    """Show the tools/materials list in an expanded expander, if there is one."""
	    tools = st.session_state.app_state['tools_list']
	    if not tools:
	        return
	    with st.expander("🔧 Required Tools & Materials", expanded=True):
	        for entry in tools:
	            st.markdown(f"- {entry}")

	def render_step(idx, text):
	    """Render one step: text, narration button, illustration, checklist and nav.

	    Args:
	        idx: The step number (used in labels, widget keys and state lookups).
	        text: The step's instruction text.

	    Side effects:
	        Updates ``done_flags[idx]`` and ``notes[idx]`` from the widgets, and
	        changes ``current_step`` (followed by a rerun) on Previous / Next.
	    """
	    app_state = st.session_state.app_state
	    total = len(app_state['steps'])
	    st.markdown(f"### Step {idx} of {total}")
	    st.write(text)

	    # FINALIZED TTS Integration
	    if st.button(f"🔊 Narrate Step {idx}", key=f"tts_{idx}"):
	        with st.spinner("Generating narration..."):
	            audio_data, mime_type = generate_tts_audio(client, text)

	        if audio_data:
	            # mime_type may be missing; `'L16' in None` would raise TypeError.
	            mime_type = mime_type or ""
	            # NOTE(review): audio without a MIME type is treated as raw PCM on
	            # the assumption that is what the TTS model returns — confirm.
	            if not mime_type or 'L16' in mime_type or 'pcm' in mime_type:
	                st.info("Raw audio format detected. Converting to WAV for playback...")
	                # Use the rate announced in the MIME type (e.g. "...;rate=24000")
	                # when present, otherwise fall back to 24000.
	                rate_match = re.search(r"rate=(\d+)", mime_type)
	                sample_rate = int(rate_match.group(1)) if rate_match else 24000
	                wav_data = _convert_pcm_to_wav(audio_data, sample_rate=sample_rate)
	                st.audio(wav_data, format="audio/wav")
	            else:
	                # If it's already in a standard format (like mp3, ogg), play it directly
	                st.audio(audio_data, format=mime_type)
	        else:
	            st.error("Could not generate audio.")

	    if idx in app_state['images']:
	        st.image(
	            app_state['images'][idx],
	            caption=f"Illustration for step {idx}",
	            use_container_width=True
	        )

	    done = st.checkbox("✅ Mark this step as completed", value=app_state['done_flags'].get(idx, False), key=f"done_{idx}")
	    app_state['done_flags'][idx] = done
	    notes = st.text_area("📝 Your notes for this step:", value=app_state['notes'].get(idx, ""), height=100, key=f"notes_{idx}")
	    app_state['notes'][idx] = notes
	    st.markdown("---")
	    col1, col2, col3 = st.columns([1, 2, 1])
	    if idx > 1 and col1.button("⬅️ Previous", key=f"prev_{idx}"):
	        app_state['current_step'] -= 1
	        st.rerun()
	    if idx < total and col3.button("Next ➡️", key=f"next_{idx}"):
	        app_state['current_step'] += 1
	        st.rerun()

	# ─────────────────────────────────────────────────────────────────────────────
	# 4. APP LAYOUT - FIXED UPLOAD SECTION
	# ─────────────────────────────────────────────────────────────────────────────

	# Page chrome: browser tab title/icon, wide layout and the main heading.
	st.set_page_config(
	    page_title="NeoFix DIY Assistant",
	    page_icon="🛠️",
	    layout="wide",
	)
	st.title("🛠️ NeoFix AI-Powered DIY Assistant")

	# Collapsed-by-default usage instructions.
	how_it_works = st.expander("ℹ️ How it works", expanded=False)
	with how_it_works:
	    st.write("""
	1. Upload a photo of your project or the item you want to fix or build (appliance, car part, plant, craft project).
	2. (Optional) Describe your goal for more accurate results.
	3. Review the Plan. The AI will propose a plan. If you didn't provide a description, you'll be asked to approve it.
	4. Get Your Guide with tools and illustrated step-by-step instructions.
	5. Follow the Steps using the interactive checklist.
	""")

	# Main page flow. Before a prompt has been sent the upload/capture screen is
	# shown; afterwards the plan-approval / option-selection / step-by-step
	# screens. `app_state` aliases the session dict, which the helper functions
	# mutate in place.
	app_state = st.session_state.app_state

	if not app_state['prompt_sent']:
	    st.markdown("---")
	    col1, col2 = st.columns([3, 1])

	    with col1:
	        st.markdown("### 📷 Upload Project Image")

	        # Show upload status
	        if app_state.get('upload_error'):
	            st.error(f"Upload Error: {app_state['upload_error']}")

	        if app_state.get('upload_attempts', 0) > 0:
	            st.info(f"Upload attempts: {app_state['upload_attempts']}")

	        # The widget key changes with the attempt counter, which gives a
	        # fresh (empty) uploader after every failed upload.
	        upload_key = f"file_upload_{app_state.get('upload_attempts', 0)}"
	        uploaded_image = st.file_uploader(
	            "Choose an image file",
	            type=["jpg", "jpeg", "png", "bmp", "gif"],
	            accept_multiple_files=False,
	            key=upload_key,
	            help="Supported: JPG, PNG, BMP, GIF (max 5MB)"
	        )

	        # Process uploaded image immediately
	        processed_image = None
	        upload_status = ""

	        if uploaded_image is not None:
	            # A file counts as "new" when its name/size pair differs from the
	            # last one processed.
	            current_file_id = f"{uploaded_image.name}_{uploaded_image.size}"
	            if current_file_id != app_state.get('last_uploaded_file'):
	                app_state['last_uploaded_file'] = current_file_id

	                with st.spinner("Processing uploaded image..."):
	                    processed_image, upload_status = handle_uploaded_file(uploaded_image)

	                if processed_image is not None:
	                    app_state['upload_error'] = None
	                    st.success("✅ Image uploaded and processed successfully!")
	                    st.image(processed_image, caption="Uploaded image preview", use_container_width=True)
	                else:
	                    app_state['upload_error'] = upload_status
	                    app_state['upload_attempts'] += 1
	                    st.error(f"❌ {upload_status}")
	            elif app_state.get('upload_error') is None:
	                # Same file as on the previous run: re-create the preview.
	                processed_image, _ = handle_uploaded_file(uploaded_image)
	                if processed_image:
	                    st.success("✅ Image ready for analysis!")
	                    st.image(processed_image, caption="Uploaded image preview", use_container_width=True)

	        # Alternative camera input (only used when no file is uploaded)
	        st.markdown("##### Alternative: Take a photo")
	        camera_image = st.camera_input("Take a picture", key=f"camera_{app_state.get('upload_attempts', 0)}")
	        if camera_image and not uploaded_image:
	            with st.spinner("Processing camera image..."):
	                processed_image, upload_status = handle_uploaded_file(camera_image)
	            if processed_image is not None:
	                app_state['upload_error'] = None
	                st.success("✅ Photo captured and processed!")
	                st.image(processed_image, caption="Camera photo preview", use_container_width=True)
	            else:
	                st.error(f"❌ {upload_status}")

	        context_text = st.text_area(
	            "✏️ Describe the issue or your goal (optional but recommended)",
	            height=80,
	            placeholder="e.g., 'My toaster won't turn on,' or 'How do I build a desk like this?'"
	        )

	    with col2:
	        st.markdown("### Actions")

	        # Get AI Guidance button - only enabled when image is ready
	        has_valid_image = (
	            (uploaded_image is not None or camera_image is not None)
	            and app_state.get('upload_error') is None
	        )

	        if st.button(
	            "🚀 Get AI Guidance",
	            type="primary",
	            use_container_width=True,
	            disabled=not has_valid_image
	        ):
	            image_to_analyze = None
	            status = "No image available"

	            # Determine which image to use (an uploaded file wins over a photo)
	            if uploaded_image:
	                image_to_analyze, status = handle_uploaded_file(uploaded_image)
	            elif camera_image:
	                image_to_analyze, status = handle_uploaded_file(camera_image)

	            if image_to_analyze is not None:
	                initial_analysis(image_to_analyze, context_text)
	                # Rerun only when there is a new screen to show. An
	                # unconditional rerun would wipe any st.error written during
	                # the analysis before the user could read it.
	                if app_state['prompt_sent']:
	                    if app_state['plan_approved'] and not app_state['steps']:
	                        # The plan was auto-approved but the guide could not
	                        # be built: drop the approval so the next screen
	                        # offers a retry instead of a dead end.
	                        app_state['plan_approved'] = False
	                    else:
	                        st.rerun()
	            else:
	                st.error(f"❌ Image processing failed: {status}")

	        # Status message for button
	        if not has_valid_image:
	            if uploaded_image is None and camera_image is None:
	                st.warning("⚠️ Please upload an image first!")
	            elif app_state.get('upload_error'):
	                st.warning("⚠️ Fix upload error first!")

	        # Troubleshooting section
	        with st.expander("🔧 Upload Troubleshooting"):
	            st.markdown("""
	Common fixes:
	1. Refresh upload: Click button below
	2. Check file size: Max 5MB
	3. Try different format: JPG works best
	4. Use camera: If file upload fails
	5. Clear browser cache: Ctrl+Shift+Delete
	""")

	            if st.button("🔄 Reset Upload", use_container_width=True):
	                app_state['upload_attempts'] = 0
	                app_state['upload_error'] = None
	                app_state['last_uploaded_file'] = None
	                st.rerun()

	            # Debug info
	            if st.checkbox("Show debug info"):
	                st.json({
	                    "upload_attempts": app_state.get('upload_attempts', 0),
	                    "upload_error": app_state.get('upload_error'),
	                    "last_file": app_state.get('last_uploaded_file'),
	                    "has_uploaded_file": uploaded_image is not None,
	                    "has_camera_image": camera_image is not None
	                })

	        if st.button("🔄 Start Over", use_container_width=True):
	            reset_state()
	else:
	    render_sidebar_navigation()
	    st.markdown("---")
	    st.markdown(f"### {app_state.get('project_title', 'Your Project')}")
	    st.markdown(f"Category: `{app_state.get('category', 'N/A')}`")
	    st.info(f"Description: {app_state.get('project_description', 'N/A')}")
	    st.markdown("---")

	    if not app_state['steps']:
	        if app_state['upcycling_options']:
	            st.markdown("#### The AI has suggested a few projects. Please choose one:")
	            for i, option in enumerate(app_state['upcycling_options']):
	                if st.button(option, key=f"option_{i}"):
	                    generate_detailed_guide_with_images(selected_option=option)
	                    # Rerun only on success so a generation error stays visible.
	                    if app_state['steps']:
	                        st.rerun()
	        elif not app_state['plan_approved']:
	            st.markdown("#### The AI has proposed the following plan:")
	            st.success(app_state['initial_plan'])
	            if st.button("✅ Looks good, proceed with this plan", type="primary"):
	                app_state['plan_approved'] = True
	                generate_detailed_guide_with_images()
	                if app_state['steps']:
	                    st.rerun()
	                else:
	                    # Generation failed: undo the approval so this button is
	                    # still offered and the error message is not wiped.
	                    app_state['plan_approved'] = False
	    else:
	        render_tools_list()
	        st.markdown("---")
	        current_step_index = app_state['current_step']
	        try:
	            step_num, step_text = app_state['steps'][current_step_index - 1]
	        except IndexError:
	            # Stored position is out of range: fall back to the first step.
	            app_state['current_step'] = 1
	            st.rerun()
	        else:
	            render_step(step_num, step_text)

	        total_steps = len(app_state['steps'])
	        done_count = sum(1 for d in app_state['done_flags'].values() if d)
	        if total_steps > 0:
	            progress = done_count / total_steps
	            st.progress(progress)
	            st.markdown(f"Overall Progress: {done_count} of {total_steps} completed ({progress:.0%})")
	            if done_count == total_steps:
	                st.balloons()
	                st.success("🎉 Congratulations! You've completed all steps!")

	    if st.button("🔄 Start Over"):
	        reset_state()