rairo commited on
Commit
135e487
·
verified ·
1 Parent(s): d26d92a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +537 -261
app.py CHANGED
@@ -1,297 +1,573 @@
1
- ###############################################################################
2
- # Sozo Business Studio · AI transforms business data into compelling narratives
3
- ###############################################################################
4
- import os, re, json, hashlib, uuid, asyncio, base64, io, tempfile, wave
5
- from pathlib import Path
6
-
7
  import streamlit as st
8
- import pandas as pd
9
- import matplotlib
10
- matplotlib.use("Agg")
11
- import matplotlib.pyplot as plt
12
- from fpdf import FPDF, HTMLMixin
13
- from markdown_it import MarkdownIt
14
- from pptx import Presentation
15
- from pptx.util import Inches, Pt
16
  from google import genai
17
  from google.genai import types
18
- from google.adk.agents import LlmAgent, SequentialAgent
19
- from google.adk.sessions import InMemorySessionService
20
- from google.adk.runners import Runner
21
- from langchain_experimental.agents import create_pandas_dataframe_agent
22
- from langchain_google_genai import ChatGoogleGenerativeAI
 
 
23
 
24
- # ─────────────────────────────────────────────────────────────────────────────
25
- # PAGE CONFIG
26
- # ─────────────────────────────────────────────────────────────────────────────
27
- st.set_page_config(page_title="Sozo Business Studio", layout="wide")
28
- st.title("📊 Sozo Business Studio")
29
- st.caption("AI transforms business data into compelling narratives.")
30
 
31
  # ─────────────────────────────────────────────────────────────────────────────
32
- # CONSTANTS
33
  # ─────────────────────────────────────────────────────────────────────────────
34
- FONT_DIR = Path(__file__).parent if "__file__" in globals() else Path(".")
35
- FONT_REG = FONT_DIR / "NotoSans-Regular.ttf"
36
- FONT_BLD = FONT_DIR / "NotoSans-Bold.ttf"
37
- FONT_FAM = "NotoSans"
38
- SLIDES = 7
39
- TTS_MODEL = "gemini-2.5-flash-preview-tts"
40
- API_KEY = os.getenv("GEMINI_API_KEY")
41
 
42
  if not API_KEY:
43
- st.error("GEMINI_API_KEY not set"); st.stop()
 
44
 
 
45
  try:
46
- GEM = genai.Client(api_key=API_KEY)
47
  except Exception as e:
48
- st.error(f"GenAI init failed: {e}"); st.stop()
49
-
50
- # ─────────────────────────────────────────────────────────────────────────────
51
- # SESSION STATE
52
- # ─────────────────────────────────────────────────────────────────────────────
53
- st.session_state.setdefault("bundles", {})
54
- st.session_state.setdefault("slide_idx", 0)
55
-
56
- # ─────────────────────────────────────────────────────────────────────────────
57
- # HELPERS
58
- # ─────────────────────────────────────────────────────────────────────────────
59
- sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- def fix_bullet(text: str) -> str:
62
- subs = {
63
- "\x95": "•", "\x96": "-", "\x97": "—",
64
- "\x91": "'", "\x92": "'", "\x93": '"', "\x94": '"'
65
- }
66
- for bad, good in subs.items():
67
- text = text.replace(bad, good)
68
- return re.sub(r'[\x80-\x9f]', '', text)
69
-
70
- def convert_pcm_to_wav(pcm: bytes, rate=24_000, ch=1, width=2) -> bytes:
71
- buf = io.BytesIO()
72
- with wave.open(buf, "wb") as wf:
73
- wf.setnchannels(ch); wf.setsampwidth(width); wf.setframerate(rate); wf.writeframes(pcm)
74
- buf.seek(0); return buf.getvalue()
75
-
76
- @st.cache_data(show_spinner=False)
77
- def generate_tts_audio(_client, txt: str):
78
- txt = re.sub(r'[^\w\s\.,!?;:-]', '', txt)[:500]
79
- if not txt: return None, None
80
  try:
81
- resp = _client.models.generate_content(
 
 
 
 
82
  model=TTS_MODEL,
83
- contents=f"Say clearly: {txt}",
84
  config=types.GenerateContentConfig(
85
  response_modalities=["AUDIO"],
86
  speech_config=types.SpeechConfig(
87
  voice_config=types.VoiceConfig(
88
- prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Kore")
 
 
89
  )
90
  ),
91
  )
92
  )
93
- part = resp.candidates[0].content.parts[0]
94
- return part.inline_data.data, part.inline_data.mime_type
95
- except Exception:
 
96
  return None, None
97
 
98
- # robust, single-pass regex – matches <> or [] and optional quotes/spaces
99
- TAG_RE = re.compile(
100
- r'[<\[]\s*generate_?chart\s*[:=]?\s*["\']?\s*([^>\]"\']+?)\s*["\']?\s*[>\]]',
101
- flags=re.IGNORECASE
102
- )
103
-
104
- def extract_chart_tags(text: str) -> list[str]:
105
- return list(dict.fromkeys(TAG_RE.findall(text))) # de-dupe while preserving order
106
-
107
- def replace_chart_tags(text: str, cmap: dict[str, str], repl):
108
- return TAG_RE.sub(lambda m: repl(cmap[m.group(1)]) if m.group(1) in cmap else m.group(0), text)
109
 
110
  # ─────────────────────────────────────────────────────────────────────────────
111
- # PDF / PPTX BUILDERS
112
  # ─────────────────────────────────────────────────────────────────────────────
113
- class _PDF(FPDF, HTMLMixin): pass
114
-
115
- def build_pdf(markdown: str, cmap: dict[str, str]) -> bytes:
116
- markdown = fix_bullet(markdown).replace("", "*")
117
- markdown = replace_chart_tags(markdown, cmap, lambda p: f'<img src="{p}">')
118
- html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(markdown)
119
-
120
- pdf = _PDF(); pdf.set_auto_page_break(True, 15)
121
- fonts_ok = False
122
- for s, ttf in [("", FONT_REG), ("B", FONT_BLD)]:
123
- if ttf.exists():
124
- try: pdf.add_font(FONT_FAM, s, str(ttf), uni=True); fonts_ok = True
125
- except: pass
126
- if fonts_ok: pdf.set_fallback_fonts([FONT_FAM])
127
-
128
- pdf.add_page()
129
- pdf.set_font(FONT_FAM if fonts_ok else "Arial", "B", 18)
130
- pdf.cell(0, 12, "AI-Generated Business Report", ln=True); pdf.ln(3)
131
- pdf.set_font(FONT_FAM if fonts_ok else "Arial", "", 11)
132
-
133
- # shrink big tables / images
134
- html = re.sub(r'(<table[^>]*>)', r'\1<font size="8">', html)
135
- pdf.write_html(html)
136
- return pdf.output(dest="S").encode("latin-1", "ignore")
137
-
138
- def build_pptx(slides: tuple[str, ...], cmap: dict[str, str]) -> bytes:
139
- prs = Presentation(); layout = prs.slide_layouts[1]
140
- for raw in slides:
141
- if not raw.strip(): continue
142
- raw = fix_bullet(raw)
143
- chart_here = extract_chart_tags(raw)
144
- title, *body_lines = [l.strip(" -•") for l in raw.splitlines() if l.strip()]
145
-
146
- slide = prs.slides.add_slide(layout)
147
- slide.shapes.title.text = title or "Slide"
148
-
149
- # body text
150
- tf = slide.shapes.placeholders[1].text_frame; tf.clear(); tf.word_wrap = True
151
- for line in body_lines:
152
- if "generate_chart" in line.lower(): continue
153
- p = tf.add_paragraph(); p.text = line; p.font.size = Pt(20)
154
-
155
- # first matching chart
156
- for tag in chart_here:
157
- if tag in cmap:
158
- slide.shapes.add_picture(cmap[tag], Inches(1), Inches(3.5), width=Inches(8))
159
- break
160
- buf = io.BytesIO(); prs.save(buf); return buf.getvalue()
161
 
162
  # ─────────────────────────────────────────────────────────────────────────────
163
- # MAIN GENERATION (unchanged business logic, but sturdier asyncio)
164
  # ─────────────────────────────────────────────────────────────────────────────
165
- @st.cache_data(show_spinner=False)
166
- def generate_assets(key, file_bytes, filename, mode, ctx):
167
- # read file
168
- df = pd.read_excel(io.BytesIO(file_bytes)) if filename.lower().endswith(".xlsx") else pd.read_csv(io.BytesIO(file_bytes))
169
-
170
- # prompts
171
- report_prompt = """You are a senior business analyst … <generate_chart: "bar chart of sales by region"> …"""
172
- pres_prompt = f"""Create exactly {SLIDES} concise slides … one chart tag per slide …"""
173
-
174
- agents = []
175
- if mode in ("Report", "Both"):
176
- agents.append(LlmAgent("ReportAgent", "gemini-2.5-flash", report_prompt))
177
- if mode in ("Presentation", "Both"):
178
- agents.append(LlmAgent("PresentationAgent", "gemini-2.5-flash", pres_prompt))
179
- root = SequentialAgent("Pipeline", sub_agents=agents)
180
-
181
- async def _go():
182
- svc = InMemorySessionService(); sid = str(uuid.uuid4())
183
- await svc.create_session(app_name="studio", user_id="u", session_id=sid)
184
- runner = Runner(root, "studio", svc)
185
- ctx_payload = types.Content(role="user", parts=[types.Part(text=json.dumps({
186
- "shape": df.shape, "columns": list(df.columns), "types": df.dtypes.astype(str).to_dict(),
187
- "sample": df.head(3).to_dict(), "user_ctx": ctx or "None"
188
- }, indent=2))])
189
- out = {}
190
- async for ev in runner.run_async(user_id="u", session_id=sid, new_message=ctx_payload):
191
- if ev.is_final_response():
192
- out[ev.author] = ev.content.parts[0].text
193
- return out
194
-
195
- try: out = asyncio.run(_go())
196
  except Exception as e:
197
- st.error(f"Agent run failed: {e}"); return None
198
-
199
- if not out: st.error("No output"); return None
200
-
201
- # charts
202
- chart_tags = extract_chart_tags("\n".join(out.values()))
203
- cmap = {}
204
- if chart_tags:
205
- chart_agent = create_pandas_dataframe_agent(
206
- llm=ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.1),
207
- df=df, verbose=False, allow_dangerous_code=True)
208
- for tag in chart_tags:
209
- try:
210
- chart_agent.run(f"Make a {tag} with matplotlib and save it to 'plt'.")
211
- fig = plt.gcf()
212
- if fig.get_axes():
213
- p = Path(tempfile.gettempdir())/f"ch_{uuid.uuid4()}.png"
214
- fig.savefig(p, dpi=300, bbox_inches="tight", facecolor="white")
215
- cmap[tag] = str(p)
216
- plt.close('all')
217
- except Exception: pass
218
-
219
- # outputs
220
- md_preview = pdf_bytes = pptx_bytes = None; slides = []
221
- if "ReportAgent" in out:
222
- md_raw = fix_bullet(out["ReportAgent"])
223
- pdf_bytes = build_pdf(md_raw, cmap)
224
- md_preview = replace_chart_tags(md_raw, cmap, lambda p: f'<img src="data:image/png;base64,{base64.b64encode(open(p,"rb").read()).decode()}">')
225
-
226
- if "PresentationAgent" in out:
227
- raw = fix_bullet(out["PresentationAgent"])
228
- parts = re.split(r'(?i)(?=^\s*slide\s+\d+)', raw, flags=re.MULTILINE)
229
- slides = [p.strip() for p in parts if p.strip()]
230
- if slides: pptx_bytes = build_pptx(tuple(slides), cmap)
231
-
232
- return {"preview_md": md_preview, "pdf": pdf_bytes, "slides": slides,
233
- "pptx": pptx_bytes, "chart_count": len(cmap), "key": key}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
  # ─────────────────────────────────────────────────────────────────────────────
236
- # UI
237
  # ─────────────────────────────────────────────────────────────────────────────
238
- mode = st.radio("Choose output format:", ["Report", "Presentation", "Both"], horizontal=True, index=2)
239
- upl = st.file_uploader("Upload business data", ["csv", "xlsx"])
240
- ctx = st.text_area("Business context (optional)")
241
-
242
- if st.button("🚀 Generate Narrative", type="primary"):
243
- if not upl: st.warning("Please upload a file"); st.stop()
244
- key = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode()]))
245
- bundle = generate_assets(key, upl.getvalue(), upl.name, mode, ctx)
246
- if not bundle: st.stop()
247
- if bundle["chart_count"]: st.success(f"✅ Generated {bundle['chart_count']} charts")
248
-
249
- # ensure slide index reset when new bundle arrives
250
- if st.session_state.get("bundle_key") != key:
251
- st.session_state["bundle_key"] = key
252
- st.session_state["slide_idx"] = 0
253
-
254
- # Tabs
255
- if mode == "Both":
256
- tab_rep, tab_pre = st.tabs(["📄 Report", "📑 Slides"])
257
- elif mode == "Report":
258
- tab_rep = st.container(); tab_pre = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  else:
260
- tab_pre = st.container(); tab_rep = None
261
-
262
- # Report tab
263
- if tab_rep:
264
- with tab_rep:
265
- st.subheader("Generated Report")
266
- st.markdown(bundle["preview_md"] or "_no report_", unsafe_allow_html=True)
267
- if bundle["pdf"]:
268
- st.download_button("⬇️ PDF", bundle["pdf"], "business_report.pdf", "application/pdf")
269
-
270
- # Slides tab
271
- if tab_pre:
272
- with tab_pre:
273
- slides = bundle["slides"]; n = len(slides)
274
- if not slides: st.warning("No slides created"); st.stop()
275
-
276
- idx = st.session_state["slide_idx"]
277
- st.markdown(f"##### Slide {idx+1}/{n}")
278
- text = replace_chart_tags(slides[idx], {}, lambda _: "")
279
- st.markdown(text)
280
-
281
- # Narrate
282
- if st.button("🔊 Narrate", key=f"tts_{idx}"):
283
- audio, mtype = generate_tts_audio(GEM, re.sub(r'\n+', '. ', text))
284
- if audio:
285
- if 'pcm' in (mtype or '').lower() or 'l16' in (mtype or '').lower():
286
- audio = convert_pcm_to_wav(audio); mtype = "audio/wav"
287
- st.audio(audio, format=mtype)
288
-
289
- # Nav buttons
290
- c1, c2, c3 = st.columns([1,2,1])
291
- with c1: st.button("⬅️ Prev", on_click=lambda: st.session_state.update(slide_idx=max(idx-1,0)), disabled=idx==0)
292
- with c3: st.button("Next ➡️", on_click=lambda: st.session_state.update(slide_idx=min(idx+1,n-1)), disabled=idx==n-1)
293
-
294
- if bundle["pptx"]:
295
- st.download_button("⬇️ PowerPoint", bundle["pptx"],
296
- "business_presentation.pptx",
297
- "application/vnd.openxmlformats-officedocument.presentationml.presentation")
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from PIL import Image
3
+ from io import BytesIO
 
 
 
 
 
 
4
  from google import genai
5
  from google.genai import types
6
+ import re
7
+ import time
8
+ import os
9
+ import wave
10
+ import io
11
+ import tempfile
12
+ import base64
13
 
14
+ # Disable Streamlit analytics (prevents PermissionError in some environments)
15
+ os.environ["STREAMLIT_ANALYTICS_ENABLED"] = "false"
 
 
 
 
16
 
17
  # ─────────────────────────────────────────────────────────────────────────────
18
+ # 1. CONFIGURATION
19
  # ─────────────────────────────────────────────────────────────────────────────
20
+
21
# 1.1 Load your Google API key from environment or Streamlit secrets
# st.secrets raises when no secrets file exists (AttributeError on some
# Streamlit versions) or when the key is absent (KeyError); fall back to
# the plain environment variable in either case.
try:
    API_KEY = st.secrets["GOOGLE_API_KEY"]
except (AttributeError, KeyError):
    API_KEY = os.environ.get("GOOGLE_API_KEY")

# Without a key the app cannot reach Gemini at all, so fail fast.
if not API_KEY:
    st.error("Please set GOOGLE_API_KEY in your environment variables or Streamlit secrets")
    st.stop()

# 1.2 Initialize the GenAI client
try:
    client = genai.Client(api_key=API_KEY)
except Exception as e:
    st.error(f"Failed to initialize GenAI Client: {e}")
    st.stop()

# 1.3 Constants
CATEGORY_MODEL = "gemini-2.0-flash-exp"                     # text-only project categorization
GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"  # text + inline-image guide generation
TTS_MODEL = "gemini-2.5-flash-preview-tts"                  # step narration (speech synthesis)
42
+
43
# 1.4 Helper to parse numbered steps out of Gemini text
def parse_numbered_steps(text):
    """Extract "N. description" lines from model output.

    A newline is prepended so a step on the very first line still matches
    the line-anchored pattern. Returns a list of (step_number, description)
    tuples in order of appearance.
    """
    text = "\n" + text
    # \. is escaped so only a real "N." marker starts a step; the original
    # unescaped dot let any character follow the digits (e.g. "10x foo"
    # parsed as step 10 with description "foo").
    steps = re.findall(r"\n\s*(\d+)\.\s*(.*)", text, re.MULTILINE)
    return [(int(num), desc.strip()) for num, desc in steps]
48
+
49
# 1.5 FIXED File Upload Handler
def handle_uploaded_file(uploaded_file):
    """Validate an uploaded image and return (PIL.Image | None, status message).

    Enforces a 5MB size cap and an image MIME-type whitelist, decodes and
    verifies the bytes with PIL, normalizes exotic color modes to RGB, and
    downsizes anything larger than 1024px on its longest side (keeps memory
    bounded on Hugging Face Spaces). On any failure the image is None and
    the message describes the problem.
    """
    if uploaded_file is None:
        return None, "No file uploaded"

    try:
        # NOTE: the original built an unused `file_details` dict here; removed.

        # Validate file size (limit to 5MB for better performance in HF Spaces)
        max_size = 5 * 1024 * 1024  # 5MB
        if uploaded_file.size > max_size:
            return None, f"File size ({uploaded_file.size / 1024 / 1024:.1f}MB) exceeds limit (5MB)"

        # Validate file type more strictly
        allowed_types = ['image/jpeg', 'image/jpg', 'image/png', 'image/bmp', 'image/gif']
        if uploaded_file.type not in allowed_types:
            return None, f"Unsupported file type: {uploaded_file.type}. Allowed: JPG, PNG, BMP, GIF"

        # Read file bytes with error handling
        try:
            file_bytes = uploaded_file.read()
            if len(file_bytes) == 0:
                return None, "File appears to be empty"
        except Exception as read_error:
            return None, f"Error reading file: {str(read_error)}"

        # Rewind the buffer so any later consumer starts at byte 0.
        uploaded_file.seek(0)

        # Try to open and validate the image
        try:
            image = Image.open(BytesIO(file_bytes))

            # verify() raises on corrupt data but also closes the image...
            image.verify()

            # ...so reopen it for actual use.
            image = Image.open(BytesIO(file_bytes))

            # Convert to RGB if necessary (handles RGBA, P mode, etc.)
            if image.mode not in ('RGB', 'L'):
                image = image.convert('RGB')

            # Resize if too large (helps with memory in HF Spaces)
            max_dimension = 1024
            if max(image.size) > max_dimension:
                image.thumbnail((max_dimension, max_dimension), Image.Resampling.LANCZOS)

            return image, "Success"

        except Exception as img_error:
            return None, f"Invalid or corrupted image: {str(img_error)}"

    except Exception as e:
        return None, f"Unexpected error processing file: {str(e)}"
110
 
111
# 1.6 TTS Generation Function with better error handling
@st.cache_data
def generate_tts_audio(_client, text_to_speak):
    """Generates audio from text using Gemini TTS and returns the audio data and its mime type."""
    try:
        # Cap the prompt length so the TTS request cannot time out.
        if len(text_to_speak) > 500:
            text_to_speak = text_to_speak[:500] + "..."

        # Build the voice configuration once, then issue the request.
        voice_settings = types.SpeechConfig(
            voice_config=types.VoiceConfig(
                prebuilt_voice_config=types.PrebuiltVoiceConfig(
                    voice_name='Kore',
                )
            )
        )
        response = _client.models.generate_content(
            model=TTS_MODEL,
            contents=f"Say clearly: {text_to_speak}",
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=voice_settings,
            )
        )
        # The audio payload is the first part of the first candidate.
        first_part = response.candidates[0].content.parts[0]
        return first_part.inline_data.data, first_part.inline_data.mime_type
    except Exception as e:
        st.error(f"Failed to generate narration: {e}")
        return None, None
139
 
140
+ # 1.7 NEW HELPER FUNCTION TO CREATE A WAV FILE IN MEMORY
141
+ def _convert_pcm_to_wav(pcm_data, sample_rate=24000, channels=1, sample_width=2):
142
+ """Wraps raw PCM audio data in a WAV container in memory."""
143
+ audio_buffer = io.BytesIO()
144
+ with wave.open(audio_buffer, 'wb') as wf:
145
+ wf.setnchannels(channels)
146
+ wf.setsampwidth(sample_width)
147
+ wf.setframerate(sample_rate)
148
+ wf.writeframes(pcm_data)
149
+ audio_buffer.seek(0)
150
+ return audio_buffer.getvalue()
151
 
152
  # ─────────────────────────────────────────────────────────────────────────────
153
+ # 2. SESSION STATE SETUP
154
  # ─────────────────────────────────────────────────────────────────────────────
155
+
156
+ if "app_state" not in st.session_state:
157
+ st.session_state.app_state = {
158
+ "steps": [], "images": {}, "tools_list": [], "current_step": 1,
159
+ "done_flags": {}, "notes": {}, "timers": {}, "category": None,
160
+ "prompt_sent": False, "timer_running": {}, "last_tick": {},
161
+ "project_title": "", "project_description": "", "upcycling_options": [],
162
+ "plan_approved": False, "initial_plan": "", "user_image": None,
163
+ "upload_error": None, "upload_attempts": 0, "last_uploaded_file": None
164
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
  # ─────────────────────────────────────────────────────────────────────────────
167
+ # 3. LAYOUT & FUNCTIONS
168
  # ─────────────────────────────────────────────────────────────────────────────
169
+
170
def reset_state():
    """Clear out all session state so user can start fresh."""
    fresh_state = {
        "steps": [], "images": {}, "tools_list": [], "current_step": 1,
        "done_flags": {}, "notes": {}, "timers": {}, "category": None,
        "prompt_sent": False, "timer_running": {}, "last_tick": {},
        "project_title": "", "project_description": "", "upcycling_options": [],
        "plan_approved": False, "initial_plan": "", "user_image": None,
        "upload_error": None, "upload_attempts": 0, "last_uploaded_file": None
    }
    st.session_state.app_state = fresh_state
    st.success(" Reset complete!")
    st.rerun()
182
+
183
def send_text_request(model_name, prompt, image):
    """Helper to send requests that expect only a text response."""
    try:
        session = client.chats.create(model=model_name)
        reply = session.send_message([prompt, image])
        # Concatenate only the text parts; image parts (if any) are ignored.
        text_pieces = [part.text for part in reply.candidates[0].content.parts if part.text]
        return "".join(text_pieces).strip()
    except Exception as e:
        st.error(f"Error with model {model_name}: {str(e)}")
        return None
193
+
194
def initial_analysis(image, context_text):
    """First pass with AI: get category, then title, description, and initial plan.

    Side effects: stores the image, category, title, description and either
    the upcycling options or the initial plan into st.session_state.app_state.
    If the user supplied context, the plan is auto-approved and the detailed
    guide is generated immediately; otherwise the UI asks for approval.
    """
    if image is None:
        st.error("No valid image provided for analysis")
        return

    # Keep the image so generate_detailed_guide_with_images() can reuse it.
    st.session_state.app_state['user_image'] = image

    with st.spinner("🤖 Analyzing your project and preparing a plan..."):
        # Step 1: classify the project into one of the fixed categories.
        category_prompt = (
            "You are an expert DIY assistant. Analyze the user's image and context. "
            f"Context: '{context_text}'. "
            "Categorize the project into ONE of the following: "
            "Home Appliance Repair, Automotive Maintenance, Gardening & Urban Farming, "
            "Upcycling & Sustainable Crafts, or DIY Project Creation. "
            "Reply with ONLY the category name."
        )
        category = send_text_request(CATEGORY_MODEL, category_prompt, image)
        if not category: return
        st.session_state.app_state['category'] = category

        # Step 2: ask for a title, description and high-level plan in a
        # rigid TITLE/DESCRIPTION/INITIAL PLAN layout we can regex-parse.
        plan_prompt = f"""
You are an expert DIY assistant in the category: {category}.
User Context: "{context_text if context_text else 'No context provided.'}"
Based on the image and context, perform the following:
1. **Title:** Create a short, clear title for this project.
2. **Description:** Write a brief, one-paragraph description of the goal.
3. **Initial Plan:**
- If 'Upcycling & Sustainable Crafts' AND no specific project is mentioned, propose three distinct project options as a numbered list under "UPCYCLING OPTIONS:".
- For all other cases, briefly outline the main stages of the proposed solution.
Structure your response EXACTLY like this:
TITLE: [Your title]
DESCRIPTION: [Your description]
INITIAL PLAN:
[Your plan or 3 options]
"""
        plan_response = send_text_request(GENERATION_MODEL, plan_prompt, image)
        if not plan_response: return

        try:
            st.session_state.app_state['project_title'] = re.search(r"TITLE:\s*(.*)", plan_response).group(1).strip()
            # NOTE(review): with re.DOTALL this greedy capture also swallows the
            # "INITIAL PLAN:" section into the description — confirm intended.
            st.session_state.app_state['project_description'] = re.search(r"DESCRIPTION:\s*(.*)", plan_response, re.DOTALL).group(1).strip()
            initial_plan_text = re.search(r"INITIAL PLAN:\s*(.*)", plan_response, re.DOTALL).group(1).strip()

            # Upcycling projects without context get a menu of options instead
            # of a single plan; the UI lets the user pick one later.
            if "UPCYCLING OPTIONS:" in initial_plan_text:
                options = re.findall(r"^\s*\d+\.\s*(.*)", initial_plan_text, re.MULTILINE)
                st.session_state.app_state['upcycling_options'] = options
            else:
                st.session_state.app_state['initial_plan'] = initial_plan_text

            st.session_state.app_state['prompt_sent'] = True
            if context_text:
                # User already told us the goal: skip the approval step.
                st.session_state.app_state['plan_approved'] = True
                generate_detailed_guide_with_images()
            else:
                st.session_state.app_state['plan_approved'] = False
        except AttributeError:
            # One of the re.search calls returned None (.group on None raises
            # AttributeError) — the model ignored the required layout.
            st.error("The AI response was not in the expected format. Please try again.")
            st.session_state.app_state['prompt_sent'] = False
253
+
254
def generate_detailed_guide_with_images(selected_option=None):
    """Generates the detailed guide with steps and illustrations.

    Sends one multimodal request (text + image response modalities), splits
    the reply into a tools list and numbered steps, pairs each step with the
    n-th inline illustration, and seeds per-step done-flags, notes and
    "wait for N seconds/minutes" timers in session state.
    """
    image = st.session_state.app_state.get('user_image')
    if not image:
        st.error("Image not found. Please start over."); return

    context = f"The user has approved the plan for '{st.session_state.app_state['project_title']}'."
    if selected_option:
        context = f"The user chose the upcycling project: '{selected_option}'."

    detailed_prompt = f"""
You are a DIY expert. The user wants to proceed with the project titled "{st.session_state.app_state['project_title']}".
{context}
Provide a detailed guide. For each step, you MUST provide a simple, clear illustrative image.
Format your response EXACTLY like this:
TOOLS AND MATERIALS:
- Tool A
- Material B
STEPS(Maximum 7 steps):
1. First step instructions.
2. Second step instructions...
"""
    with st.spinner("🛠️ Generating your detailed guide with illustrations..."):
        try:
            chat = client.chats.create(
                model=GENERATION_MODEL,
                config=types.GenerateContentConfig(response_modalities=["Text", "Image"])
            )
            full_resp = chat.send_message([detailed_prompt, image])
            gen_parts = full_resp.candidates[0].content.parts

            # Collect all text into one blob and decode every inline image.
            combined_text = ""
            inline_images = []
            for part in gen_parts:
                if part.text is not None:
                    combined_text += part.text + "\n"
                if part.inline_data is not None:
                    img = Image.open(BytesIO(part.inline_data.data))
                    inline_images.append(img)
            combined_text = combined_text.strip()

            # BUGFIX: the prompt template asks the model to echo
            # "STEPS(Maximum 7 steps):", but the old pattern required the
            # literal "STEPS:". STEPS[^:\n]*: accepts both spellings.
            tools_section = re.search(r"TOOLS AND MATERIALS:\s*(.*?)\s*STEPS[^:\n]*:", combined_text, re.DOTALL).group(1).strip()
            steps_section = re.search(r"STEPS[^:\n]*:\s*(.*)", combined_text, re.DOTALL).group(1).strip()
            parsed_steps = parse_numbered_steps(steps_section)

            st.session_state.app_state['tools_list'] = [line.strip("- ").strip() for line in tools_section.split('\n') if line.strip()]
            st.session_state.app_state['steps'] = parsed_steps
            # Pair step N with the N-th generated illustration, when it exists.
            st.session_state.app_state['images'] = {idx: inline_images[idx - 1] for idx, _ in parsed_steps if idx - 1 < len(inline_images)}

            for idx, step_text in parsed_steps:
                st.session_state.app_state['done_flags'][idx] = False
                st.session_state.app_state['notes'][idx] = ""
                # Steps like "wait for 10 minutes" get a countdown timer (seconds).
                timer_match = re.search(r"wait\s+for\s+(\d+)\s+(seconds?|minutes?)", step_text.lower())
                if timer_match:
                    val, unit = int(timer_match.group(1)), timer_match.group(2)
                    st.session_state.app_state['timers'][idx] = val * (60 if "minute" in unit else 1)
                else:
                    st.session_state.app_state['timers'][idx] = 0
        except Exception as e:
            st.error(f"Failed to generate or parse the illustrated guide: {str(e)}")
314
+
315
def render_sidebar_navigation():
    """Render the sidebar: overall progress plus one jump button per step."""
    st.sidebar.markdown("## Steps Navigation")
    steps = st.session_state.app_state['steps']
    if not steps:
        return
    total_steps = len(steps)
    flags = st.session_state.app_state['done_flags']
    completed = sum(1 for is_done in flags.values() if is_done)
    st.sidebar.progress(completed / total_steps if total_steps > 0 else 0)
    st.sidebar.write(f"Progress: {completed}/{total_steps} steps")
    for idx, _ in steps:
        marker = '✓' if flags.get(idx, False) else '·'
        # Clicking a step button jumps straight to that step.
        if st.sidebar.button(f"{marker} Step {idx}", key=f"nav_{idx}"):
            st.session_state.app_state['current_step'] = idx
            st.rerun()
329
+
330
def render_tools_list():
    """Show the required tools/materials in an expander, if any were parsed."""
    tools = st.session_state.app_state['tools_list']
    if not tools:
        return
    with st.expander("🔧 Required Tools & Materials", expanded=True):
        for item in tools:
            st.markdown(f"- {item}")
335
+
336
def render_step(idx, text):
    """Render one guide step: text, narration, illustration, notes and nav.

    Writes the done-flag and notes back into session state; Previous/Next
    buttons move current_step and trigger a rerun.
    """
    total = len(st.session_state.app_state['steps'])
    st.markdown(f"### Step {idx} of {total}")
    st.write(text)

    # FINALIZED TTS Integration
    if st.button(f"🔊 Narrate Step {idx}", key=f"tts_{idx}"):
        with st.spinner("Generating narration..."):
            audio_data, mime_type = generate_tts_audio(client, text)

        if audio_data:
            # BUGFIX: Gemini TTS reports raw PCM as e.g. "audio/L16;rate=24000";
            # the old check was case-sensitive ('pcm' missed 'PCM', 'L16' missed
            # 'l16'). Normalize once, like the previous app version did.
            mt = (mime_type or "").lower()
            if 'l16' in mt or 'pcm' in mt:
                st.info("Raw audio format detected. Converting to WAV for playback...")
                # Convert the raw PCM data to a playable WAV format
                wav_data = _convert_pcm_to_wav(audio_data)
                st.audio(wav_data, format="audio/wav")
            else:
                # If it's already in a standard format (like mp3, ogg), play it directly
                st.audio(audio_data, format=mime_type)
        else:
            st.error("Could not generate audio.")

    if idx in st.session_state.app_state['images']:
        st.image(
            st.session_state.app_state['images'][idx],
            caption=f"Illustration for step {idx}",
            use_container_width=True
        )

    done = st.checkbox("✅ Mark this step as completed", value=st.session_state.app_state['done_flags'].get(idx, False), key=f"done_{idx}")
    st.session_state.app_state['done_flags'][idx] = done
    notes = st.text_area("📝 Your notes for this step:", value=st.session_state.app_state['notes'].get(idx, ""), height=100, key=f"notes_{idx}")
    st.session_state.app_state['notes'][idx] = notes
    st.markdown("---")
    col1, col2, col3 = st.columns([1, 2, 1])
    if idx > 1 and col1.button("⬅️ Previous", key=f"prev_{idx}"):
        st.session_state.app_state['current_step'] -= 1
        st.rerun()
    if idx < total and col3.button("Next ➡️", key=f"next_{idx}"):
        st.session_state.app_state['current_step'] += 1
        st.rerun()
378
 
379
  # ─────────────────────────────────────────────────────────────────────────────
380
+ # 4. APP LAYOUT - FIXED UPLOAD SECTION
381
  # ─────────────────────────────────────────────────────────────────────────────
382
+
383
+ st.set_page_config(page_title="NeoFix DIY Assistant", page_icon="🛠️", layout="wide")
384
+ st.title("🛠️ NeoFix AI-Powered DIY Assistant")
385
+
386
+ with st.expander("ℹ️ How it works", expanded=False):
387
+ st.write("""
388
+ 1. **Upload a photo** of your project or the item you want to fix or build (appliance, car part, plant, craft project).
389
+ 2. **(Optional) Describe your goal** for more accurate results.
390
+ 3. **Review the Plan.** The AI will propose a plan. If you didn't provide a description, you'll be asked to approve it.
391
+ 4. **Get Your Guide** with tools and illustrated step-by-step instructions.
392
+ 5. **Follow the Steps** using the interactive checklist.
393
+ """)
394
+
395
# Top-level UI router: before the first analysis request ('prompt_sent' False)
# show the upload/describe screen; afterwards show the plan/guide screen.
# NOTE(review): relies on st.session_state.app_state being initialized earlier
# in the file — confirm keys ('prompt_sent', 'steps', 'done_flags', …) exist.
if not st.session_state.app_state['prompt_sent']:
    st.markdown("---")
    # Wide upload column (col1) next to a narrow action column (col2).
    col1, col2 = st.columns([3, 1])

    with col1:
        st.markdown("### 📷 Upload Project Image")

        # Surface any error / retry count from previous upload attempts.
        if st.session_state.app_state.get('upload_error'):
            st.error(f"Upload Error: {st.session_state.app_state['upload_error']}")

        if st.session_state.app_state.get('upload_attempts', 0) > 0:
            st.info(f"Upload attempts: {st.session_state.app_state['upload_attempts']}")

        # The widget key changes with each failed attempt so Streamlit
        # recreates the uploader (forces a fresh widget after "Reset Upload").
        upload_key = f"file_upload_{st.session_state.app_state.get('upload_attempts', 0)}"
        uploaded_image = st.file_uploader(
            "Choose an image file",
            type=["jpg", "jpeg", "png", "bmp", "gif"],
            accept_multiple_files=False,
            key=upload_key,
            help="Supported: JPG, PNG, BMP, GIF (max 5MB)"
        )

        # Validate/convert the upload as soon as it arrives.
        processed_image = None
        upload_status = ""

        if uploaded_image is not None:
            # name+size fingerprint distinguishes a brand-new file from a
            # rerun with the same widget value still attached.
            current_file_id = f"{uploaded_image.name}_{uploaded_image.size}"
            if current_file_id != st.session_state.app_state.get('last_uploaded_file'):
                st.session_state.app_state['last_uploaded_file'] = current_file_id

                # handle_uploaded_file returns (image-or-None, status message)
                # — inferred from the unpacking here; defined elsewhere in file.
                with st.spinner("Processing uploaded image..."):
                    processed_image, upload_status = handle_uploaded_file(uploaded_image)

                if processed_image is not None:
                    st.session_state.app_state['upload_error'] = None
                    st.success("✅ Image uploaded and processed successfully!")
                    st.image(processed_image, caption="Uploaded image preview", use_container_width=True)
                else:
                    # Record the failure and bump the attempt counter so the
                    # uploader widget is recreated on the next rerun.
                    st.session_state.app_state['upload_error'] = upload_status
                    st.session_state.app_state['upload_attempts'] += 1
                    st.error(f"❌ {upload_status}")
            else:
                # Same file as last rerun: re-derive the preview instead of
                # caching the image object in session state.
                if st.session_state.app_state.get('upload_error') is None:
                    processed_image, _ = handle_uploaded_file(uploaded_image)
                    if processed_image:
                        st.success("✅ Image ready for analysis!")
                        st.image(processed_image, caption="Uploaded image preview", use_container_width=True)

        # Camera capture as a fallback input path; keyed like the uploader so
        # it also resets with the attempt counter.
        st.markdown("##### Alternative: Take a photo")
        camera_image = st.camera_input("Take a picture", key=f"camera_{st.session_state.app_state.get('upload_attempts', 0)}")
        if camera_image and not uploaded_image:
            with st.spinner("Processing camera image..."):
                processed_image, upload_status = handle_uploaded_file(camera_image)
            if processed_image is not None:
                st.session_state.app_state['upload_error'] = None
                st.success("✅ Photo captured and processed!")
                st.image(processed_image, caption="Camera photo preview", use_container_width=True)
            else:
                st.error(f"❌ {upload_status}")

        # Optional free-text context passed to the AI alongside the image.
        context_text = st.text_area(
            "✏️ Describe the issue or your goal (optional but recommended)",
            height=80,
            placeholder="e.g., 'My toaster won't turn on,' or 'How do I build a desk like this?'"
        )

    with col2:
        st.markdown("### Actions")

        # Enable the main button only when an image source exists and the
        # last processing attempt did not record an error.
        has_valid_image = (uploaded_image is not None or camera_image is not None) and st.session_state.app_state.get('upload_error') is None

        if st.button(
            "🚀 Get AI Guidance",
            type="primary",
            use_container_width=True,
            disabled=not has_valid_image
        ):
            image_to_analyze = None

            # Prefer the file upload over the camera shot when both exist.
            # NOTE(review): the image is re-processed here rather than reusing
            # the preview result from col1 — redundant but harmless.
            if uploaded_image:
                image_to_analyze, status = handle_uploaded_file(uploaded_image)
            elif camera_image:
                image_to_analyze, status = handle_uploaded_file(camera_image)

            if image_to_analyze is not None:
                # Kicks off the AI plan generation (defined elsewhere in file);
                # rerun switches the UI to the prompt_sent branch below.
                initial_analysis(image_to_analyze, context_text)
                st.rerun()
            else:
                st.error(f"❌ Image processing failed: {status}")

        # Explain why the button is disabled.
        if not has_valid_image:
            if uploaded_image is None and camera_image is None:
                st.warning("⚠️ Please upload an image first!")
            elif st.session_state.app_state.get('upload_error'):
                st.warning("⚠️ Fix upload error first!")

        # Self-service recovery tools for flaky uploads.
        with st.expander("🔧 Upload Troubleshooting"):
            st.markdown("""
            **Common fixes:**
            1. **Refresh upload**: Click button below
            2. **Check file size**: Max 5MB
            3. **Try different format**: JPG works best
            4. **Use camera**: If file upload fails
            5. **Clear browser cache**: Ctrl+Shift+Delete
            """)

            # Clearing these keys also changes the widget keys above, which
            # forces Streamlit to rebuild the uploader/camera widgets.
            if st.button("🔄 Reset Upload", use_container_width=True):
                st.session_state.app_state['upload_attempts'] = 0
                st.session_state.app_state['upload_error'] = None
                st.session_state.app_state['last_uploaded_file'] = None
                st.rerun()

        # Raw state dump to help diagnose upload issues in the field.
        if st.checkbox("Show debug info"):
            st.json({
                "upload_attempts": st.session_state.app_state.get('upload_attempts', 0),
                "upload_error": st.session_state.app_state.get('upload_error'),
                "last_file": st.session_state.app_state.get('last_uploaded_file'),
                "has_uploaded_file": uploaded_image is not None,
                "has_camera_image": camera_image is not None
            })

        # reset_state (defined elsewhere) wipes app_state back to defaults.
        if st.button("🔄 Start Over", use_container_width=True):
            reset_state()
else:
    # Post-analysis screen: project summary, then either plan approval /
    # project selection, or the interactive step-by-step guide.
    render_sidebar_navigation()
    st.markdown("---")
    st.markdown(f"### {st.session_state.app_state.get('project_title', 'Your Project')}")
    st.markdown(f"**Category:** `{st.session_state.app_state.get('category', 'N/A')}`")
    st.info(f"**Description:** {st.session_state.app_state.get('project_description', 'N/A')}")
    st.markdown("---")

    if not st.session_state.app_state['steps']:
        # No guide generated yet: either pick among suggested upcycling
        # projects, or approve the single proposed plan.
        if st.session_state.app_state['upcycling_options']:
            st.markdown("#### The AI has suggested a few projects. Please choose one:")
            for i, option in enumerate(st.session_state.app_state['upcycling_options']):
                if st.button(option, key=f"option_{i}"):
                    generate_detailed_guide_with_images(selected_option=option)
                    st.rerun()
        elif not st.session_state.app_state['plan_approved']:
            st.markdown("#### The AI has proposed the following plan:")
            st.success(st.session_state.app_state['initial_plan'])
            if st.button("✅ Looks good, proceed with this plan", type="primary"):
                st.session_state.app_state['plan_approved'] = True
                generate_detailed_guide_with_images()
                st.rerun()
    else:
        # Guide exists: show tools, the current step, and overall progress.
        render_tools_list()
        st.markdown("---")
        # 'current_step' is 1-based; steps entries are (number, text) pairs —
        # inferred from the unpacking below.
        current_step_index = st.session_state.app_state['current_step']
        try:
            step_num, step_text = st.session_state.app_state['steps'][current_step_index - 1]
            render_step(step_num, step_text)
        except IndexError:
            # Stale index (e.g. guide regenerated shorter): snap back to step 1.
            st.session_state.app_state['current_step'] = 1
            st.rerun()

        total_steps = len(st.session_state.app_state['steps'])
        # done_flags maps step identifiers to completion booleans — inferred
        # from .values() usage; populated elsewhere in the file.
        done_count = sum(1 for d in st.session_state.app_state['done_flags'].values() if d)
        if total_steps > 0:
            progress = done_count / total_steps
            st.progress(progress)
            st.markdown(f"**Overall Progress:** {done_count} of {total_steps} completed ({progress:.0%})")
            if done_count == total_steps:
                st.balloons()
                st.success("🎉 Congratulations! You've completed all steps!")

    # Available on the whole post-analysis screen: discard everything and
    # return to the upload screen.
    if st.button("🔄 Start Over"):
        reset_state()