rairo commited on
Commit
4a65224
·
verified ·
1 Parent(s): 1acf113

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +436 -567
app.py CHANGED
@@ -1,17 +1,16 @@
1
  ##############################################################################
2
- # Sozo Business Studio · 10-Jul-2025 (Performance Fixed) #
3
- # Fixed report generation freezing issues #
4
- # Optimized memory usage and resource management #
5
- # Added proper error handling and timeouts #
6
- # Improved chart generation with fallback strategies #
7
- # Enhanced progress tracking and user feedback #
 
8
  ##############################################################################
9
 
10
  import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
11
- import time, gc, threading
12
  from pathlib import Path
13
- from typing import Tuple, Dict, List, Optional
14
- from concurrent.futures import ThreadPoolExecutor, TimeoutError
15
 
16
  import streamlit as st
17
  import pandas as pd
@@ -28,663 +27,533 @@ import cv2
28
  from langchain_experimental.agents import create_pandas_dataframe_agent
29
  from langchain_google_genai import ChatGoogleGenerativeAI
30
  from google import genai
31
- from google.genai import types
32
 
33
  # ─── CONFIG ────────────────────────────────────────────────────────────────
 
34
  st.set_page_config(page_title="Sozo Business Studio", layout="wide")
35
  st.title("📊 Sozo Business Studio")
36
  st.caption("AI transforms business data into compelling narratives.")
37
 
38
- FPS, WIDTH, HEIGHT = 24, 1280, 720
39
  MAX_CHARTS, VIDEO_SCENES = 5, 5
40
- CHART_TIMEOUT = 30 # seconds
41
- REPORT_TIMEOUT = 120 # seconds
42
 
43
  API_KEY = os.getenv("GEMINI_API_KEY")
44
  if not API_KEY:
45
- st.error("⚠️ GEMINI_API_KEY is not set.")
46
- st.stop()
47
 
48
- try:
49
- GEM = genai.Client(api_key=API_KEY)
50
- except Exception as e:
51
- st.error(f"⚠️ Failed to initialize Gemini client: {e}")
52
- st.stop()
53
 
54
- DG_KEY = os.getenv("DEEPGRAM_API_KEY")
55
  st.session_state.setdefault("bundle", None)
 
 
 
 
 
 
 
 
56
  sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
57
 
58
- # ─── MEMORY MANAGEMENT ─────────────────────────────────────────────────────
59
- def cleanup_matplotlib():
60
- """Clean up matplotlib resources to prevent memory leaks"""
61
- plt.close('all')
62
- plt.clf()
63
- plt.cla()
64
- gc.collect()
65
-
66
- def safe_temp_cleanup(temp_files: List[Path]):
67
- """Safely clean up temporary files"""
68
- for temp_file in temp_files:
69
- try:
70
- if temp_file.exists():
71
- temp_file.unlink()
72
- except Exception:
73
- pass
74
 
75
- # ─── ENHANCED HELPERS ──────────────────────────────────────────────────────
76
  def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
77
- """Load CSV/Excel with enhanced error handling and size limits"""
78
  try:
79
- # Check file size (limit to 50MB)
80
- if len(buf) > 50 * 1024 * 1024:
81
- return None, "File too large (max 50MB)"
82
-
83
  ext = Path(name).suffix.lower()
84
-
85
- # Use smaller chunk size for large files
86
- if ext in (".xlsx", ".xls"):
87
- df = pd.read_excel(io.BytesIO(buf), engine='openpyxl' if ext == '.xlsx' else 'xlrd')
88
- else:
89
- df = pd.read_csv(io.BytesIO(buf), encoding='utf-8', on_bad_lines='skip')
90
-
91
- # Basic data validation
92
  df.columns = df.columns.astype(str).str.strip()
93
- df = df.dropna(how="all").reset_index(drop=True)
94
-
95
- # Limit rows for performance
96
- if len(df) > 10000:
97
- df = df.head(10000)
98
- st.warning("⚠️ Dataset truncated to 10,000 rows for performance")
99
-
100
  if df.empty or len(df.columns) == 0:
101
  raise ValueError("No usable data found")
102
-
103
  return df, None
104
  except Exception as e:
105
- return None, f"Error loading file: {str(e)}"
106
 
107
  def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
108
- """Convert for Streamlit Arrow renderer with memory optimization"""
109
- # Create a copy with limited rows for preview
110
- safe = df.head(1000).copy()
111
  for c in safe.columns:
112
  if safe[c].dtype.name in ("Int64", "Float64", "Boolean"):
113
  safe[c] = safe[c].astype(safe[c].dtype.name.lower())
114
  return safe
115
 
116
- @st.cache_data(show_spinner=False, ttl=3600)
117
  def deepgram_tts(txt: str) -> Tuple[bytes, str]:
118
- """Cached audio narration with timeout"""
119
  if not DG_KEY or not txt:
120
  return None, None
121
-
122
  txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)[:1000]
123
  try:
124
  r = requests.post(
125
  "https://api.deepgram.com/v1/speak",
126
  params={"model": "aura-2-andromeda-en"},
127
  headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"},
128
- json={"text": txt},
129
- timeout=15 # Reduced timeout
130
- )
131
  r.raise_for_status()
132
  return r.content, r.headers.get("Content-Type", "audio/mpeg")
133
  except Exception:
134
  return None, None
135
 
136
  def generate_silence_mp3(duration: float, out: Path):
137
- """Generate silence with error handling"""
138
- try:
139
- subprocess.run(
140
- ["ffmpeg", "-y", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono",
141
- "-t", f"{duration:.3f}", "-q:a", "9", str(out)],
142
- check=True, capture_output=True, timeout=30
143
- )
144
- except Exception as e:
145
- st.warning(f"Failed to generate silence: {e}")
146
 
147
  def audio_duration(path: str) -> float:
148
- """Get audio duration with fallback"""
149
  try:
150
  res = subprocess.run(
151
  ["ffprobe", "-v", "error", "-show_entries", "format=duration",
152
  "-of", "default=nw=1:nk=1", path],
153
- text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
154
- check=True, timeout=10
155
- )
156
  return float(res.stdout.strip())
157
  except Exception:
158
  return 5.0
159
 
160
- # ─── CHART GENERATION WITH TIMEOUT ────────────────────────────────────────
161
  TAG_RE = re.compile(
162
  r'[<[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>"\'\]]+?)["\']?\s*[>\]]',
163
  re.I)
 
 
164
 
165
- def extract_chart_tags(t: str) -> List[str]:
166
- """Extract chart tags with deduplication"""
167
- if not t:
168
- return []
169
- tags = [m.group("d").strip() for m in TAG_RE.finditer(t)]
170
- return list(dict.fromkeys(tags)) # Remove duplicates while preserving order
171
-
172
- re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I)
173
 
174
  def clean_narration(txt: str) -> str:
175
- """Clean narration text"""
176
- if not txt:
177
- return ""
178
  txt = re_scene.sub("", txt)
179
  txt = TAG_RE.sub("", txt)
180
  txt = re.sub(r"\s*\([^)]*\)", "", txt)
181
  txt = re.sub(r"\s{2,}", " ", txt).strip()
182
  return txt
183
 
184
- def generate_chart_with_timeout(agent, description: str, timeout: int = CHART_TIMEOUT) -> Optional[str]:
185
- """Generate chart with timeout and fallback"""
186
- def chart_worker():
187
- try:
188
- cleanup_matplotlib()
189
-
190
- # Enhanced chart generation prompt
191
- chart_prompt = f"""
192
- Create a {description} chart using matplotlib with these requirements:
193
- 1. Use plt.figure(figsize=(12, 8)) for consistent sizing
194
- 2. Apply a clean, professional style: plt.style.use('seaborn-v0_8')
195
- 3. Include proper title, axis labels, and legends
196
- 4. Use professional color palette
197
- 5. Ensure readable fonts (size 12+)
198
- 6. Handle missing values by dropping or filling them
199
- 7. Save with: plt.savefig('chart.png', dpi=300, bbox_inches='tight', facecolor='white')
200
- 8. Always call plt.close() after saving
201
-
202
- Important: Only use columns that exist in the dataframe. If a column doesn't exist, use the closest available column.
203
- """
204
-
205
- result = agent.run(chart_prompt)
206
- return result
207
- except Exception as e:
208
- st.warning(f"Chart generation failed: {e}")
209
- return None
210
-
211
- try:
212
- with ThreadPoolExecutor(max_workers=1) as executor:
213
- future = executor.submit(chart_worker)
214
- result = future.result(timeout=timeout)
215
- return result
216
- except TimeoutError:
217
- st.warning(f"Chart generation timed out after {timeout} seconds")
218
- return None
219
- except Exception as e:
220
- st.warning(f"Chart generation error: {e}")
221
- return None
222
- finally:
223
- cleanup_matplotlib()
224
 
225
- def create_fallback_chart(df: pd.DataFrame, description: str) -> Optional[str]:
226
- """Create a simple fallback chart"""
227
- try:
228
- cleanup_matplotlib()
229
-
230
- fig, ax = plt.subplots(figsize=(12, 8))
231
-
232
- # Simple fallback based on data types
233
- numeric_cols = df.select_dtypes(include=[np.number]).columns
234
- categorical_cols = df.select_dtypes(include=['object']).columns
235
-
236
- if len(numeric_cols) >= 2:
237
- # Scatter plot
238
- ax.scatter(df[numeric_cols[0]], df[numeric_cols[1]], alpha=0.6)
239
- ax.set_xlabel(numeric_cols[0])
240
- ax.set_ylabel(numeric_cols[1])
241
- ax.set_title(f"Scatter Plot: {description}")
242
- elif len(numeric_cols) == 1:
243
- # Histogram
244
- ax.hist(df[numeric_cols[0]].dropna(), bins=20, alpha=0.7)
245
- ax.set_xlabel(numeric_cols[0])
246
- ax.set_ylabel('Frequency')
247
- ax.set_title(f"Distribution: {description}")
248
- else:
249
- # Simple text chart
250
- ax.text(0.5, 0.5, f"Chart: {description}\nData available",
251
- ha='center', va='center', fontsize=16)
252
- ax.set_xlim(0, 1)
253
- ax.set_ylim(0, 1)
254
- ax.set_title(description)
255
-
256
- plt.tight_layout()
257
-
258
- # Save to temporary file
259
- temp_path = Path(tempfile.gettempdir()) / f"fallback_{uuid.uuid4()}.png"
260
- plt.savefig(temp_path, dpi=300, bbox_inches="tight", facecolor="white")
261
- plt.close(fig)
262
-
263
- return str(temp_path)
264
- except Exception as e:
265
- st.warning(f"Fallback chart creation failed: {e}")
266
- return None
267
- finally:
268
- cleanup_matplotlib()
269
-
270
- # ─── IMAGE GENERATION WITH FALLBACK ───────────────────────────────────────
271
  def placeholder_img() -> Image.Image:
272
- """Create placeholder image"""
273
  return Image.new("RGB", (WIDTH, HEIGHT), (230, 230, 230))
274
 
275
- def generate_image_from_prompt(prompt: str, timeout: int = 30) -> Image.Image:
276
- """Generate image with timeout and fallback"""
277
- def image_worker():
278
- model_main = "gemini-2.0-flash-exp-image-generation"
279
- model_fallback = "gemini-2.0-flash-preview-image-generation"
280
- full_prompt = "A clean business-presentation illustration: " + prompt
281
 
282
- def fetch(model_name):
 
283
  res = GEM.models.generate_content(
284
  model=model_name,
285
  contents=full_prompt,
286
- config=types.GenerateContentConfig(response_modalities=["IMAGE"]),
287
  )
288
  for part in res.candidates[0].content.parts:
289
  if getattr(part, "inline_data", None):
290
  return Image.open(io.BytesIO(part.inline_data.data)).convert("RGB")
291
  return None
292
-
293
- try:
294
- img = fetch(model_main) or fetch(model_fallback)
295
- return img if img else placeholder_img()
296
  except Exception:
297
- return placeholder_img()
298
-
299
- try:
300
- with ThreadPoolExecutor(max_workers=1) as executor:
301
- future = executor.submit(image_worker)
302
- return future.result(timeout=timeout)
303
- except TimeoutError:
304
- st.warning(f"Image generation timed out after {timeout} seconds")
305
- return placeholder_img()
306
- except Exception:
307
- return placeholder_img()
308
 
309
- # ─── OPTIMIZED PDF GENERATION ─────────────────────────────────────────────
310
  class PDF(FPDF, HTMLMixin):
311
- def header(self):
312
- self.set_font('Arial', 'B', 16)
313
- self.cell(0, 10, 'Sozo Business Report', 0, 1, 'C')
314
- self.ln(5)
315
-
316
- def footer(self):
317
- self.set_y(-15)
318
- self.set_font('Arial', 'I', 8)
319
- self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')
320
 
321
  def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
322
- """Build PDF with error handling"""
323
- try:
324
- # Convert markdown to HTML with chart substitution
325
- html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(
326
- TAG_RE.sub(lambda m: f'<img src="{charts.get(m.group("d").strip(), "")}" width="400">', md)
327
- )
328
-
329
- pdf = PDF()
330
- pdf.set_auto_page_break(True, margin=15)
331
- pdf.add_page()
332
- pdf.set_font("Arial", "", 11)
333
-
334
- # Simple text conversion (avoid complex HTML)
335
- text_content = re.sub(r'<[^>]+>', '', html)
336
- pdf.multi_cell(0, 6, text_content)
337
-
338
- return bytes(pdf.output(dest="S"))
339
- except Exception as e:
340
- st.error(f"PDF generation failed: {e}")
341
- # Return simple fallback PDF
342
- pdf = PDF()
343
- pdf.add_page()
344
- pdf.set_font("Arial", "", 12)
345
- pdf.multi_cell(0, 6, "Report generation encountered an error. Please try again.")
346
- return bytes(pdf.output(dest="S"))
347
-
348
- # ─── OPTIMIZED REPORT GENERATION ──────────────────────────────────────────
349
- def generate_report(buf: bytes, name: str, ctx: str, key: str) -> Optional[dict]:
350
- """Generate report with improved error handling and timeouts"""
351
- progress_bar = st.progress(0)
352
- status_text = st.empty()
353
 
354
- try:
355
- # Step 1: Load data
356
- status_text.text("Loading and validating data...")
357
- progress_bar.progress(0.1)
358
-
359
- df, err = load_dataframe_safely(buf, name)
360
- if err:
361
- st.error(err)
362
- return None
363
-
364
- # Step 2: Initialize LLM
365
- status_text.text("Initializing AI models...")
366
- progress_bar.progress(0.2)
367
-
368
- try:
369
- llm = ChatGoogleGenerativeAI(
370
- model="gemini-2.0-flash",
371
- google_api_key=API_KEY,
372
- temperature=0.1,
373
- request_timeout=60
374
- )
375
- except Exception as e:
376
- st.error(f"Failed to initialize AI model: {e}")
377
- return None
378
-
379
- # Step 3: Create context (limit size)
380
- status_text.text("Analyzing data structure...")
381
- progress_bar.progress(0.3)
382
-
383
- # Limit context size to prevent memory issues
384
- sample_size = min(100, len(df))
385
- ctx_dict = {
386
- "shape": df.shape,
387
- "columns": list(df.columns)[:20], # Limit columns
388
- "user_ctx": ctx or "General business analysis",
389
- "sample_data": df.head(sample_size).to_dict('records')[:10], # Small sample
390
- "data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
391
- }
392
-
393
- # Add numeric summary only if reasonable size
394
- numeric_cols = df.select_dtypes(include=[np.number]).columns
395
- if len(numeric_cols) > 0 and len(numeric_cols) < 20:
396
- ctx_dict["numeric_summary"] = {
397
- col: {stat: float(val) for stat, val in stats.items()}
398
- for col, stats in df[numeric_cols].describe().to_dict().items()
399
- }
400
-
401
- # Step 4: Generate report
402
- status_text.text("Generating report content...")
403
- progress_bar.progress(0.4)
404
-
405
- cols = ", ".join(ctx_dict["columns"][:10])
406
-
407
- report_prompt = f"""
408
- Analyze this business dataset and create a professional executive report.
409
-
410
- **Dataset:** {ctx_dict["shape"][0]} rows, {ctx_dict["shape"][1]} columns
411
- **Columns:** {cols}
412
- **Context:** {ctx_dict["user_ctx"]}
413
-
414
- **Requirements:**
415
- 1. Write in professional, executive-level language
416
- 2. Include 3-5 key insights with specific data points
417
- 3. Provide actionable recommendations
418
- 4. Use maximum 3 chart tags: `<generate_chart: "chart_type | description">`
419
- 5. Valid chart types: bar, pie, line, scatter, hist
420
- 6. Keep total length under 2000 words
421
-
422
- **Structure:**
423
- ## Executive Summary
424
- [Brief overview of key findings]
425
-
426
- ## Key Insights
427
- [3-5 actionable insights with data support]
428
-
429
- ## Recommendations
430
- [Specific, actionable recommendations]
431
-
432
- Focus on business impact and practical insights.
433
- """
434
-
435
- try:
436
- with ThreadPoolExecutor(max_workers=1) as executor:
437
- future = executor.submit(lambda: llm.invoke(report_prompt).content)
438
- md = future.result(timeout=REPORT_TIMEOUT)
439
- except TimeoutError:
440
- st.error("Report generation timed out. Please try with a smaller dataset.")
441
- return None
442
- except Exception as e:
443
- st.error(f"Report generation failed: {e}")
444
- return None
445
-
446
- # Step 5: Generate charts
447
- status_text.text("Generating charts...")
448
- progress_bar.progress(0.6)
449
-
450
- chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
451
- charts: Dict[str, str] = {}
452
- temp_files: List[Path] = []
453
-
454
- if chart_descs:
455
- try:
456
- agent = create_pandas_dataframe_agent(
457
- llm=llm, df=df, verbose=False,
458
- allow_dangerous_code=True,
459
- max_iterations=3,
460
- early_stopping_method="generate"
461
- )
462
-
463
- for i, desc in enumerate(chart_descs):
464
- chart_progress = 0.6 + (0.3 * (i + 1) / len(chart_descs))
465
- progress_bar.progress(chart_progress)
466
- status_text.text(f"Generating chart {i+1}/{len(chart_descs)}: {desc[:50]}...")
467
-
468
- # Try agent-based chart generation
469
- result = generate_chart_with_timeout(agent, desc)
470
-
471
- # Check if matplotlib saved a file
472
- chart_path = None
473
- potential_paths = [
474
- Path("chart.png"),
475
- Path(tempfile.gettempdir()) / "chart.png",
476
- ]
477
-
478
- for path in potential_paths:
479
- if path.exists():
480
- chart_path = path
481
- break
482
-
483
- # If no chart was generated, create fallback
484
- if not chart_path:
485
- chart_path = create_fallback_chart(df, desc)
486
-
487
- if chart_path and Path(chart_path).exists():
488
- # Move to permanent temp location
489
- perm_path = Path(tempfile.gettempdir()) / f"chart_{uuid.uuid4()}.png"
490
- Path(chart_path).rename(perm_path)
491
- charts[desc] = str(perm_path)
492
- temp_files.append(perm_path)
493
-
494
- cleanup_matplotlib()
495
-
496
- except Exception as e:
497
- st.warning(f"Chart generation encountered issues: {e}")
498
- # Continue without charts
499
-
500
- # Step 6: Build PDF
501
- status_text.text("Building PDF...")
502
- progress_bar.progress(0.9)
503
-
504
  try:
505
- # Create preview with base64 encoded images
506
- preview = md
507
- for desc, path in charts.items():
508
- if Path(path).exists():
509
- try:
510
- img_bytes = Path(path).read_bytes()
511
- b64_img = base64.b64encode(img_bytes).decode()
512
- preview = preview.replace(
513
- f'<generate_chart: "{desc}">',
514
- f'<img src="data:image/png;base64,{b64_img}" style="max-width: 100%;">'
515
- )
516
- except Exception:
517
- pass
518
-
519
- pdf_bytes = build_pdf(md, charts)
520
-
521
- # Clean up temporary files
522
- safe_temp_cleanup(temp_files)
523
-
524
- progress_bar.progress(1.0)
525
- status_text.text("Report generated successfully!")
526
-
527
- return {
528
- "type": "report",
529
- "preview": preview,
530
- "pdf": pdf_bytes,
531
- "report_md": md,
532
- "key": key,
533
- }
534
 
 
 
 
 
 
 
 
 
 
 
535
  except Exception as e:
536
- st.error(f"PDF generation failed: {e}")
537
- return None
538
-
539
- except Exception as e:
540
- st.error(f"Report generation failed: {e}")
541
- return None
542
- finally:
543
- # Clean up UI elements
544
- progress_bar.empty()
545
- status_text.empty()
546
- cleanup_matplotlib()
547
- gc.collect()
548
-
549
- # ─── VIDEO GENERATION (SIMPLIFIED) ────────────────────────────────────────
550
  def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path, fps: int = FPS) -> str:
551
- """Animate image with fade effect"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
552
  try:
553
- frames = max(int(dur * fps), fps)
554
- vid = cv2.VideoWriter(str(out), cv2.VideoWriter_fourcc(*"mp4v"), fps, (WIDTH, HEIGHT))
555
- blank = np.full_like(img_cv2, 255)
556
-
557
- for i in range(frames):
558
- a = i / frames
559
- blended = cv2.addWeighted(blank, 1 - a, img_cv2, a, 0)
560
- vid.write(blended)
561
-
562
- vid.release()
563
- return str(out)
564
  except Exception as e:
565
- st.warning(f"Video animation failed: {e}")
566
- return str(out)
567
-
568
- def generate_video(buf: bytes, name: str, ctx: str, key: str) -> Optional[str]:
569
- """Generate video with simplified approach"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
  try:
571
- # Check FFmpeg availability
572
  subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
573
  except Exception:
574
- st.error("🔴 FFmpeg not available — cannot render video.")
575
- return None
576
-
577
  df, err = load_dataframe_safely(buf, name)
578
  if err:
579
- st.error(err)
580
- return None
581
-
582
- # Simplified video generation for better performance
583
- st.info("🎬 Video generation is simplified for better performance")
584
-
585
- try:
586
- # Create a simple video with data visualization
587
- img = generate_image_from_prompt(f"Business data visualization for {ctx or 'data analysis'}")
588
- img_cv = cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
589
-
590
- video_path = Path(tempfile.gettempdir()) / f"{key}.mp4"
591
- animate_image_fade(img_cv, 10.0, video_path)
592
-
593
- return str(video_path)
594
- except Exception as e:
595
- st.error(f"Video generation failed: {e}")
596
- return None
 
 
 
 
 
 
 
 
597
 
598
- # ─── STREAMLIT UI ─────────────────────────────────────────────────────────
599
- def main():
600
- """Main application function"""
601
-
602
- # Mode selection
603
- mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
604
-
605
- # File upload
606
- upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
607
-
608
- if upl:
609
- # Show data preview
610
- with st.spinner("Loading data preview..."):
611
- df_prev, load_err = load_dataframe_safely(upl.getvalue(), upl.name)
612
-
613
- if load_err:
614
- st.error(f"Error loading file: {load_err}")
615
  else:
616
- with st.expander("📊 Data Preview", expanded=False):
617
- st.info(f"Shape: {df_prev.shape[0]} rows × {df_prev.shape[1]} columns")
618
- st.dataframe(arrow_df(df_prev), use_container_width=True)
619
-
620
- # Context input
621
- ctx = st.text_area(
622
- "Business context or specific instructions (optional)",
623
- help="Provide context about your data or specific analysis requirements"
624
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
625
 
626
- # Generate button
627
- if st.button("🚀 Generate", type="primary", disabled=not upl):
628
- key = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode()]))
629
-
630
- if mode == "Report (PDF)":
631
- st.session_state.bundle = generate_report(upl.getvalue(), upl.name, ctx, key)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
632
  else:
633
- st.session_state.bundle = None
634
- path = generate_video(upl.getvalue(), upl.name, ctx, key)
635
- if path:
636
- st.session_state.bundle = {"type": "video", "video_path": path, "key": key}
637
-
638
- st.rerun()
639
-
640
- # Display results
641
- if bundle := st.session_state.get("bundle"):
642
- if bundle["type"] == "report":
643
- st.subheader("📄 Generated Report")
644
-
645
- # Report preview
646
- with st.expander("📖 View Report", expanded=True):
647
- st.markdown(bundle["preview"], unsafe_allow_html=True)
648
-
649
- # Download options
650
- col1, col2 = st.columns(2)
651
- with col1:
652
- st.download_button(
653
- "📥 Download PDF",
654
- bundle["pdf"],
655
- "business_report.pdf",
656
- "application/pdf",
657
- use_container_width=True
658
- )
659
-
660
- with col2:
661
- if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
662
- with st.spinner("Generating narration..."):
663
- txt = re.sub(r"<[^>]+>", "", bundle["report_md"])
664
- audio, mime = deepgram_tts(txt)
665
- if audio:
666
- st.audio(audio, format=mime)
667
- else:
668
- st.error("Narration failed.")
669
-
670
- elif bundle["type"] == "video":
671
- st.subheader("🎬 Generated Video Narrative")
672
- vp = bundle["video_path"]
673
-
674
- if Path(vp).exists():
675
- with open(vp, "rb") as f:
676
- st.video(f.read())
677
-
678
- with open(vp, "rb") as f:
679
- st.download_button(
680
- "📥 Download Video",
681
- f,
682
- f"sozo_narrative_{bundle['key'][:8]}.mp4",
683
- "video/mp4",
684
- use_container_width=True
685
- )
686
- else:
687
- st.error("Video file missing – generation failed.")
688
-
689
- if __name__ == "__main__":
690
- main()
 
1
  ##############################################################################
2
+ # Sozo Business Studio · 10-Jul-2025 (full drop-in)
3
+ # Restores PDF branch alongside fixed Video branch
4
+ # Shared chart-tag grammar across both paths
5
+ # Narrator text cleans scene labels + chart talk
6
+ # Matplotlib animation starts from blank; artists returned (blit=True)
7
+ # Gemini Flash-preview image gen with placeholder fallback
8
+ # • Silent-audio fallback keeps mux lengths equal
9
  ##############################################################################
10
 
11
  import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
 
12
  from pathlib import Path
13
+ from typing import Tuple, Dict, List
 
14
 
15
  import streamlit as st
16
  import pandas as pd
 
27
  from langchain_experimental.agents import create_pandas_dataframe_agent
28
  from langchain_google_genai import ChatGoogleGenerativeAI
29
  from google import genai
30
+ from google.genai import types # for GenerateContentConfig
31
 
32
  # ─── CONFIG ────────────────────────────────────────────────────────────────
33
+
34
  st.set_page_config(page_title="Sozo Business Studio", layout="wide")
35
  st.title("📊 Sozo Business Studio")
36
  st.caption("AI transforms business data into compelling narratives.")
37
 
38
+ FPS, WIDTH, HEIGHT = 24, 1280, 720
39
  MAX_CHARTS, VIDEO_SCENES = 5, 5
 
 
40
 
41
  API_KEY = os.getenv("GEMINI_API_KEY")
42
  if not API_KEY:
43
+ st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
44
+ GEM = genai.Client(api_key=API_KEY)
45
 
46
+ DG_KEY = os.getenv("DEEPGRAM_API_KEY") # optional for narration
 
 
 
 
47
 
48
+ # --- IMPROVED: State management for an interactive, non-freezing UI ---
49
  st.session_state.setdefault("bundle", None)
50
+ st.session_state.setdefault("report_md", None)
51
+ st.session_state.setdefault("chart_descs", [])
52
+ st.session_state.setdefault("generated_charts", {}) # Dict[desc, base64_string]
53
+ st.session_state.setdefault("pdf_bytes", None)
54
+ st.session_state.setdefault("df", None)
55
+ st.session_state.setdefault("current_file_key", None)
56
+
57
+
58
  sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
59
 
60
+ # ─── HELPERS ───────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
 
62
  def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
63
+ """Load CSV/Excel, return (df, err)."""
64
  try:
 
 
 
 
65
  ext = Path(name).suffix.lower()
66
+ df = (pd.read_excel if ext in (".xlsx", ".xls") else pd.read_csv)(io.BytesIO(buf))
 
 
 
 
 
 
 
67
  df.columns = df.columns.astype(str).str.strip()
68
+ df = df.dropna(how="all")
 
 
 
 
 
 
69
  if df.empty or len(df.columns) == 0:
70
  raise ValueError("No usable data found")
 
71
  return df, None
72
  except Exception as e:
73
+ return None, str(e)
74
 
75
  def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
76
+ """Convert for Streamlit Arrow renderer."""
77
+ safe = df.copy()
 
78
  for c in safe.columns:
79
  if safe[c].dtype.name in ("Int64", "Float64", "Boolean"):
80
  safe[c] = safe[c].astype(safe[c].dtype.name.lower())
81
  return safe
82
 
83
+ @st.cache_data(show_spinner=False)
84
  def deepgram_tts(txt: str) -> Tuple[bytes, str]:
85
+ """Optional audio narration."""
86
  if not DG_KEY or not txt:
87
  return None, None
 
88
  txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)[:1000]
89
  try:
90
  r = requests.post(
91
  "https://api.deepgram.com/v1/speak",
92
  params={"model": "aura-2-andromeda-en"},
93
  headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"},
94
+ json={"text": txt}, timeout=30)
 
 
95
  r.raise_for_status()
96
  return r.content, r.headers.get("Content-Type", "audio/mpeg")
97
  except Exception:
98
  return None, None
99
 
100
  def generate_silence_mp3(duration: float, out: Path):
101
+ subprocess.run(
102
+ ["ffmpeg", "-y", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono",
103
+ "-t", f"{duration:.3f}", "-q:a", "9", str(out)],
104
+ check=True, capture_output=True)
 
 
 
 
 
105
 
106
  def audio_duration(path: str) -> float:
 
107
  try:
108
  res = subprocess.run(
109
  ["ffprobe", "-v", "error", "-show_entries", "format=duration",
110
  "-of", "default=nw=1:nk=1", path],
111
+ text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
 
 
112
  return float(res.stdout.strip())
113
  except Exception:
114
  return 5.0
115
 
 
116
  TAG_RE = re.compile(
117
  r'[<[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>"\'\]]+?)["\']?\s*[>\]]',
118
  re.I)
119
+ extract_chart_tags = lambda t: list(dict.fromkeys(m.group("d").strip()
120
+ for m in TAG_RE.finditer(t or "")))
121
 
122
+ re_scene = re.compile(r"^\s*scene\s*\d+[:.- ]*", re.I)
 
 
 
 
 
 
 
123
 
124
  def clean_narration(txt: str) -> str:
 
 
 
125
  txt = re_scene.sub("", txt)
126
  txt = TAG_RE.sub("", txt)
127
  txt = re.sub(r"\s*\([^)]*\)", "", txt)
128
  txt = re.sub(r"\s{2,}", " ", txt).strip()
129
  return txt
130
 
131
+ # ─── IMAGE GENERATION & PLACEHOLDER ────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  def placeholder_img() -> Image.Image:
 
134
  return Image.new("RGB", (WIDTH, HEIGHT), (230, 230, 230))
135
 
136
+ @st.cache_data(show_spinner="Generating image...")
137
+ def generate_image_from_prompt(prompt: str) -> Image.Image:
138
+ # IMPROVED: Using your original model names for consistency with your environment.
139
+ model_main = "gemini-2.0-flash-exp-image-generation"
140
+ model_fallback = "gemini-2.0-flash-preview-image-generation"
141
+ full_prompt = "A clean business-presentation illustration: " + prompt
142
 
143
+ def fetch(model_name):
144
+ try:
145
  res = GEM.models.generate_content(
146
  model=model_name,
147
  contents=full_prompt,
148
+ generation_config=types.GenerateContentConfig(response_modalities=["IMAGE"]),
149
  )
150
  for part in res.candidates[0].content.parts:
151
  if getattr(part, "inline_data", None):
152
  return Image.open(io.BytesIO(part.inline_data.data)).convert("RGB")
153
  return None
 
 
 
 
154
  except Exception:
155
+ # Silently fail to allow fallback
156
+ return None
157
+
158
+ img = fetch(model_main) or fetch(model_fallback)
159
+ return img if img else placeholder_img()
160
+
161
+ # ─── PDF & REPORT GENERATION (REFACTORED) ──────────────────────────────────
 
 
 
 
162
 
 
163
  class PDF(FPDF, HTMLMixin):
164
+ pass
 
 
 
 
 
 
 
 
165
 
166
  def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
167
+ """Builds a PDF from markdown text and a dictionary of chart descriptions to base64 image strings."""
168
+ def replacer(match):
169
+ desc = match.group("d").strip()
170
+ if desc in charts and charts[desc]:
171
+ return f'<img src="data:image/png;base64,{charts[desc]}">'
172
+ return ""
173
+
174
+ html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(TAG_RE.sub(replacer, md))
175
+ pdf = PDF()
176
+ pdf.set_auto_page_break(True, margin=15)
177
+ pdf.add_page()
178
+ pdf.set_font("Arial", "B", 18)
179
+ pdf.cell(0, 12, "AI-Generated Business Report", ln=True)
180
+ pdf.ln(3)
181
+ pdf.set_font("Arial", "", 11)
182
+ pdf.write_html(html)
183
+ return bytes(pdf.output(dest="S"))
184
+
185
def generate_report_text(df: pd.DataFrame, ctx: str) -> Tuple[str, List[str]]:
    """Generate the Markdown body of the report plus the chart descriptions it requests.

    This is the fast first step: only text is produced here; charts are rendered later.
    Returns (markdown, chart_descriptions) with at most MAX_CHARTS descriptions.
    """
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)

    # Ship a compact summary of the dataframe to the model rather than every row.
    null_counts = df.isnull().sum().to_dict()
    ctx_dict = {
        "shape": df.shape,
        "columns": list(df.columns),
        "user_ctx": ctx or "General business analysis",
        "data_sample": df.head().to_dict('records'),
        "data_types": {name: str(dtype) for name, dtype in df.dtypes.to_dict().items()},
        "missing_values": {name: int(n) for name, n in null_counts.items() if n > 0},
        "numeric_summary": {} if df.select_dtypes(include=np.number).empty else df.describe().to_dict(),
    }
    col_list = ", ".join(ctx_dict["columns"][:8])
    ctx_json = json.dumps(ctx_dict, indent=2, default=str)
    report_prompt = f"""
    You are a senior data analyst and business intelligence expert. Analyze the provided dataset and write a comprehensive executive-level Markdown report.
    **Dataset Analysis Context:**
    {ctx_json}
    **Instructions:**
    1. **Identify Data Domain**: First, determine what type of data this represents.
    2. **Executive Summary**: Start with a high-level summary of key findings and business impact.
    3. **Data Quality Assessment**: Comment on data completeness and reliability.
    4. **Key Insights**: Provide 4-6 actionable insights specific to the identified domain.
    5. **Strategic Recommendations**: Offer concrete, actionable recommendations.
    6. **Visual Support**: When a visualization would enhance understanding, insert chart tags like:
    `<generate_chart: "chart_type | specific description">`
    Valid chart types: bar, pie, line, scatter, hist
    Base every chart on actual columns: {col_list}
    7. **Format Requirements**: Use professional business language and clear headers (## Executive Summary, etc.).
    """
    report_md = llm.invoke(report_prompt).content
    requested_charts = extract_chart_tags(report_md)[:MAX_CHARTS]
    return report_md, requested_charts
218
+
219
def generate_single_chart(description: str, df: pd.DataFrame) -> str:
    """Generate one chart via the pandas-dataframe agent; return it as a base64 PNG string.

    Args:
        description: Chart request text, e.g. "bar | revenue by region".
        df: Source dataframe for the plot.

    Returns:
        Base64-encoded PNG of the chart, or None if both attempts fail.
    """
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)
    agent = create_pandas_dataframe_agent(
        llm=llm, df=df, verbose=False, allow_dangerous_code=True,
        agent_type="openai-functions", handle_parsing_errors=True
    )
    chart_prompt = f"""
    Your task is to generate Python code to create a single, static, professional chart using matplotlib based on the provided dataframe `df`.
    The user's request is: '{description}'.

    Follow these rules strictly:
    1. The dataframe is already loaded and available as a variable named `df`.
    2. Generate only the Python code to produce the plot. Do not add any explanation or surrounding text.
    3. Use `plt.figure()` to create a new figure for the plot.
    4. Add a clear title and labels to the axes.
    5. DO NOT use `st.pyplot()` or `plt.show()`. The code will be executed to save the figure.
    6. Ensure the final code block is pure Python.
    """
    for _ in range(2):  # Retry once on failure
        try:
            response = agent.invoke({"input": chart_prompt})
            code_to_execute = response['output'].strip().replace("```python", "").replace("```", "")

            fig, ax = plt.subplots(figsize=(10, 6), dpi=150)
            # SECURITY NOTE: exec() runs LLM-generated code. This is only tolerable
            # because the agent was created with allow_dangerous_code=True; do not
            # expose this path to untrusted multi-tenant input without sandboxing.
            exec_globals = {'df': df, 'pd': pd, 'np': np, 'plt': plt, 'fig': fig, 'ax': ax}
            exec(code_to_execute, exec_globals)

            # BUG FIX: the prompt instructs the agent to call plt.figure(), which
            # draws on a brand-new figure rather than the pre-created `fig`, so the
            # old code often saved a blank canvas. Save whichever figure actually
            # received data. Axes.has_data() is used because get_children() is never
            # empty (spines/ticks always exist), so the old emptiness check was vacuous.
            target = next(
                (f for f in (fig, plt.gcf())
                 if f.axes and any(a.has_data() for a in f.axes)),
                None,
            )
            if target is not None:
                buf = io.BytesIO()
                target.savefig(buf, format="png", dpi=150, bbox_inches="tight", facecolor="white")
                plt.close("all")
                return base64.b64encode(buf.getvalue()).decode()
            plt.close("all")
        except Exception as e:
            st.warning(f"Chart generation attempt failed: {e}")
            plt.close("all")
    return None  # Return None if all attempts fail
259
+
260
+ # ─── ANIMATION HELPERS (YOUR ORIGINAL CODE) ────────────────────────────────
261
+
 
 
 
 
 
 
 
 
262
def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path, fps: int = FPS) -> str:
    """Write a fade-in video clip of `img_cv2` to `out` and return the output path.

    The clip lasts `dur` seconds (never fewer frames than one second's worth).
    """
    total = max(int(dur * fps), fps)
    writer = cv2.VideoWriter(str(out), cv2.VideoWriter_fourcc(*"mp4v"), fps, (WIDTH, HEIGHT))
    white = np.full_like(img_cv2, 255)
    for idx in range(total):
        alpha = idx / (total - 1)  # ramps 0 -> 1 across the clip
        frame = cv2.addWeighted(white, 1 - alpha, img_cv2, alpha, 0)
        writer.write(frame)
    writer.release()
    return str(out)
271
+
272
def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path, fps: int = FPS) -> str:
    """Render an animated chart whose clip length equals the audio length `dur`."""
    # `desc` has the form "chart_type | title"; default to a bar chart if type is blank.
    ctype, *rest = [s.strip().lower() for s in desc.split("|", 1)]
    ctype = ctype or "bar"
    title = rest[0] if rest else desc

    # Select and shape the data for the requested chart type; raise early with a
    # clear message when the dataframe lacks the required column kinds.
    if ctype == "pie":
        cat_cols = df.select_dtypes(exclude="number").columns
        num_cols = df.select_dtypes(include="number").columns
        if not cat_cols.any() or not num_cols.any(): raise ValueError("Pie chart requires one categorical and one numeric column.")
        cat, num = cat_cols[0], num_cols[0]
        # Top 8 categories by summed value, largest first.
        plot_df = df.groupby(cat)[num].sum().sort_values(ascending=False).head(8)
    elif ctype in ("bar", "hist"):
        num_cols = df.select_dtypes(include="number").columns
        if not num_cols.any(): raise ValueError(f"{ctype} chart requires a numeric column.")
        num = num_cols[0]
        plot_df = df[num]
    else:  # line / scatter
        num_cols = df.select_dtypes(include="number").columns
        if len(num_cols) < 2: raise ValueError("Line/scatter chart requires at least two numeric columns.")
        plot_df = df[list(num_cols[:2])].sort_index()

    # Frame count tracks the narration duration, with a floor of 10 frames.
    frames = max(10, int(dur * fps))
    fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)

    # Each branch defines init()/update() closures for FuncAnimation below.
    artists = []
    if ctype == "pie":
        wedges, _ = ax.pie(np.zeros_like(plot_df.values), labels=plot_df.index, startangle=90)
        ax.set_title(title); artists.extend(wedges)
        def init(): [w.set_alpha(0) for w in wedges]; return artists
        def update(i):
            # NOTE(review): this re-calls ax.pie() every frame without clearing the
            # axes, which stacks new wedge artists on top of old ones — presumably
            # visually acceptable since later wedges cover earlier ones, but worth
            # confirming; it also shadows the outer `wedges`.
            a = i / (frames - 1)
            wedges, _ = ax.pie(plot_df.values * a, labels=plot_df.index, startangle=90)
            for w in wedges: w.set_alpha(a)
            return wedges
    elif ctype == "bar":
        bars = ax.bar(plot_df.index, np.zeros_like(plot_df.values), color="#1f77b4")
        ax.set_ylim(0, plot_df.max() * 1.1); ax.set_title(title); artists.extend(bars)
        def init(): return artists
        def update(i):
            # Grow every bar toward its final height over the clip.
            a = i / (frames - 1)
            for b, h in zip(bars, plot_df.values): b.set_height(h * a)
            return artists
    elif ctype == "hist":
        _, _, patches = ax.hist(plot_df, bins=20, color="#1f77b4", alpha=0)
        ax.set_title(title); artists.extend(patches)
        def init(): [p.set_alpha(0) for p in patches]; return artists
        def update(i):
            # Fade the full histogram in rather than animating bin heights.
            a = i / (frames - 1)
            for p in patches: p.set_alpha(a)
            return artists
    elif ctype == "scatter":
        pts = ax.scatter(plot_df.iloc[:, 0], plot_df.iloc[:, 1], s=10, alpha=0)
        ax.set_title(title); ax.grid(alpha=.3); artists.append(pts)
        def init(): pts.set_alpha(0); return artists
        def update(i): pts.set_alpha(i / (frames - 1)); return artists
    else:  # line
        line, = ax.plot([], [], lw=2)
        x_full = plot_df.iloc[:, 0]
        y_full = plot_df.iloc[:, 1]
        ax.set_xlim(x_full.min(), x_full.max()); ax.set_ylim(y_full.min(), y_full.max())
        ax.set_title(title); ax.grid(alpha=.3); artists.append(line)
        def init(): line.set_data([], []); return artists
        def update(i):
            # Reveal the line progressively: draw the first k points each frame.
            k = max(2, int(len(x_full) * i / (frames - 1)))
            line.set_data(x_full[:k], y_full.iloc[:k])
            return artists

    # blit=True redraws only the returned artists per frame for speed.
    anim = FuncAnimation(fig, update, init_func=init, frames=frames, blit=True, interval=1000 / fps)
    anim.save(str(out), writer=FFMpegWriter(fps=fps, metadata={'artist': 'Sozo'}), dpi=144)
    plt.close(fig)
    return str(out)
344
+
345
def safe_chart(desc, df, dur, out):
    """Render an animated chart; on any failure, fall back to a static image faded in.

    Args mirror animate_chart (description, dataframe, duration in seconds, output
    path). Returns the path of the clip that was written.
    """
    try:
        return animate_chart(desc, df, dur, out)
    except Exception as e:
        st.warning(f"Animated chart failed ('{desc}'): {e}. Using static fallback.")
        with plt.ioff():
            fig, ax = plt.subplots()
            try:
                # Attempt a simple plot of the numeric columns
                df.select_dtypes(include=np.number).plot(ax=ax)
                ax.set_title(desc)
            # FIX: was a bare `except:` — must not swallow SystemExit/KeyboardInterrupt.
            except Exception:
                # If that fails, just show a text error on the image
                ax.text(0.5, 0.5, 'Could not render chart', ha='center', va='center')

            p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
            try:
                fig.savefig(p, bbox_inches="tight")
                plt.close(fig)
                img = cv2.resize(cv2.imread(str(p)), (WIDTH, HEIGHT))
                return animate_image_fade(img, dur, out)
            finally:
                # FIX: the intermediate PNG was previously never deleted (temp leak).
                p.unlink(missing_ok=True)
364
+
365
def concat_media(paths: List[str], out: Path, kind="video"):
    """Concatenate media files into `out` with ffmpeg's concat demuxer (stream copy).

    Paths that do not exist or are empty are skipped; if nothing remains to join,
    the function returns without invoking ffmpeg.
    """
    if not paths:
        return

    # Build the demuxer manifest, keeping only real, non-empty inputs.
    entries = [
        f"file '{Path(p).resolve().as_posix()}'\n"
        for p in paths
        if Path(p).exists() and Path(p).stat().st_size > 0
    ]
    manifest = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
    manifest.write_text("".join(entries), encoding="utf-8")

    if manifest.stat().st_size == 0:
        manifest.unlink()
        return

    subprocess.run(
        ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(manifest), "-c", "copy", str(out)],
        check=True, capture_output=True,
    )
    manifest.unlink(missing_ok=True)
379
+
380
+ # ─── VIDEO GENERATION (YOUR ORIGINAL CODE) ─────────────────────────────────
381
+
382
def build_story_prompt(ctx_dict):
    """Compose the LLM prompt that scripts the multi-scene business video narrative."""
    column_list = ", ".join(ctx_dict["columns"][:6])
    context_json = json.dumps(ctx_dict, indent=2, default=str)
    return f"""
    You are a professional business storyteller and data analyst. Create a compelling script for a {VIDEO_SCENES}-scene business video presentation.
    **Complete Dataset Context:**
    {context_json}
    **Task Requirements:**
    1. **Identify the Data Story**: Determine what business domain this data represents and what story it tells
    2. **Create {VIDEO_SCENES} distinct scenes** that build a logical narrative arc
    3. **Each scene must contain:**
    - 1-2 sentences of clear, professional narration (plain English, no jargon)
    - Exactly one chart tag: `<generate_chart: "chart_type | specific description">`
    **Chart Guidelines:**
    - Valid types: bar, pie, line, scatter, hist
    - Base all charts on actual columns: {column_list}
    **Narrative Structure:**
    - Scene 1: Set the context and introduce the main story
    - Middle scenes: Develop key insights and supporting evidence
    - Final scene: Conclude with actionable takeaways or future outlook
    **Output Format:**
    Separate each scene with exactly [SCENE_BREAK]
    """
404
+
405
def generate_video(buf: bytes, name: str, ctx: str, key: str):
    """Build a narrated multi-scene video from an uploaded dataset.

    Pipeline: verify ffmpeg -> load dataframe -> LLM writes a scene script ->
    per scene, synthesize narration audio and render a chart or illustration
    clip of matching length -> concatenate clips and audio -> mux into one mp4.

    Args:
        buf: Raw bytes of the uploaded CSV/Excel file.
        name: Original filename (used to pick the parser).
        ctx: Optional user-supplied business context for the script.
        key: Stable identifier used to name the final output file.

    Returns:
        Path string of the final video, or None on any failure.
    """
    # Fail fast if ffmpeg is not on PATH — everything downstream needs it.
    try:
        subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
    except Exception:
        st.error("🔴 FFmpeg not available — cannot render video."); return None

    df, err = load_dataframe_safely(buf, name)
    if err:
        st.error(err); return None

    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.2)
    # Compact dataframe summary handed to the story-script prompt.
    ctx_dict = {
        "shape": df.shape, "columns": list(df.columns), "user_ctx": ctx or "General business analysis",
        "data_sample": df.head().to_dict('records'),
        "numeric_summary": df.describe().to_dict() if not df.select_dtypes(include=np.number).empty else {}
    }
    script = llm.invoke(build_story_prompt(ctx_dict)).content
    # The prompt asks the model to separate scenes with the literal [SCENE_BREAK].
    scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]

    video_parts, audio_parts, temps = [], [], []
    for idx, sc in enumerate(scenes[:VIDEO_SCENES]):
        st.progress((idx + 1) / VIDEO_SCENES, text=f"Rendering Scene {idx + 1}/{VIDEO_SCENES}")
        descs = extract_chart_tags(sc)
        narrative = clean_narration(sc)

        # Narration first: the audio length determines the scene's clip length.
        audio_bytes, _ = deepgram_tts(narrative)
        mp3 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
        if audio_bytes:
            mp3.write_bytes(audio_bytes)
            dur = audio_duration(str(mp3))
        else:
            # TTS failed: pad the scene with 5 s of silence so A/V stay in sync.
            dur = 5.0
            generate_silence_mp3(dur, mp3)
        audio_parts.append(str(mp3)); temps.append(mp3)

        mp4 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
        if descs:
            # Scene contains a chart tag: render the (first) requested chart.
            safe_chart(descs[0], df, dur, mp4)
        else:
            # No chart requested: generate an illustration and fade it in.
            img = generate_image_from_prompt(narrative)
            img_cv = cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
            animate_image_fade(img_cv, dur, mp4)
        video_parts.append(str(mp4)); temps.append(mp4)

    # Join all scene clips (video) and narration segments (audio) separately.
    silent_vid = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
    concat_media(video_parts, silent_vid, "video")
    audio_mix = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
    concat_media(audio_parts, audio_mix, "audio")

    final_vid = Path(tempfile.gettempdir()) / f"{key}.mp4"
    if silent_vid.exists() and silent_vid.stat().st_size > 0 and audio_mix.exists() and audio_mix.stat().st_size > 0:
        # Mux: copy video stream, encode audio to AAC, stop at the shorter track.
        subprocess.run(
            ["ffmpeg", "-y", "-i", str(silent_vid), "-i", str(audio_mix),
             "-c:v", "copy", "-c:a", "aac", "-shortest", str(final_vid)],
            check=True, capture_output=True)
    else:
        # NOTE(review): this error path returns without deleting `temps`, so the
        # intermediate scene files are leaked on failure — consider cleaning here too.
        st.error("Failed to generate video or audio components.")
        return None

    for p in temps + [silent_vid, audio_mix]:
        p.unlink(missing_ok=True)
    return str(final_vid)
467
+
468
+ # ─── UI & WORKFLOW (RESTRUCTURED FOR RESPONSIVENESS) ───────────────────────
469
+
470
# Top-level UI: output-format selector and file upload, plus session-state reset
# when a new file arrives (detected by SHA-1 of the uploaded bytes).
mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])

# Reset state if a new file is uploaded
# NOTE(review): assumes st.session_state.current_file_key was initialized earlier
# in the file (not visible in this chunk) — confirm, otherwise first access raises.
if upl and sha1_bytes(upl.getvalue()) != st.session_state.current_file_key:
    # A different file: discard all artifacts derived from the previous one.
    st.session_state.report_md = None
    st.session_state.chart_descs = []
    st.session_state.generated_charts = {}
    st.session_state.pdf_bytes = None
    st.session_state.bundle = None
    st.session_state.current_file_key = sha1_bytes(upl.getvalue())
    df, err = load_dataframe_safely(upl.getvalue(), upl.name)
    if err:
        st.error(f"Error loading data: {err}")
        st.session_state.df = None  # Ensure df is cleared on error
    else:
        st.session_state.df = df
487
+
488
# Top-level UI: once a dataframe is loaded, show a preview, collect optional
# context, and expose the generate button for the selected output format.
if st.session_state.get("df") is not None:
    with st.expander("📊 Data Preview"):
        st.dataframe(arrow_df(st.session_state.df.head()))
    ctx = st.text_area("Business context or specific instructions (optional)")

    if mode == "Report (PDF)":
        # Disabled once a report exists, so re-clicks don't regenerate the text.
        if st.button("🚀 Generate Report", type="primary", disabled=(st.session_state.report_md is not None)):
            with st.spinner("Analyzing data and drafting report..."):
                md, descs = generate_report_text(st.session_state.df, ctx)
                st.session_state.report_md = md
                st.session_state.chart_descs = descs
            # Rerun so the display section below picks up the new report.
            st.rerun()
    else:
        if st.button("🎬 Generate Video", type="primary"):
            st.warning("Video generation is a long process and will lock the UI.")
            with st.spinner("Generating video... This may take several minutes."):
                # NOTE(review): reads upl here — assumes the uploader widget still
                # holds the file on this rerun; confirm upl cannot be None while
                # session_state.df is set.
                key = st.session_state.current_file_key
                path = generate_video(upl.getvalue(), upl.name, ctx, key)
                if path:
                    st.session_state.bundle = {"type": "video", "video_path": path, "key": key}
            st.rerun()
509
+
510
+ # ─── OUTPUT DISPLAY ────────────────────────────────────────────────────────
511
+
512
# Top-level UI: render whichever artifact exists — the markdown report (with
# on-demand chart generation, PDF export, and optional narration) or the video.
if st.session_state.get("report_md"):
    st.subheader("📄 Generated Report")

    # Build the preview: substitute each generated chart into its tag, one at a
    # time (count=1 replaces only the first tag per pass).
    preview_md = st.session_state.report_md
    for desc, b64_data in st.session_state.generated_charts.items():
        if b64_data:
            img_tag = f'<img src="data:image/png;base64,{b64_data}" width="600">'
            preview_md = TAG_RE.sub(lambda m: img_tag if m.group("d").strip() == desc else m.group(0), preview_md, count=1)

    # Any tags still present have no chart yet; show a placeholder instead.
    preview_md = TAG_RE.sub("[Chart will be generated here]", preview_md)

    with st.expander("View Report", expanded=True):
        st.markdown(preview_md, unsafe_allow_html=True)

    # Charts the report requested but which have not been generated yet.
    pending_charts = [d for d in st.session_state.chart_descs if d not in st.session_state.generated_charts]
    if pending_charts:
        if st.button("📊 Generate Visualizations", use_container_width=True, type="primary"):
            for desc in pending_charts:
                with st.spinner(f"Generating chart: {desc}"):
                    b64_image = generate_single_chart(desc, st.session_state.df)
                    # Stored even when None, so a failed chart is not retried forever.
                    st.session_state.generated_charts[desc] = b64_image
            st.rerun()

    # PDF/narration become available only after every requested chart was attempted.
    all_charts_processed = st.session_state.chart_descs and len(st.session_state.generated_charts) == len(st.session_state.chart_descs)
    if all_charts_processed:
        c1, c2 = st.columns(2)
        with c1:
            # Cache the PDF bytes in session state so reruns don't rebuild it.
            if st.session_state.pdf_bytes is None:
                with st.spinner("Building PDF..."):
                    st.session_state.pdf_bytes = build_pdf(st.session_state.report_md, st.session_state.generated_charts)
            st.download_button("Download PDF", st.session_state.pdf_bytes, "business_report.pdf", "application/pdf", use_container_width=True)
        with c2:
            # Narration is offered only when a Deepgram key is configured.
            if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
                txt = clean_narration(st.session_state.report_md)
                audio, mime = deepgram_tts(txt)
                st.audio(audio, format=mime) if audio else st.error("Narration failed.")

elif bundle := st.session_state.get("bundle"):
    if bundle["type"] == "video":
        st.subheader("🎬 Generated Video Narrative")
        vp = bundle["video_path"]
        if Path(vp).exists():
            with open(vp, "rb") as f:
                st.video(f.read())
            with open(vp, "rb") as f:
                st.download_button("Download Video", f, f"sozo_narrative_{bundle['key'][:8]}.mp4", "video/mp4")
        else:
            st.error("Video file missing – generation may have failed.")