Adk-Analyst2

Sleeping

App Files Community

rairo committed on Jul 6, 2025

Commit

e8d80ac

verified ·

1 Parent(s): f2b1c99

Update app.py

Browse files

Files changed (1) hide show

app.py +483 -299

app.py CHANGED Viewed

@@ -1,16 +1,18 @@
 ##############################################################################
-# Sozo Business Studio · 10-Jul-2025 (full drop-in)
 # • Restores PDF branch alongside fixed Video branch
 # • Shared chart-tag grammar across both paths
 # • Narrator text cleans scene labels + chart talk
 # • Matplotlib animation starts from blank; artists returned (blit=True)
 # • Gemini Flash-preview image gen with placeholder fallback
 # • Silent-audio fallback keeps mux lengths equal
 ##############################################################################
 import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
 from pathlib import Path
 from typing import Tuple, Dict, List
 import streamlit as st
 import pandas as pd
@@ -19,6 +21,7 @@ import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 from matplotlib.animation import FuncAnimation, FFMpegWriter
 from fpdf import FPDF, HTMLMixin
 from markdown_it import MarkdownIt
 from PIL import Image
@@ -27,15 +30,14 @@ import cv2
 from langchain_experimental.agents import create_pandas_dataframe_agent
 from langchain_google_genai import ChatGoogleGenerativeAI
 from google import genai
-from google.genai import types   # for GenerateContentConfig
 # ─── CONFIG ────────────────────────────────────────────────────────────────
 st.set_page_config(page_title="Sozo Business Studio", layout="wide")
 st.title("📊 Sozo Business Studio")
 st.caption("AI transforms business data into compelling narratives.")
-FPS, WIDTH, HEIGHT  = 24, 1280, 720
 MAX_CHARTS, VIDEO_SCENES = 5, 5
 API_KEY = os.getenv("GEMINI_API_KEY")
@@ -43,22 +45,17 @@ if not API_KEY:
     st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
 GEM = genai.Client(api_key=API_KEY)
-DG_KEY = os.getenv("DEEPGRAM_API_KEY")  # optional for narration
-# --- IMPROVED: State management for an interactive, non-freezing UI ---
-st.session_state.setdefault("bundle", None)
-st.session_state.setdefault("report_md", None)
-st.session_state.setdefault("chart_descs", [])
-st.session_state.setdefault("generated_charts", {}) # Dict[desc, base64_string]
-st.session_state.setdefault("pdf_bytes", None)
-st.session_state.setdefault("df", None)
-st.session_state.setdefault("current_file_key", None)
 sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
-# ─── HELPERS ───────────────────────────────────────────────────────────────
 def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
     """Load CSV/Excel, return (df, err)."""
     try:
@@ -90,8 +87,13 @@ def deepgram_tts(txt: str) -> Tuple[bytes, str]:
         r = requests.post(
             "https://api.deepgram.com/v1/speak",
             params={"model": "aura-2-andromeda-en"},
-            headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"},
-            json={"text": txt}, timeout=30)
         r.raise_for_status()
         return r.content, r.headers.get("Content-Type", "audio/mpeg")
     except Exception:
@@ -99,78 +101,96 @@ def deepgram_tts(txt: str) -> Tuple[bytes, str]:
 def generate_silence_mp3(duration: float, out: Path):
     subprocess.run(
-        ["ffmpeg", "-y", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono",
-         "-t", f"{duration:.3f}", "-q:a", "9", str(out)],
-        check=True, capture_output=True)
 def audio_duration(path: str) -> float:
     try:
         res = subprocess.run(
-            ["ffprobe", "-v", "error", "-show_entries", "format=duration",
-             "-of", "default=nw=1:nk=1", path],
-            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
         return float(res.stdout.strip())
     except Exception:
         return 5.0
 TAG_RE = re.compile(
-    r'[<[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>"\'\]]+?)["\']?\s*[>\]]',
-    re.I)
-extract_chart_tags = lambda t: list(dict.fromkeys(m.group("d").strip()
-                                                  for m in TAG_RE.finditer(t or "")))
-# --- FIXED: Escaped the hyphen to treat it as a literal character ---
-re_scene = re.compile(r"^\s*scene\s*\d+[:\.- ]*", re.I)
 def clean_narration(txt: str) -> str:
     txt = re_scene.sub("", txt)
     txt = TAG_RE.sub("", txt)
     txt = re.sub(r"\s*\([^)]*\)", "", txt)
-    txt = re.sub(r"\s{2,}", " ", txt).strip()
-    return txt
 # ─── IMAGE GENERATION & PLACEHOLDER ────────────────────────────────────────
 def placeholder_img() -> Image.Image:
     return Image.new("RGB", (WIDTH, HEIGHT), (230, 230, 230))
-@st.cache_data(show_spinner="Generating image...")
 def generate_image_from_prompt(prompt: str) -> Image.Image:
     model_main = "gemini-2.0-flash-exp-image-generation"
     model_fallback = "gemini-2.0-flash-preview-image-generation"
     full_prompt = "A clean business-presentation illustration: " + prompt
     def fetch(model_name):
-        try:
-            res = GEM.models.generate_content(
-                model=model_name,
-                contents=full_prompt,
-                generation_config=types.GenerateContentConfig(response_modalities=["IMAGE"]),
-            )
-            for part in res.candidates[0].content.parts:
-                if getattr(part, "inline_data", None):
-                    return Image.open(io.BytesIO(part.inline_data.data)).convert("RGB")
-            return None
-        except Exception:
-            return None
-    img = fetch(model_main) or fetch(model_fallback)
-    return img if img else placeholder_img()
-# ─── PDF & REPORT GENERATION (REFACTORED) ──────────────────────────────────
 class PDF(FPDF, HTMLMixin):
     pass
 def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
-    """Builds a PDF from markdown text and a dictionary of chart descriptions to base64 image strings."""
-    def replacer(match):
-        desc = match.group("d").strip()
-        if desc in charts and charts[desc]:
-            return f'<img src="data:image/png;base64,{charts[desc]}">'
-        return ""
-    html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(TAG_RE.sub(replacer, md))
     pdf = PDF()
     pdf.set_auto_page_break(True, margin=15)
     pdf.add_page()
@@ -179,251 +199,404 @@ def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
     pdf.ln(3)
     pdf.set_font("Arial", "", 11)
     pdf.write_html(html)
-    return bytes(pdf.output(dest="S"))
-def generate_report_text(df: pd.DataFrame, ctx: str) -> Tuple[str, List[str]]:
-    """Generates only the text part of the report. This is the fast, first step."""
-    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)
     ctx_dict = {
-        "shape": df.shape, "columns": list(df.columns), "user_ctx": ctx or "General business analysis",
-        "data_sample": df.head().to_dict('records'),
         "data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
-        "missing_values": {col: int(count) for col, count in df.isnull().sum().to_dict().items() if count > 0},
-        "numeric_summary": df.describe().to_dict() if not df.select_dtypes(include=np.number).empty else {}
     }
-    cols = ", ".join(ctx_dict["columns"][:8])
     report_prompt = f"""
     You are a senior data analyst and business intelligence expert. Analyze the provided dataset and write a comprehensive executive-level Markdown report.
     **Dataset Analysis Context:**
-    {json.dumps(ctx_dict, indent=2, default=str)}
     **Instructions:**
-    1. **Identify Data Domain**: First, determine what type of data this represents.
     2. **Executive Summary**: Start with a high-level summary of key findings and business impact.
-    3. **Data Quality Assessment**: Comment on data completeness and reliability.
-    4. **Key Insights**: Provide 4-6 actionable insights specific to the identified domain.
-    5. **Strategic Recommendations**: Offer concrete, actionable recommendations.
-    6. **Visual Support**: When a visualization would enhance understanding, insert chart tags like:
-       `<generate_chart: "chart_type | specific description">`
        Valid chart types: bar, pie, line, scatter, hist
        Base every chart on actual columns: {cols}
-    7. **Format Requirements**: Use professional business language and clear headers (## Executive Summary, etc.).
     """
     md = llm.invoke(report_prompt).content
     chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
-    return md, chart_descs
-def generate_single_chart(description: str, df: pd.DataFrame) -> str:
-    """Generates one chart using the agent and returns it as a base64 string. More reliable."""
-    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)
-    agent = create_pandas_dataframe_agent(
-        llm=llm, df=df, verbose=False, allow_dangerous_code=True,
-        agent_type="openai-functions", handle_parsing_errors=True
-    )
-    chart_prompt = f"""
-    Your task is to generate Python code to create a single, static, professional chart using matplotlib based on the provided dataframe `df`.
-    The user's request is: '{description}'.
-    Follow these rules strictly:
-    1.  The dataframe is already loaded and available as a variable named `df`.
-    2.  Generate only the Python code to produce the plot. Do not add any explanation or surrounding text.
-    3.  Use `plt.figure()` to create a new figure for the plot.
-    4.  Add a clear title and labels to the axes.
-    5.  DO NOT use `st.pyplot()` or `plt.show()`. The code will be executed to save the figure.
-    6.  Ensure the final code block is pure Python.
-    """
-    for _ in range(2):  # Retry once on failure
         try:
-            response = agent.invoke({"input": chart_prompt})
-            code_to_execute = response['output'].strip().replace("```python", "").replace("```", "")
-            fig, ax = plt.subplots(figsize=(10, 6), dpi=150)
-            exec_globals = {'df': df, 'pd': pd, 'np': np, 'plt': plt, 'fig': fig, 'ax': ax}
-            exec(code_to_execute, exec_globals)
-            if fig.axes and any(ax.get_children() for ax in fig.axes):
-                buf = io.BytesIO()
-                fig.savefig(buf, format="png", dpi=150, bbox_inches="tight", facecolor="white")
-                plt.close(fig)
-                return base64.b64encode(buf.getvalue()).decode()
-            plt.close(fig)
-        except Exception as e:
-            st.warning(f"Chart generation attempt failed: {e}")
-            plt.close("all")
-    return None
-# ─── ANIMATION HELPERS (YOUR ORIGINAL CODE) ────────────────────────────────
-def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path, fps: int = FPS) -> str:
     frames = max(int(dur * fps), fps)
-    vid = cv2.VideoWriter(str(out), cv2.VideoWriter_fourcc(*"mp4v"), fps, (WIDTH, HEIGHT))
     blank = np.full_like(img_cv2, 255)
     for i in range(frames):
-        a = i / (frames - 1) if frames > 1 else 1.0
         vid.write(cv2.addWeighted(blank, 1 - a, img_cv2, a, 0))
     vid.release()
     return str(out)
-def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path, fps: int = FPS) -> str:
-    """Render an animated chart whose clip length equals the audio length `dur`."""
     ctype, *rest = [s.strip().lower() for s in desc.split("|", 1)]
     ctype = ctype or "bar"
     title = rest[0] if rest else desc
     if ctype == "pie":
-        cat_cols = df.select_dtypes(exclude="number").columns
-        num_cols = df.select_dtypes(include="number").columns
-        if not cat_cols.any() or not num_cols.any(): raise ValueError("Pie chart requires one categorical and one numeric column.")
-        cat, num = cat_cols[0], num_cols[0]
         plot_df = df.groupby(cat)[num].sum().sort_values(ascending=False).head(8)
     elif ctype in ("bar", "hist"):
-        num_cols = df.select_dtypes(include="number").columns
-        if not num_cols.any(): raise ValueError(f"{ctype} chart requires a numeric column.")
-        num = num_cols[0]
         plot_df = df[num]
-    else:  # line / scatter
-        num_cols = df.select_dtypes(include="number").columns
-        if len(num_cols) < 2: raise ValueError("Line/scatter chart requires at least two numeric columns.")
-        plot_df = df[list(num_cols[:2])].sort_index()
     frames = max(10, int(dur * fps))
     fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
-    artists = []
     if ctype == "pie":
-        wedges, _ = ax.pie(np.zeros_like(plot_df.values), labels=plot_df.index, startangle=90)
-        ax.set_title(title); artists.extend(wedges)
-        def init(): [w.set_alpha(0) for w in wedges]; return artists
         def update(i):
             a = i / (frames - 1)
-            wedges, _ = ax.pie(plot_df.values * a, labels=plot_df.index, startangle=90)
             for w in wedges: w.set_alpha(a)
             return wedges
     elif ctype == "bar":
         bars = ax.bar(plot_df.index, np.zeros_like(plot_df.values), color="#1f77b4")
-        ax.set_ylim(0, plot_df.max() * 1.1); ax.set_title(title); artists.extend(bars)
-        def init(): return artists
         def update(i):
             a = i / (frames - 1)
-            for b, h in zip(bars, plot_df.values): b.set_height(h * a)
-            return artists
     elif ctype == "hist":
         _, _, patches = ax.hist(plot_df, bins=20, color="#1f77b4", alpha=0)
-        ax.set_title(title); artists.extend(patches)
-        def init(): [p.set_alpha(0) for p in patches]; return artists
         def update(i):
             a = i / (frames - 1)
             for p in patches: p.set_alpha(a)
-            return artists
     elif ctype == "scatter":
-        pts = ax.scatter(plot_df.iloc[:, 0], plot_df.iloc[:, 1], s=10, alpha=0)
-        ax.set_title(title); ax.grid(alpha=.3); artists.append(pts)
-        def init(): pts.set_alpha(0); return artists
-        def update(i): pts.set_alpha(i / (frames - 1)); return artists
     else:  # line
         line, = ax.plot([], [], lw=2)
-        x_full = plot_df.iloc[:, 0]
-        y_full = plot_df.iloc[:, 1]
-        ax.set_xlim(x_full.min(), x_full.max()); ax.set_ylim(y_full.min(), y_full.max())
-        ax.set_title(title); ax.grid(alpha=.3); artists.append(line)
-        def init(): line.set_data([], []); return artists
         def update(i):
             k = max(2, int(len(x_full) * i / (frames - 1)))
             line.set_data(x_full[:k], y_full.iloc[:k])
-            return artists
-    anim = FuncAnimation(fig, update, init_func=init, frames=frames, blit=True, interval=1000 / fps)
-    anim.save(str(out), writer=FFMpegWriter(fps=fps, metadata={'artist': 'Sozo'}), dpi=144)
     plt.close(fig)
     return str(out)
 def safe_chart(desc, df, dur, out):
     try:
         return animate_chart(desc, df, dur, out)
-    except Exception as e:
-        st.warning(f"Animated chart failed ('{desc}'): {e}. Using static fallback.")
         with plt.ioff():
-            fig, ax = plt.subplots()
-            try:
-                df.select_dtypes(include=np.number).plot(ax=ax)
-                ax.set_title(desc)
-            except Exception:
-                ax.text(0.5, 0.5, 'Could not render chart', ha='center', va='center')
             p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
-            fig.savefig(p, bbox_inches="tight"); plt.close(fig)
-            img_path = str(p)
-            img = cv2.imread(img_path)
-            if img is None: # Handle case where image read fails
-                img = np.full((HEIGHT, WIDTH, 3), 230, dtype=np.uint8) # Fallback gray image
-            img_resized = cv2.resize(img, (WIDTH, HEIGHT))
-            return animate_image_fade(img_resized, dur, out)
 def concat_media(paths: List[str], out: Path, kind="video"):
-    if not paths: return
-    lst_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
-    with lst_path.open("w", encoding="utf-8") as f:
-        for p in paths:
-            if Path(p).exists() and Path(p).stat().st_size > 0:
-                f.write(f"file '{Path(p).resolve().as_posix()}'\n")
-    if not lst_path.is_file() or lst_path.stat().st_size == 0:
-        if lst_path.is_file(): lst_path.unlink()
         return
-    cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(lst_path), "-c", "copy", str(out)]
-    subprocess.run(cmd, check=True, capture_output=True)
-    lst_path.unlink(missing_ok=True)
-# ─── VIDEO GENERATION (YOUR ORIGINAL CODE) ─────────────────────────────────
 def build_story_prompt(ctx_dict):
     cols = ", ".join(ctx_dict["columns"][:6])
     return f"""
     You are a professional business storyteller and data analyst. Create a compelling script for a {VIDEO_SCENES}-scene business video presentation.
     **Complete Dataset Context:**
-    {json.dumps(ctx_dict, indent=2, default=str)}
     **Task Requirements:**
     1. **Identify the Data Story**: Determine what business domain this data represents and what story it tells
     2. **Create {VIDEO_SCENES} distinct scenes** that build a logical narrative arc
     3. **Each scene must contain:**
        - 1-2 sentences of clear, professional narration (plain English, no jargon)
        - Exactly one chart tag: `<generate_chart: "chart_type | specific description">`
     **Chart Guidelines:**
-    - Valid types: bar, pie, line, scatter, hist
-    - Base all charts on actual columns: {cols}
     **Narrative Structure:**
-    - Scene 1: Set the context and introduce the main story
-    - Middle scenes: Develop key insights and supporting evidence
-    - Final scene: Conclude with actionable takeaways or future outlook
-    **Output Format:**
-    Separate each scene with exactly [SCENE_BREAK]
     """
 def generate_video(buf: bytes, name: str, ctx: str, key: str):
     try:
         subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
     except Exception:
-        st.error("🔴 FFmpeg not available — cannot render video."); return None
     df, err = load_dataframe_safely(buf, name)
     if err:
-        st.error(err); return None
-    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.2)
     ctx_dict = {
-        "shape": df.shape, "columns": list(df.columns), "user_ctx": ctx or "General business analysis",
-        "data_sample": df.head().to_dict('records'),
-        "numeric_summary": df.describe().to_dict() if not df.select_dtypes(include=np.number).empty else {}
     }
     script = llm.invoke(build_story_prompt(ctx_dict)).content
     scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
     video_parts, audio_parts, temps = [], [], []
     for idx, sc in enumerate(scenes[:VIDEO_SCENES]):
-        st.progress((idx + 1) / VIDEO_SCENES, text=f"Rendering Scene {idx + 1}/{VIDEO_SCENES}")
         descs = extract_chart_tags(sc)
         narrative = clean_narration(sc)
         audio_bytes, _ = deepgram_tts(narrative)
         mp3 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
         if audio_bytes:
@@ -432,127 +605,138 @@ def generate_video(buf: bytes, name: str, ctx: str, key: str):
         else:
             dur = 5.0
             generate_silence_mp3(dur, mp3)
-        audio_parts.append(str(mp3)); temps.append(mp3)
         mp4 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
         if descs:
             safe_chart(descs[0], df, dur, mp4)
         else:
             img = generate_image_from_prompt(narrative)
-            img_cv = cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
             animate_image_fade(img_cv, dur, mp4)
-        video_parts.append(str(mp4)); temps.append(mp4)
     silent_vid = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
     concat_media(video_parts, silent_vid, "video")
     audio_mix = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
     concat_media(audio_parts, audio_mix, "audio")
     final_vid = Path(tempfile.gettempdir()) / f"{key}.mp4"
-    if silent_vid.exists() and silent_vid.stat().st_size > 0 and audio_mix.exists() and audio_mix.stat().st_size > 0:
-        subprocess.run(
-            ["ffmpeg", "-y", "-i", str(silent_vid), "-i", str(audio_mix),
-             "-c:v", "copy", "-c:a", "aac", "-shortest", str(final_vid)],
-            check=True, capture_output=True)
-    else:
-        st.error("Failed to generate video or audio components.")
-        return None
     for p in temps + [silent_vid, audio_mix]:
         p.unlink(missing_ok=True)
     return str(final_vid)
-# ─── UI & WORKFLOW (RESTRUCTURED FOR RESPONSIVENESS) ───────────────────────
-mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
 upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
 if upl:
-    file_key = sha1_bytes(upl.getvalue())
-    if file_key != st.session_state.current_file_key:
-        st.session_state.report_md = None
-        st.session_state.chart_descs = []
-        st.session_state.generated_charts = {}
-        st.session_state.pdf_bytes = None
-        st.session_state.bundle = None
-        st.session_state.current_file_key = file_key
-        df, err = load_dataframe_safely(upl.getvalue(), upl.name)
-        if err:
-            st.error(f"Error loading data: {err}")
-            st.session_state.df = None
-        else:
-            st.session_state.df = df
-            st.rerun()
-if st.session_state.get("df") is not None:
-    with st.expander("📊 Data Preview", expanded=True):
-        st.dataframe(arrow_df(st.session_state.df.head()))
-    ctx = st.text_area("Business context or specific instructions (optional)")
     if mode == "Report (PDF)":
-        if st.button("🚀 Generate Report", type="primary", disabled=(st.session_state.report_md is not None)):
-            with st.spinner("Analyzing data and drafting report..."):
-                md, descs = generate_report_text(st.session_state.df, ctx)
-                st.session_state.report_md = md
-                st.session_state.chart_descs = descs
-            st.rerun()
-    else: # Video Mode
-        if st.button("🎬 Generate Video", type="primary"):
-            st.warning("Video generation is a long process and will lock the UI.")
-            with st.spinner("Generating video... This may take several minutes."):
-                key = st.session_state.current_file_key
-                path = generate_video(upl.getvalue(), upl.name, ctx, key)
-                if path:
-                    st.session_state.bundle = {"type": "video", "video_path": path, "key": key}
-            st.rerun()
-# ─── OUTPUT DISPLAY ────────────────────────────────────────────────────────
-if st.session_state.get("report_md"):
     st.subheader("📄 Generated Report")
-    preview_md = st.session_state.report_md
-    for desc, b64_data in st.session_state.generated_charts.items():
-        if b64_data:
-            img_tag = f'<img src="data:image/png;base64,{b64_data}" width="600">'
-            preview_md = TAG_RE.sub(lambda m: img_tag if m.group("d").strip() == desc else m.group(0), preview_md, count=1)
-    preview_md = TAG_RE.sub("[Chart will be generated here]", preview_md)
     with st.expander("View Report", expanded=True):
-        st.markdown(preview_md, unsafe_allow_html=True)
-    pending_charts = [d for d in st.session_state.chart_descs if d not in st.session_state.generated_charts]
-    if pending_charts:
-        if st.button("📊 Generate Visualizations", use_container_width=True, type="primary"):
-            for desc in pending_charts:
-                with st.spinner(f"Generating chart: {desc}"):
-                    b64_image = generate_single_chart(desc, st.session_state.df)
-                    st.session_state.generated_charts[desc] = b64_image
-                st.rerun()
-    all_charts_processed = st.session_state.chart_descs and len(st.session_state.generated_charts) == len(st.session_state.chart_descs)
-    if all_charts_processed:
         c1, c2 = st.columns(2)
         with c1:
-            if st.session_state.pdf_bytes is None:
-                with st.spinner("Building PDF..."):
-                    st.session_state.pdf_bytes = build_pdf(st.session_state.report_md, st.session_state.generated_charts)
-            st.download_button("Download PDF", st.session_state.pdf_bytes, "business_report.pdf", "application/pdf", use_container_width=True)
         with c2:
-            if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
-                txt = clean_narration(st.session_state.report_md)
                 audio, mime = deepgram_tts(txt)
-                st.audio(audio, format=mime) if audio else st.error("Narration failed.")
-elif bundle := st.session_state.get("bundle"):
-    if bundle["type"] == "video":
-        st.subheader("🎬 Generated Video Narrative")
-        vp = bundle["video_path"]
-        if Path(vp).exists():
-            with open(vp, "rb") as f:
-                st.video(f.read())
-            with open(vp, "rb") as f:
-                st.download_button("Download Video", f, f"sozo_narrative_{bundle['key'][:8]}.mp4", "video/mp4")
-        else:
-            st.error("Video file missing – generation may have failed.")

 ##############################################################################
+# Sozo Business Studio · 10-Jul-2025
 # • Restores PDF branch alongside fixed Video branch
 # • Shared chart-tag grammar across both paths
 # • Narrator text cleans scene labels + chart talk
 # • Matplotlib animation starts from blank; artists returned (blit=True)
 # • Gemini Flash-preview image gen with placeholder fallback
 # • Silent-audio fallback keeps mux lengths equal
+# • NEW (2025-07-06): Lazy-loading of PDF charts  +  st.rerun()
 ##############################################################################
 import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
 from pathlib import Path
 from typing import Tuple, Dict, List
+from concurrent.futures import ThreadPoolExecutor
 import streamlit as st
 import pandas as pd
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 from matplotlib.animation import FuncAnimation, FFMpegWriter
 from fpdf import FPDF, HTMLMixin
 from markdown_it import MarkdownIt
 from PIL import Image
 from langchain_experimental.agents import create_pandas_dataframe_agent
 from langchain_google_genai import ChatGoogleGenerativeAI
 from google import genai
+from google.genai import types           # GenerateContentConfig
 # ─── CONFIG ────────────────────────────────────────────────────────────────
 st.set_page_config(page_title="Sozo Business Studio", layout="wide")
 st.title("📊 Sozo Business Studio")
 st.caption("AI transforms business data into compelling narratives.")
+FPS, WIDTH, HEIGHT   = 24, 1280, 720
 MAX_CHARTS, VIDEO_SCENES = 5, 5
 API_KEY = os.getenv("GEMINI_API_KEY")
     st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
 GEM = genai.Client(api_key=API_KEY)
+DG_KEY = os.getenv("DEEPGRAM_API_KEY")   # optional narration
 sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
+# ─── LAZY-LOADING SCAFFOLDING ──────────────────────────────────────────────
+EXEC = ThreadPoolExecutor(max_workers=4)        # parallel chart threads
+if "lazy_reports" not in st.session_state:      # key → report dict
+    st.session_state.lazy_reports = {}
+st.session_state.setdefault("bundle", None)     # video branch
+# ─── HELPERS ───────────────────────────────────────────────────────────────
 def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
     """Load CSV/Excel, return (df, err)."""
     try:
         r = requests.post(
             "https://api.deepgram.com/v1/speak",
             params={"model": "aura-2-andromeda-en"},
+            headers={
+                "Authorization": f"Token {DG_KEY}",
+                "Content-Type": "application/json",
+            },
+            json={"text": txt},
+            timeout=30,
+        )
         r.raise_for_status()
         return r.content, r.headers.get("Content-Type", "audio/mpeg")
     except Exception:
 def generate_silence_mp3(duration: float, out: Path):
     subprocess.run(
+        [
+            "ffmpeg",
+            "-y",
+            "-f",
+            "lavfi",
+            "-i",
+            "anullsrc=r=44100:cl=mono",
+            "-t",
+            f"{duration:.3f}",
+            "-q:a",
+            "9",
+            str(out),
+        ],
+        check=True,
+        capture_output=True,
+    )
 def audio_duration(path: str) -> float:
     try:
         res = subprocess.run(
+            [
+                "ffprobe",
+                "-v",
+                "error",
+                "-show_entries",
+                "format=duration",
+                "-of",
+                "default=nw=1:nk=1",
+                path,
+            ],
+            text=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            check=True,
+        )
         return float(res.stdout.strip())
     except Exception:
         return 5.0
 TAG_RE = re.compile(
+    r'[<[]\s*generate_?chart\s*[:=]?\s*[\"\'“”]?(?P<d>[^>\"\'”\]]+?)[\"\'“”]?\s*[>\]]',
+    re.I,
+)
+extract_chart_tags = lambda t: list(
+    dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or ""))
+)
+re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I)
 def clean_narration(txt: str) -> str:
     txt = re_scene.sub("", txt)
     txt = TAG_RE.sub("", txt)
     txt = re.sub(r"\s*\([^)]*\)", "", txt)
+    return re.sub(r"\s{2,}", " ", txt).strip()
 # ─── IMAGE GENERATION & PLACEHOLDER ────────────────────────────────────────
 def placeholder_img() -> Image.Image:
     return Image.new("RGB", (WIDTH, HEIGHT), (230, 230, 230))
 def generate_image_from_prompt(prompt: str) -> Image.Image:
     model_main = "gemini-2.0-flash-exp-image-generation"
     model_fallback = "gemini-2.0-flash-preview-image-generation"
     full_prompt = "A clean business-presentation illustration: " + prompt
     def fetch(model_name):
+        res = GEM.models.generate_content(
+            model=model_name,
+            contents=full_prompt,
+            config=types.GenerateContentConfig(response_modalities=["IMAGE"]),
+        )
+        for part in res.candidates[0].content.parts:
+            if getattr(part, "inline_data", None):
+                return Image.open(io.BytesIO(part.inline_data.data)).convert("RGB")
+        return None
+    try:
+        img = fetch(model_main) or fetch(model_fallback)
+        return img if img else placeholder_img()
+    except Exception:
+        return placeholder_img()
+# ─── PDF GENERATION ────────────────────────────────────────────────────────
 class PDF(FPDF, HTMLMixin):
     pass
 def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
+    html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(
+        TAG_RE.sub(
+            lambda m: f'<img src="{charts.get(m.group("d").strip(), "")}">', md
+        )
+    )
     pdf = PDF()
     pdf.set_auto_page_break(True, margin=15)
     pdf.add_page()
     pdf.ln(3)
     pdf.set_font("Arial", "", 11)
     pdf.write_html(html)
+    return pdf.output(dest="S").encode("latin-1")
+# ─── QUICK STATIC CHART (fallback if LLM code fails) ───────────────────────
def quick_chart(desc: str, df: pd.DataFrame, out: Path):
    """Draw a simple static chart for ``desc`` and save it to ``out``.

    Args:
        desc: Chart description in the form ``"type | title"``. The type is
            matched case-insensitively against pie/line/scatter/hist; any
            other value (or an empty type) falls back to a bar chart.
        df: Data to plot. The first numeric / non-numeric columns are used.
        out: Destination image path.

    Raises:
        ValueError: If ``df`` has no columns at all.
    """
    kind, sep, label = desc.partition("|")
    ctype = kind.strip().lower() or "bar"
    # Keep the title's original casing; only the chart type needs folding.
    title = label.strip() if sep else desc
    num_cols = df.select_dtypes("number").columns
    cat_cols = df.select_dtypes(exclude="number").columns
    with plt.ioff():
        fig, ax = plt.subplots(figsize=(6, 3.4), dpi=150)
        try:
            if ctype == "pie" and len(cat_cols) >= 1 and len(num_cols) >= 1:
                plot = df.groupby(cat_cols[0])[num_cols[0]].sum().head(8)
                ax.pie(plot, labels=plot.index, autopct="%1.1f%%", startangle=90)
            elif ctype == "line" and len(num_cols) >= 1:
                df[num_cols[0]].plot(kind="line", ax=ax)
            elif ctype == "scatter" and len(num_cols) >= 2:
                ax.scatter(df[num_cols[0]], df[num_cols[1]], s=10, alpha=0.7)
            elif ctype == "hist" and len(num_cols) >= 1:
                ax.hist(df[num_cols[0]], bins=20, alpha=0.7)
            else:  # bar fallback
                if df.shape[1] == 0:
                    raise ValueError("cannot chart a DataFrame with no columns")
                # Without any numeric column, count values of the first
                # column instead of failing on num_cols[0].
                col = num_cols[0] if len(num_cols) >= 1 else df.columns[0]
                df[col].value_counts().head(10).plot(kind="bar", ax=ax)
            ax.set_title(title)
            fig.tight_layout()
            fig.savefig(out, bbox_inches="tight", facecolor="white")
        finally:
            # Always release the figure, including when plotting/saving fails.
            plt.close(fig)
+# ─── REPORT (STEP 1)  — prepare markdown instantly ────────────────────────
def prepare_report(buf: bytes, name: str, ctx: str):
    """Load the upload and ask the LLM for the Markdown report.

    Args:
        buf: Raw bytes of the uploaded file.
        name: Original file name, passed to ``load_dataframe_safely``.
        ctx: Optional user-supplied business context.

    Returns:
        ``(df, md, chart_descs)`` on success, where ``chart_descs`` holds at
        most ``MAX_CHARTS`` chart descriptions extracted from ``md``;
        ``(None, None, None)`` when the file could not be loaded (the error
        is shown with ``st.error``).
    """
    df, err = load_dataframe_safely(buf, name)
    if err:
        st.error(err)
        return None, None, None
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1
    )
    # Summarise numeric columns only, so every statistic converts to float.
    numeric_df = df.select_dtypes(include=["number"])
    # ─── enhanced context & prompt ────────────────────────────────────────
    ctx_dict = {
        "shape": df.shape,
        "columns": list(df.columns),
        "user_ctx": ctx or "General business analysis",
        "full_dataframe": df.to_dict("records"),
        "data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
        "missing_values": {
            col: int(count) for col, count in df.isnull().sum().to_dict().items()
        },
        "numeric_summary": {
            col: {stat: float(val) for stat, val in stats.items()}
            for col, stats in numeric_df.describe().to_dict().items()
        }
        if len(numeric_df.columns) > 0
        else {},
    }
    # Column labels are not guaranteed to be strings.
    cols = ", ".join(str(c) for c in ctx_dict["columns"][:6])
    # default=str keeps the prompt buildable when cells hold values json
    # cannot encode natively (e.g. timestamps); it is prompt text only.
    ctx_json = json.dumps(ctx_dict, indent=2, default=str)
    report_prompt = f"""
    You are a senior data analyst and business intelligence expert. Analyze the provided dataset and write a comprehensive executive-level Markdown report.
    **Dataset Analysis Context:**
    {ctx_json}
    **Instructions:**
    1. **Identify Data Domain**: First, determine what type of data this represents (e.g., sales/revenue, healthcare/medical, HR/employee, financial, operational, customer, research, etc.) based on column names and sample data.
    2. **Executive Summary**: Start with a high-level summary of key findings and business impact.
    3. **Data Quality Assessment**: Comment on data completeness, any notable missing values, and data reliability.
    4. **Key Insights**: Provide 4-6 actionable insights specific to the identified domain:
       - Trends and patterns
       - Outliers or anomalies
       - Performance indicators
       - Risk factors or opportunities
    5. **Strategic Recommendations**: Offer concrete, actionable recommendations based on the data.
    6. **Visual Support**: When a visualization would enhance understanding, insert chart tags like: `<generate_chart: "chart_type | specific description">`
       Valid chart types: bar, pie, line, scatter, hist
       Base every chart on actual columns: {cols}
       Choose chart types strategically:
       - bar: for categorical comparisons
       - pie: for proportional breakdowns (when categories < 7)
       - line: for time series or trends
       - scatter: for correlation analysis
       - hist: for distribution analysis
    7. **Format Requirements**:
       - Use professional business language
       - Include relevant metrics and percentages
       - Structure with clear headers (## Executive Summary, ## Key Insights, etc.)
       - End with ## Next Steps section
    **Domain-Specific Focus Areas:**
       - If sales data: focus on revenue trends, customer segments, product performance
       - If HR data: focus on workforce analytics, retention, performance metrics
       - If financial data: focus on profitability, cost analysis, financial health
       - If operational data: focus on efficiency, bottlenecks, process optimization
       - If customer data: focus on behavior patterns, satisfaction, churn analysis
    Generate insights that would be valuable to C-level executives and department heads.
    """
    # ─── end prompt ────────────────────────────────────────────────────────
    md = llm.invoke(report_prompt).content
    chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
    return df, md, chart_descs
+# ─── REPORT (STEP 2)  — background worker per chart ───────────────────────
def render_chart_worker(rep_key: str, desc: str):
    """Generate one chart (LLM + fallback).

    The chart is first requested from a pandas-dataframe agent; if the agent
    fails or does not produce the expected file, ``quick_chart`` draws a
    static fallback. The result path ("" on total failure) is stored under
    ``rep["charts"][desc]``. When the last pending chart completes, the PDF
    is built and the report is marked finished.

    Args:
        rep_key: Key of the report in ``st.session_state.lazy_reports``.
        desc: Chart description (``"type | title"``) to render.
    """
    # NOTE(review): this function touches st.session_state and calls
    # st.rerun(); if it runs on a worker thread, confirm Streamlit's script
    # context is available there.
    rep = st.session_state.lazy_reports[rep_key]
    df = rep["df"]
    img_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
    # The agent must be told where to write, otherwise the existence check
    # below can never succeed and the LLM chart is always thrown away.
    target = img_path.as_posix()
    try:
        agent = create_pandas_dataframe_agent(
            llm=ChatGoogleGenerativeAI(
                model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1
            ),
            df=df,
            verbose=False,
            allow_dangerous_code=True,
        )
        chart_prompt = f"""
        Create a professional {desc} chart using matplotlib with these requirements:
        1. Use a clean, business-appropriate style
        2. Include proper title, axis labels, and legends
        3. Apply appropriate color schemes (avoid rainbow colors)
        4. Ensure text is readable (font size 10+)
        5. Format numbers appropriately (e.g., currency, percentages)
        6. Save the figure with high quality
        7. Handle any missing or null values appropriately
        8. Save the figure as a PNG file to exactly this path: {target}
        """
        agent.run(chart_prompt)
        if not img_path.exists():
            raise RuntimeError("LLM did not save figure")
    except Exception:
        # Best effort: any agent failure falls through to the static chart.
        try:
            quick_chart(desc, df, img_path)
        except Exception:
            img_path = None
    rep["charts"][desc] = str(img_path) if img_path and img_path.exists() else ""
    rep["pending"].discard(desc)
    if not rep["pending"]:
        rep["pdf"] = build_pdf(rep["md"], rep["charts"])
        rep["finished"] = True
    st.rerun()
+# ─── Helper: inline image or grey placeholder ─────────────────────────────
def _inline_image_or_placeholder(rep, desc):
    """Return an ``<img>`` tag for ``desc``: inline PNG data or a grey box.

    The chart path is looked up in ``rep["charts"]``. If it is set and the
    file exists, the file is embedded as a base64 data URI; otherwise a
    fixed-size grey placeholder tag is returned.
    """
    chart_file = rep["charts"].get(desc)
    if not chart_file or not Path(chart_file).exists():
        return '<img height="250" width="400" style="background:#ddd;">'
    encoded = base64.b64encode(Path(chart_file).read_bytes()).decode()
    return f'<img src="data:image/png;base64,{encoded}">'
+# ─── ANIMATION HELPERS (unchanged) ────────────────────────────────────────
def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path,
                       fps: int = FPS) -> str:
    """Write a clip that fades ``img_cv2`` in from a white frame.

    Args:
        img_cv2: Image array to fade in; resized to WIDTH x HEIGHT if its
            size differs from the writer's frame size.
        dur: Requested clip length in seconds (at least one second's worth
            of frames is always written).
        out: Destination video path.
        fps: Frames per second of the clip.

    Returns:
        ``str(out)``.
    """
    frames = max(int(dur * fps), fps)
    if img_cv2.shape[:2] != (HEIGHT, WIDTH):
        # The writer below is opened for WIDTH x HEIGHT frames.
        img_cv2 = cv2.resize(img_cv2, (WIDTH, HEIGHT))
    blank = np.full_like(img_cv2, 255)
    vid = cv2.VideoWriter(str(out), cv2.VideoWriter_fourcc(*"mp4v"),
                          fps, (WIDTH, HEIGHT))
    try:
        for i in range(frames):
            # Divide by frames - 1 so the final frame is the full image.
            a = i / (frames - 1) if frames > 1 else 1.0
            vid.write(cv2.addWeighted(blank, 1 - a, img_cv2, a, 0))
    finally:
        # Release the writer even if a frame fails to blend or write.
        vid.release()
    return str(out)
def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path,
                  fps: int = FPS) -> str:
    """Render an animated chart whose clip length equals `dur`.

    Args:
        desc: Chart description in the form ``"type | title"``. Recognised
            types are pie, bar, hist and scatter; anything else is drawn as
            a line chart. An empty type means bar.
        df: Source data. The first numeric column(s) and, for pie charts,
            the first non-numeric column are used.
        dur: Clip length in seconds (a minimum of 10 frames is rendered).
        out: Destination video path.
        fps: Frames per second.

    Returns:
        ``str(out)``.

    Raises:
        Any error from column selection, plotting or encoding propagates to
        the caller; the figure is closed in every case.
    """
    kind, sep, label = desc.partition("|")
    ctype = kind.strip().lower() or "bar"
    # Only the chart type is case-folded; the title keeps its casing.
    title = label.strip() if sep else desc

    # prepare data
    numeric = df.select_dtypes(include="number").columns
    if ctype == "pie":
        cat = df.select_dtypes(exclude="number").columns[0]
        plot_df = (
            df.groupby(cat)[numeric[0]].sum().sort_values(ascending=False).head(8)
        )
    elif ctype in ("bar", "hist"):
        plot_df = df[numeric[0]]
    else:
        plot_df = df[list(numeric[:2])].sort_index()

    frames = max(10, int(dur * fps))
    fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
    try:

        def fade(artists):
            """Build init/update callbacks that fade ``artists`` in."""

            def _init():
                for art in artists:
                    art.set_alpha(0)
                return artists

            def _update(i):
                alpha = i / (frames - 1)
                for art in artists:
                    art.set_alpha(alpha)
                return artists

            return _init, _update

        # branches
        if ctype == "pie":
            wedges, _ = ax.pie(plot_df, labels=plot_df.index, startangle=90)
            ax.set_title(title)
            init, update = fade(wedges)
        elif ctype == "bar":
            heights = plot_df.values
            bars = ax.bar(plot_df.index, np.zeros_like(heights), color="#1f77b4")
            ax.set_ylim(0, plot_df.max() * 1.1)
            ax.set_title(title)

            def init():
                return bars

            def update(i):
                a = i / (frames - 1)
                for b, h in zip(bars, heights):
                    b.set_height(h * a)
                return bars
        elif ctype == "hist":
            _, _, patches = ax.hist(plot_df, bins=20, color="#1f77b4", alpha=0)
            ax.set_title(title)
            init, update = fade(patches)
        elif ctype == "scatter":
            pts = ax.scatter(plot_df.iloc[:, 0], plot_df.iloc[:, 1],
                             s=10, alpha=0)
            ax.set_title(title)
            ax.grid(alpha=.3)
            init, update = fade([pts])
        else:  # line
            line, = ax.plot([], [], lw=2)
            if plot_df.shape[1] > 1:
                x_full = plot_df.iloc[:, 0]
                y_full = plot_df.iloc[:, 1]
            else:
                # Single numeric column: plot it against its row position.
                x_full = pd.Series(np.arange(len(plot_df)))
                y_full = plot_df.iloc[:, 0]
            ax.set_xlim(x_full.min(), x_full.max())
            ax.set_ylim(y_full.min(), y_full.max())
            ax.set_title(title)
            ax.grid(alpha=.3)

            def init():
                line.set_data([], [])
                return [line]

            def update(i):
                k = max(2, int(len(x_full) * i / (frames - 1)))
                # Positional slicing on both axes keeps x and y aligned.
                line.set_data(x_full.iloc[:k], y_full.iloc[:k])
                return [line]

        anim = FuncAnimation(fig, update, init_func=init,
                             frames=frames, blit=True,
                             interval=1000 / fps)
        # Save at the figure's own dpi so the frame size stays
        # WIDTH x HEIGHT; a larger dpi would scale the frames up.
        anim.save(str(out),
                  writer=FFMpegWriter(fps=fps, metadata={'artist': 'Sozo'}),
                  dpi=fig.dpi)
    finally:
        # Close on failure too, so no half-drawn figure stays current.
        plt.close(fig)
    return str(out)
 def safe_chart(desc, df, dur, out):
     """Render an animated chart, falling back to a faded static plot.

     ``animate_chart`` is tried first. On any exception the DataFrame is
     plotted with its default ``df.plot``, saved to a temporary PNG, and
     that image is turned into a fade-in clip at ``out``.

     Returns:
         The path string returned by the renderer that succeeded.

     Raises:
         RuntimeError: If the fallback image cannot be read back.
     """
     try:
         return animate_chart(desc, df, dur, out)
     except Exception:
         p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
         try:
             with plt.ioff():
                 # Use a fresh figure rather than whatever is current, so
                 # leftovers from the failed attempt cannot bleed in.
                 fig, ax = plt.subplots()
                 try:
                     df.plot(ax=ax)
                     fig.savefig(p, bbox_inches="tight")
                 finally:
                     plt.close(fig)
             frame = cv2.imread(str(p))
         finally:
             # The PNG is only an intermediate; never leave it behind.
             p.unlink(missing_ok=True)
         if frame is None:
             raise RuntimeError("fallback chart image could not be read")
         img = cv2.resize(frame, (WIDTH, HEIGHT))
         return animate_image_fade(img, dur, out)
 def concat_media(paths: List[str], out: Path, kind="video"):
     """Concatenate media files into ``out`` with ffmpeg's concat demuxer.

     Args:
         paths: Input files in playback order; entries that do not exist on
             disk are skipped. An empty list is a no-op.
         out: Destination file.
         kind: ``"video"`` copies the video stream (``-c:v copy``); any
             other value copies the audio stream (``-c:a copy``).

     Raises:
         subprocess.CalledProcessError: If ffmpeg exits with an error.
     """
     if not paths:
         return
     lst = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
     codec_flag = "-c:v" if kind == "video" else "-c:a"
     try:
         with lst.open("w", encoding="utf-8") as f:
             for p in paths:
                 if Path(p).exists():
                     # Inside the single-quoted entry a literal quote must
                     # be written as '\'' or the list file is malformed.
                     quoted = str(Path(p).resolve()).replace("'", "'\\''")
                     f.write(f"file '{quoted}'\n")
         subprocess.run(
             [
                 "ffmpeg",
                 "-y",
                 "-f",
                 "concat",
                 "-safe",
                 "0",
                 "-i",
                 str(lst),
                 codec_flag,
                 "copy",
                 str(out),
             ],
             check=True,
             capture_output=True,
         )
     finally:
         # Remove the list file even when ffmpeg fails.
         lst.unlink(missing_ok=True)
# ─── VIDEO GENERATION (original prompt & logic) ────────────────────────────
def build_story_prompt(ctx_dict):
    """Build the LLM prompt for the scene-by-scene video script.

    Args:
        ctx_dict: Dataset context; must contain a ``"columns"`` list. The
            whole dict is embedded in the prompt as indented JSON.

    Returns:
        The prompt text, asking for ``VIDEO_SCENES`` scenes separated by
        ``[SCENE_BREAK]``, each with one chart tag.
    """
    # Column labels are not guaranteed to be strings.
    cols = ", ".join(str(c) for c in ctx_dict["columns"][:6])
    # default=str keeps the prompt buildable when the context holds values
    # json cannot encode natively (e.g. timestamps); it is prompt text only.
    ctx_json = json.dumps(ctx_dict, indent=2, default=str)
    return f"""
    You are a professional business storyteller and data analyst. Create a compelling script for a {VIDEO_SCENES}-scene business video presentation.
    **Complete Dataset Context:**
    {ctx_json}
    **Task Requirements:**
    1. **Identify the Data Story**: Determine what business domain this data represents and what story it tells
    2. **Create {VIDEO_SCENES} distinct scenes** that build a logical narrative arc
    3. **Each scene must contain:**
       - 1-2 sentences of clear, professional narration (plain English, no jargon)
       - Exactly one chart tag: `<generate_chart: "chart_type | specific description">`
    **Chart Guidelines:**
      - Valid types: bar, pie, line, scatter, hist
      - Base all charts on actual columns: {cols}
      - Choose chart types that best tell the story:
        * bar: categorical comparisons, rankings
        * pie: proportional breakdowns (≤6 categories)
        * line: trends over time, progression
        * scatter: relationships, correlations
        * hist: distributions, frequency analysis
    **Narrative Structure:**
      - Scene 1: Set the context and introduce the main story
      - Middle scenes: Develop key insights and supporting evidence
      - Final scene: Conclude with actionable takeaways or future outlook
    **Content Standards:**
      - Use conversational, executive-level language
      - Include specific data insights (trends, percentages, comparisons)
      - Avoid chart descriptions in narration ("as shown in the chart")
      - Make each scene self-contained but connected to the overall story
      - Focus on business impact and actionable insights
    **Domain-Specific Approaches:**
      - Sales data: Customer journey, revenue trends, market performance
      - HR data: Workforce insights, talent analytics, organizational health
      - Financial data: Performance indicators, cost analysis, profitability
      - Operational data: Process efficiency, bottlenecks, optimization opportunities
      - Customer data: Behavior patterns, satisfaction trends, retention analysis
    **Output Format:** Separate each scene with exactly [SCENE_BREAK]
    **Example Structure:**
      Our company's data reveals fascinating insights about market performance over the past year. Let's explore what the numbers tell us about our growth trajectory.
      <generate_chart: "line | monthly revenue growth over 12 months">
      [SCENE_BREAK]
      Customer acquisition has shown remarkable patterns, with certain segments driving significantly more value than others. The data shows a clear preference emerging in our target markets.
      <generate_chart: "bar | customer acquisition by segment">
    Create a compelling, data-driven story that executives would find engaging and actionable.
    """
 # Build a narrated video for the uploaded dataset.
 #
 # Steps: verify ffmpeg is callable, load the data, ask the LLM for a script,
 # render one audio clip and one video clip per scene, concatenate each track,
 # then mux them into <tempdir>/<key>.mp4.
 #
 # Returns the final video path as a string, or None when ffmpeg is not
 # available or the data cannot be loaded (an error is shown via st.error).
 def generate_video(buf: bytes, name: str, ctx: str, key: str):
     # Probe for ffmpeg up front; every later step shells out to it.
     try:
         subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
     except Exception:
+        st.error("🔴 FFmpeg not available — cannot render video.")
+        return None
     df, err = load_dataframe_safely(buf, name)
     if err:
+        st.error(err)
+        return None
+    llm = ChatGoogleGenerativeAI(
+        model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.2
+    )
     # Dataset context embedded into the story prompt (includes every row).
     ctx_dict = {
+        "shape": df.shape,
+        "columns": list(df.columns),
+        "user_ctx": ctx or "General business analysis",
+        "full_dataframe": df.to_dict("records"),
+        "data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
+        "numeric_summary": {
+            col: {stat: float(val) for stat, val in stats.items()}
+            for col, stats in df.describe().to_dict().items()
+        }
+        if len(df.select_dtypes(include=["number"]).columns) > 0
+        else {},
     }
     script = llm.invoke(build_story_prompt(ctx_dict)).content
     # Scenes are delimited by the literal marker requested in the prompt.
     scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
     # temps collects every per-scene file (as Path) for cleanup at the end.
     video_parts, audio_parts, temps = [], [], []
     for idx, sc in enumerate(scenes[:VIDEO_SCENES]):
+        st.progress(
+            (idx + 1) / VIDEO_SCENES,
+            text=f"Rendering Scene {idx + 1}/{VIDEO_SCENES}",
+        )
         descs = extract_chart_tags(sc)
         narrative = clean_narration(sc)
+        # audio
         audio_bytes, _ = deepgram_tts(narrative)
         mp3 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
         if audio_bytes:
             # NOTE(review): the body of this branch is not visible in this
             # view of the file. Presumably it writes audio_bytes to mp3 and
             # sets dur from the clip length — confirm against the full file.
         else:
             # No narration audio: use a 5-second silent track instead.
             dur = 5.0
             generate_silence_mp3(dur, mp3)
+        audio_parts.append(str(mp3))
+        temps.append(mp3)
+        # visual
         mp4 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
         if descs:
             # Only the first chart tag of a scene is rendered.
             safe_chart(descs[0], df, dur, mp4)
         else:
             # No chart tag: generate an illustration and fade it in.
             img = generate_image_from_prompt(narrative)
+            img_cv = cv2.cvtColor(
+                np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR
+            )
             animate_image_fade(img_cv, dur, mp4)
+        video_parts.append(str(mp4))
+        temps.append(mp4)
+    # concat
     silent_vid = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
     concat_media(video_parts, silent_vid, "video")
     audio_mix = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
     concat_media(audio_parts, audio_mix, "audio")
     final_vid = Path(tempfile.gettempdir()) / f"{key}.mp4"
     # Mux: copy the video stream, encode audio as AAC, stop at the shorter input.
+    subprocess.run(
+        [
+            "ffmpeg",
+            "-y",
+            "-i",
+            str(silent_vid),
+            "-i",
+            str(audio_mix),
+            "-c:v",
+            "copy",
+            "-c:a",
+            "aac",
+            "-shortest",
+            str(final_vid),
+        ],
+        check=True,
+        capture_output=True,
+    )
     # Cleanup runs only when everything above succeeded (no try/finally).
     for p in temps + [silent_vid, audio_mix]:
         p.unlink(missing_ok=True)
     return str(final_vid)
+# ─── UI ────────────────────────────────────────────────────────────────────
# Input widgets: output mode, file upload with preview, optional context.
mode = st.radio(
    "Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True
)
upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
if upl:
    df_prev, prev_err = load_dataframe_safely(upl.getvalue(), upl.name)
    if prev_err or df_prev is None:
        # Surface the load failure instead of calling .head() on a missing
        # frame.
        st.error(prev_err or "Could not read the uploaded file.")
    else:
        with st.expander("📊 Data Preview"):
            st.dataframe(arrow_df(df_prev.head()))
ctx = st.text_area("Business context or specific instructions (optional)")
+# ─── Generate button ──────────────────────────────────────────────────────
# Generate: start either the lazily-rendered PDF report or the video build.
if st.button("🚀 Generate", type="primary", disabled=not upl):
    # The cache key covers the file content, the chosen mode and the context.
    key = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode()]))
    if mode == "Report (PDF)":
        df, md, chart_descs = prepare_report(upl.getvalue(), upl.name, ctx)
        if df is None:
            st.stop()
        report = {
            "df": df,
            "md": md,
            "charts": {},
            "pending": set(chart_descs),
            "finished": False,
        }
        if not chart_descs:
            # With no charts no worker will ever run, so nothing would build
            # the PDF or flip "finished"; complete the report right here.
            report["pdf"] = build_pdf(md, {})
            report["finished"] = True
        st.session_state.lazy_reports[key] = report
        # "pending" is a set, so one worker per distinct description suffices.
        for d in dict.fromkeys(chart_descs):
            EXEC.submit(render_chart_worker, key, d)
        st.rerun()
    else:  # video branch
        st.session_state.bundle = None
        path = generate_video(upl.getvalue(), upl.name, ctx, key)
        if path:
            st.session_state.bundle = {"type": "video", "video_path": path, "key": key}
        st.rerun()
+# ─── OUTPUT ───────────────────────────────────────────────────────────────
+# 1) live PDF reports (may be multiple)
# Show every report held in session state; charts appear inline as they
# become available, and the download/narration controls once it is finished.
for rep_key, rep in st.session_state.lazy_reports.items():
    st.subheader("📄 Generated Report")

    def _chart_html(match, _rep=rep):
        """Swap one chart tag for its inline image (or the placeholder)."""
        return _inline_image_or_placeholder(_rep, match.group("d").strip())

    md_with_imgs = TAG_RE.sub(_chart_html, rep["md"])
    with st.expander("View Report", expanded=True):
        st.markdown(md_with_imgs, unsafe_allow_html=True)

    if not rep["finished"]:
        st.info("Charts are still rendering… feel free to keep browsing.")
        continue

    c1, c2 = st.columns(2)
    with c1:
        st.download_button(
            "Download PDF",
            rep["pdf"],
            f"business_report_{rep_key[:8]}.pdf",
            "application/pdf",
            use_container_width=True,
        )
    with c2:
        if DG_KEY and st.button("🔊 Narrate Summary", key=f"aud_{rep_key}"):
            # Strip anything tag-shaped before sending the text to TTS.
            txt = re.sub(r"<[^>]+>", "", rep["md"])
            audio, mime = deepgram_tts(txt)
            if not audio:
                st.error("Narration failed.")
            else:
                st.audio(audio, format=mime)
+# 2) video branch output
# Show the generated video (if any) with a matching download button.
if (bundle := st.session_state.get("bundle")) and bundle.get("type") == "video":
    st.subheader("🎬 Generated Video Narrative")
    vp = bundle["video_path"]
    if Path(vp).exists():
        # Read the file once and reuse the bytes for both widgets, rather
        # than opening it twice and handing an open handle to the button.
        video_bytes = Path(vp).read_bytes()
        st.video(video_bytes)
        st.download_button(
            "Download Video",
            video_bytes,
            f"sozo_narrative_{bundle['key'][:8]}.mp4",
            "video/mp4",
        )
    else:
        st.error("Video file missing – generation failed.")