Adk-Analyst2

Sleeping

App Files Files Community

rairo committed on Jul 6, 2025

Commit

1acf113

verified ·

1 Parent(s): da9b96e

Update app.py

Browse files

Files changed (1) hide show

app.py +572 -497

app.py CHANGED Viewed

@@ -1,16 +1,17 @@
 ##############################################################################
-# Sozo Business Studio · 10-Jul-2025 (full drop-in)                          #
-#  • Restores PDF branch alongside fixed Video branch                         #
-#  • Shared chart-tag grammar across both paths                               #
-#  • Narrator text cleans scene labels + chart talk                           #
-#  • Matplotlib animation starts from blank; artists returned (blit=True)     #
-#  • Gemini Flash-preview image gen with placeholder fallback                 #
-#  • Silent-audio fallback keeps mux lengths equal                            #
 ##############################################################################
 import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
 from pathlib import Path
-from typing import Tuple, Dict, List
 import streamlit as st
 import pandas as pd
@@ -27,554 +28,605 @@ import cv2
 from langchain_experimental.agents import create_pandas_dataframe_agent
 from langchain_google_genai import ChatGoogleGenerativeAI
 from google import genai
-from google.genai import types   # for GenerateContentConfig
 # ─── CONFIG ────────────────────────────────────────────────────────────────
 st.set_page_config(page_title="Sozo Business Studio", layout="wide")
 st.title("📊 Sozo Business Studio")
 st.caption("AI transforms business data into compelling narratives.")
-FPS, WIDTH, HEIGHT  = 24, 1280, 720
 MAX_CHARTS, VIDEO_SCENES = 5, 5
 API_KEY = os.getenv("GEMINI_API_KEY")
 if not API_KEY:
-    st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
-GEM = genai.Client(api_key=API_KEY)
-DG_KEY = os.getenv("DEEPGRAM_API_KEY")  # optional for narration
 st.session_state.setdefault("bundle", None)
 sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
-# ─── HELPERS ───────────────────────────────────────────────────────────────
 def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
-    """Load CSV/Excel, return (df, err)."""
     try:
         ext = Path(name).suffix.lower()
-        df = (pd.read_excel if ext in (".xlsx", ".xls") else pd.read_csv)(io.BytesIO(buf))
         df.columns = df.columns.astype(str).str.strip()
-        df = df.dropna(how="all")
         if df.empty or len(df.columns) == 0:
             raise ValueError("No usable data found")
         return df, None
     except Exception as e:
-        return None, str(e)
 def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
-    """Convert for Streamlit Arrow renderer."""
-    safe = df.copy()
     for c in safe.columns:
         if safe[c].dtype.name in ("Int64", "Float64", "Boolean"):
             safe[c] = safe[c].astype(safe[c].dtype.name.lower())
     return safe
-@st.cache_data(show_spinner=False)
 def deepgram_tts(txt: str) -> Tuple[bytes, str]:
-    """Optional audio narration."""
     if not DG_KEY or not txt:
         return None, None
     txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)[:1000]
     try:
         r = requests.post(
             "https://api.deepgram.com/v1/speak",
             params={"model": "aura-2-andromeda-en"},
             headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"},
-            json={"text": txt}, timeout=30)
         r.raise_for_status()
         return r.content, r.headers.get("Content-Type", "audio/mpeg")
     except Exception:
         return None, None
 def generate_silence_mp3(duration: float, out: Path):
-    subprocess.run(
-        ["ffmpeg", "-y", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono",
-         "-t", f"{duration:.3f}", "-q:a", "9", str(out)],
-        check=True, capture_output=True)
 def audio_duration(path: str) -> float:
     try:
         res = subprocess.run(
             ["ffprobe", "-v", "error", "-show_entries", "format=duration",
              "-of", "default=nw=1:nk=1", path],
-            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
         return float(res.stdout.strip())
     except Exception:
         return 5.0
 TAG_RE = re.compile(
     r'[<[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>"\'\]]+?)["\']?\s*[>\]]',
     re.I)
-extract_chart_tags = lambda t: list(dict.fromkeys(m.group("d").strip()
-                                                  for m in TAG_RE.finditer(t or "")))
-re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I)
 def clean_narration(txt: str) -> str:
     txt = re_scene.sub("", txt)
     txt = TAG_RE.sub("", txt)
     txt = re.sub(r"\s*\([^)]*\)", "", txt)
     txt = re.sub(r"\s{2,}", " ", txt).strip()
     return txt
-# ─── IMAGE GENERATION & PLACEHOLDER ────────────────────────────────────────
 def placeholder_img() -> Image.Image:
     return Image.new("RGB", (WIDTH, HEIGHT), (230, 230, 230))
-def generate_image_from_prompt(prompt: str) -> Image.Image:
-    model_main = "gemini-2.0-flash-exp-image-generation"
-    model_fallback = "gemini-2.0-flash-preview-image-generation"
-    full_prompt = "A clean business-presentation illustration: " + prompt
-    def fetch(model_name):
-        res = GEM.models.generate_content(
-            model=model_name,
-            contents=full_prompt,
-            config=types.GenerateContentConfig(response_modalities=["IMAGE"]),
-        )
-        for part in res.candidates[0].content.parts:
-            if getattr(part, "inline_data", None):
-                return Image.open(io.BytesIO(part.inline_data.data)).convert("RGB")
-        return None
     try:
-        img = fetch(model_main) or fetch(model_fallback)
-        return img if img else placeholder_img()
     except Exception:
         return placeholder_img()
-# ─── PDF GENERATION ────────────────────────────────────────────────────────
 class PDF(FPDF, HTMLMixin):
-    pass
 def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
-    html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(
-        TAG_RE.sub(lambda m: f'<img src="{charts.get(m.group("d").strip(), "")}">', md)
-    )
-    pdf = PDF()
-    pdf.set_auto_page_break(True, margin=15)
-    pdf.add_page()
-    pdf.set_font("Arial", "B", 18)
-    pdf.cell(0, 12, "AI-Generated Business Report", ln=True)
-    pdf.ln(3)
-    pdf.set_font("Arial", "", 11)
-    pdf.write_html(html)
-    return bytes(pdf.output(dest="S"))
-def generate_report(buf: bytes, name: str, ctx: str, key: str):
-    df, err = load_dataframe_safely(buf, name)
-    if err:
-        st.error(err); return None
-    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
-                                 google_api_key=API_KEY, temperature=0.1)
-    # Enhanced context analysis
-    ctx_dict = {
-        "shape": df.shape,
-        "columns": list(df.columns),
-        "user_ctx": ctx or "General business analysis",
-        "full_dataframe": df.to_dict('records'),
-        "data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
-        "missing_values": {col: int(count) for col, count in df.isnull().sum().to_dict().items()},
-        "numeric_summary": {col: {stat: float(val) for stat, val in stats.items()}
-                           for col, stats in df.describe().to_dict().items()} if len(df.select_dtypes(include=['number']).columns) > 0 else {}
-    }
-    cols = ", ".join(ctx_dict["columns"][:6])
-    # Enhanced report prompt with domain intelligence
-    report_prompt = f"""
-    You are a senior data analyst and business intelligence expert. Analyze the provided dataset and write a comprehensive executive-level Markdown report.
-    **Dataset Analysis Context:**
-    {json.dumps(ctx_dict, indent=2)}
-    **Instructions:**
-    1. **Identify Data Domain**: First, determine what type of data this represents (e.g., sales/revenue, healthcare/medical, HR/employee, financial, operational, customer, research, etc.) based on column names and sample data.
-    2. **Executive Summary**: Start with a high-level summary of key findings and business impact.
-    3. **Data Quality Assessment**: Comment on data completeness, any notable missing values, and data reliability.
-    4. **Key Insights**: Provide 4-6 actionable insights specific to the identified domain:
-       - Trends and patterns
-       - Outliers or anomalies
-       - Performance indicators
-       - Risk factors or opportunities
-    5. **Strategic Recommendations**: Offer concrete, actionable recommendations based on the data.
-    6. **Visual Support**: When a visualization would enhance understanding, insert chart tags like:
-       `<generate_chart: "chart_type | specific description">`
-       Valid chart types: bar, pie, line, scatter, hist
-       Base every chart on actual columns: {cols}
-       Choose chart types strategically:
-       - bar: for categorical comparisons
-       - pie: for proportional breakdowns (when categories < 7)
-       - line: for time series or trends
-       - scatter: for correlation analysis
-       - hist: for distribution analysis
-    7. **Format Requirements**:
-       - Use professional business language
-       - Include relevant metrics and percentages
-       - Structure with clear headers (## Executive Summary, ## Key Insights, etc.)
-       - End with ## Next Steps section
-    **Domain-Specific Focus Areas:**
-    - If sales data: focus on revenue trends, customer segments, product performance
-    - If HR data: focus on workforce analytics, retention, performance metrics
-    - If financial data: focus on profitability, cost analysis, financial health
-    - If operational data: focus on efficiency, bottlenecks, process optimization
-    - If customer data: focus on behavior patterns, satisfaction, churn analysis
-    Generate insights that would be valuable to C-level executives and department heads.
-    """
-    md = llm.invoke(report_prompt).content
-    chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
-    charts: Dict[str, str] = {}
-    if chart_descs:
-        agent = create_pandas_dataframe_agent(
-            llm=llm, df=df, verbose=False, allow_dangerous_code=True
         )
-        for d in chart_descs:
-            with st.spinner(f"Generating chart: {d}"):
-                with plt.ioff():
                     try:
-                        # Enhanced chart generation prompt
-                        chart_prompt = f"""
-                        Create a professional {d} chart using matplotlib with these requirements:
-                        1. Use a clean, business-appropriate style
-                        2. Include proper title, axis labels, and legends
-                        3. Apply appropriate color schemes (avoid rainbow colors)
-                        4. Ensure text is readable (font size 10+)
-                        5. Format numbers appropriately (e.g., currency, percentages)
-                        6. Save the figure with high quality
-                        7. Handle any missing or null values appropriately
-                        """
-                        agent.run(chart_prompt)
-                        fig = plt.gcf()
-                        if fig.axes:
-                            p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
-                            fig.savefig(p, dpi=300, bbox_inches="tight", facecolor="white")
-                            charts[d] = str(p)
-                        plt.close("all")
                     except Exception:
-                        plt.close("all")
-    preview = TAG_RE.sub(
-        lambda m: f'<img src="data:image/png;base64,{base64.b64encode(Path(charts[m.group("d").strip()]).read_bytes()).decode()}">'
-        if m.group("d").strip() in charts else m.group(0),
-        md
-    )
-    pdf_bytes = build_pdf(md, charts)
-    return {
-        "type": "report",
-        "preview": preview,
-        "pdf": pdf_bytes,
-        "report_md": md,
-        "key": key,
-    }
-# ─── ANIMATION HELPERS ─────────────────────────────────────────────────────
 def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path, fps: int = FPS) -> str:
-    frames = max(int(dur * fps), fps)
-    vid = cv2.VideoWriter(str(out), cv2.VideoWriter_fourcc(*"mp4v"), fps, (WIDTH, HEIGHT))
-    blank = np.full_like(img_cv2, 255)
-    for i in range(frames):
-        a = i / frames
-        vid.write(cv2.addWeighted(blank, 1 - a, img_cv2, a, 0))
-    vid.release()
-    return str(out)
-def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path, fps: int = FPS) -> str:
-    """
-    Render an animated chart whose clip length equals the audio length `dur`.
-    There is NO hard-cap on frames and NO prompt meddling.
-    reveal_progress = i / (frames-1)  → chart reveals smoothly for the whole clip.
-    """
-    # -------- parse description -------------------------------------------
-    ctype, *rest = [s.strip().lower() for s in desc.split("|", 1)]
-    ctype = ctype or "bar"
-    title = rest[0] if rest else desc
-    # -------- prepare data -------------------------------------------------
-    if ctype == "pie":
-        cat = df.select_dtypes(exclude="number").columns[0]
-        num = df.select_dtypes(include="number").columns[0]
-        plot_df = df.groupby(cat)[num].sum().sort_values(ascending=False).head(8)
-    elif ctype in ("bar", "hist"):
-        num = df.select_dtypes(include="number").columns[0]
-        plot_df = df[num]
-    else:                                    # line / scatter
-        cols = df.select_dtypes(include="number").columns[:2]
-        plot_df = df[list(cols)].sort_index()
-    # -------- timing & figure ---------------------------------------------
-    frames = max(10, int(dur * fps))         # audio length → frame count
-    fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
-    # -------- chart branches ----------------------------------------------
-    if ctype == "pie":
-        wedges, _ = ax.pie(plot_df, labels=plot_df.index, startangle=90)
-        ax.set_title(title)
-        def init(): [w.set_alpha(0) for w in wedges]; return wedges
-        def update(i):
-            a = i / (frames - 1)
-            for w in wedges: w.set_alpha(a)
-            return wedges
-    elif ctype == "bar":
-        bars = ax.bar(plot_df.index, np.zeros_like(plot_df.values), color="#1f77b4")
-        ax.set_ylim(0, plot_df.max() * 1.1); ax.set_title(title)
-        def init(): return bars
-        def update(i):
-            a = i / (frames - 1)
-            for b, h in zip(bars, plot_df.values):
-                b.set_height(h * a)
-            return bars
-    elif ctype == "hist":
-        _, _, patches = ax.hist(plot_df, bins=20, color="#1f77b4", alpha=0)
-        ax.set_title(title)
-        def init(): [p.set_alpha(0) for p in patches]; return patches
-        def update(i):
-            a = i / (frames - 1)
-            for p in patches: p.set_alpha(a)
-            return patches
-    elif ctype == "scatter":
-        pts = ax.scatter(plot_df.iloc[:, 0], plot_df.iloc[:, 1], s=10, alpha=0)
-        ax.set_title(title); ax.grid(alpha=.3)
-        def init(): pts.set_alpha(0); return [pts]
-        def update(i):
-            pts.set_alpha(i / (frames - 1))
-            return [pts]
-    else:  # line
-        line, = ax.plot([], [], lw=2)
-        x_full = plot_df.iloc[:, 0] if plot_df.shape[1] > 1 else np.arange(len(plot_df))
-        y_full = plot_df.iloc[:, 1] if plot_df.shape[1] > 1 else plot_df.iloc[:, 0]
-        ax.set_xlim(x_full.min(), x_full.max()); ax.set_ylim(y_full.min(), y_full.max())
-        ax.set_title(title); ax.grid(alpha=.3)
-        def init(): line.set_data([], []); return [line]
-        def update(i):
-            k = max(2, int(len(x_full) * i / (frames - 1)))
-            line.set_data(x_full[:k], y_full.iloc[:k])
-            return [line]
-    # -------- animation ----------------------------------------------------
-    anim = FuncAnimation(fig, update, init_func=init, frames=frames,
-                         blit=True, interval=1000 / fps)
-    anim.save(str(out),
-              writer=FFMpegWriter(fps=fps, metadata={'artist': 'Sozo'}),
-              dpi=144)
-    plt.close(fig)
-    return str(out)
-def safe_chart(desc, df, dur, out):
     try:
-        return animate_chart(desc, df, dur, out)
-    except Exception:
-        with plt.ioff():
-            df.plot(ax=plt.gca())
-            p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
-            plt.savefig(p, bbox_inches="tight"); plt.close()
-        img = cv2.resize(cv2.imread(str(p)), (WIDTH, HEIGHT))
-        return animate_image_fade(img, dur, out)
-def concat_media(paths: List[str], out: Path, kind="video"):
-    if not paths:
-        return
-    lst = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
-    with lst.open("w") as f:
-        for p in paths:
-            if Path(p).exists():
-                f.write(f"file '{Path(p).resolve()}'\n")
-    subprocess.run(
-        ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(lst),
-         "-c:v" if kind == "video" else "-c:a", "copy", str(out)],
-        check=True, capture_output=True)
-    lst.unlink(missing_ok=True)
-# ─── VIDEO GENERATION ──────────────────────────────────────────────────────
-def build_story_prompt(ctx_dict):
-    cols = ", ".join(ctx_dict["columns"][:6])
-    return f"""
-    You are a professional business storyteller and data analyst. Create a compelling script for a {VIDEO_SCENES}-scene business video presentation.
-    **Complete Dataset Context:**
-    {json.dumps(ctx_dict, indent=2)}
-    **Task Requirements:**
-    1. **Identify the Data Story**: Determine what business domain this data represents and what story it tells
-    2. **Create {VIDEO_SCENES} distinct scenes** that build a logical narrative arc
-    3. **Each scene must contain:**
-       - 1-2 sentences of clear, professional narration (plain English, no jargon)
-       - Exactly one chart tag: `<generate_chart: "chart_type | specific description">`
-    **Chart Guidelines:**
-    - Valid types: bar, pie, line, scatter, hist
-    - Base all charts on actual columns: {cols}
-    - Choose chart types that best tell the story:
-      * bar: categorical comparisons, rankings
-      * pie: proportional breakdowns (≤6 categories)
-      * line: trends over time, progression
-      * scatter: relationships, correlations
-      * hist: distributions, frequency analysis
-    **Narrative Structure:**
-    - Scene 1: Set the context and introduce the main story
-    - Middle scenes: Develop key insights and supporting evidence
-    - Final scene: Conclude with actionable takeaways or future outlook
-    **Content Standards:**
-    - Use conversational, executive-level language
-    - Include specific data insights (trends, percentages, comparisons)
-    - Avoid chart descriptions in narration ("as shown in the chart")
-    - Make each scene self-contained but connected to the overall story
-    - Focus on business impact and actionable insights
-    **Domain-Specific Approaches:**
-    - Sales data: Customer journey, revenue trends, market performance
-    - HR data: Workforce insights, talent analytics, organizational health
-    - Financial data: Performance indicators, cost analysis, profitability
-    - Operational data: Process efficiency, bottlenecks, optimization opportunities
-    - Customer data: Behavior patterns, satisfaction trends, retention analysis
-    **Output Format:**
-    Separate each scene with exactly [SCENE_BREAK]
-    **Example Structure:**
-    Our company's data reveals fascinating insights about market performance over the past year. Let's explore what the numbers tell us about our growth trajectory.
-    <generate_chart: "line | monthly revenue growth over 12 months">
-    [SCENE_BREAK]
-    Customer acquisition has shown remarkable patterns, with certain segments driving significantly more value than others. The data shows a clear preference emerging in our target markets.
-    <generate_chart: "bar | customer acquisition by segment">
-    Create a compelling, data-driven story that executives would find engaging and actionable.
-    """
-def generate_video(buf: bytes, name: str, ctx: str, key: str):
     try:
         subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
     except Exception:
-        st.error("🔴 FFmpeg not available — cannot render video."); return None
     df, err = load_dataframe_safely(buf, name)
     if err:
-        st.error(err); return None
-    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
-                                 google_api_key=API_KEY, temperature=0.2)
-    # Enhanced context with complete data insights
-    ctx_dict = {
-        "shape": df.shape,
-        "columns": list(df.columns),
-        "user_ctx": ctx or "General business analysis",
-        "full_dataframe": df.to_dict('records'),
-        "data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
-        "numeric_summary": {col: {stat: float(val) for stat, val in stats.items()}
-                           for col, stats in df.describe().to_dict().items()} if len(df.select_dtypes(include=['number']).columns) > 0 else {}
-    }
-    script = llm.invoke(build_story_prompt(ctx_dict)).content
-    scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
-    video_parts, audio_parts, temps = [], [], []
-    for idx, sc in enumerate(scenes[:VIDEO_SCENES]):
-        st.progress((idx + 1) / VIDEO_SCENES,
-                    text=f"Rendering Scene {idx + 1}/{VIDEO_SCENES}")
-        descs = extract_chart_tags(sc)
-        narrative = clean_narration(sc)
-        # --- audio ---
-        audio_bytes, _ = deepgram_tts(narrative)
-        mp3 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
-        if audio_bytes:
-            mp3.write_bytes(audio_bytes)
-            dur = audio_duration(str(mp3))
-        else:
-            dur = 5.0
-            generate_silence_mp3(dur, mp3)
-        audio_parts.append(str(mp3)); temps.append(mp3)
-        # --- visual ---
-        mp4 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
-        if descs:
-            safe_chart(descs[0], df, dur, mp4)
-        else:
-            img = generate_image_from_prompt(narrative)
-            img_cv = cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
-            animate_image_fade(img_cv, dur, mp4)
-        video_parts.append(str(mp4)); temps.append(mp4)
-    # concat
-    silent_vid = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
-    concat_media(video_parts, silent_vid, "video")
-    audio_mix = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
-    concat_media(audio_parts, audio_mix, "audio")
-    final_vid = Path(tempfile.gettempdir()) / f"{key}.mp4"
-    subprocess.run(
-        ["ffmpeg", "-y", "-i", str(silent_vid), "-i", str(audio_mix),
-         "-c:v", "copy", "-c:a", "aac", "-shortest", str(final_vid)],
-        check=True, capture_output=True)
-    for p in temps + [silent_vid, audio_mix]:
-        p.unlink(missing_ok=True)
-    return str(final_vid)
-# ─── UI ─────────────────────────────────────────────────────────────────────
-mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
-upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
-if upl:
-    df_prev, _ = load_dataframe_safely(upl.getvalue(), upl.name)
-    with st.expander("📊 Data Preview"):
-        st.dataframe(arrow_df(df_prev.head()))
-ctx = st.text_area("Business context or specific instructions (optional)")
-if st.button("🚀 Generate", type="primary", disabled=not upl):
-    key = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode()]))
-    with st.spinner("Generating…"):
         if mode == "Report (PDF)":
             st.session_state.bundle = generate_report(upl.getvalue(), upl.name, ctx, key)
         else:
@@ -582,34 +634,57 @@ if st.button("🚀 Generate", type="primary", disabled=not upl):
             path = generate_video(upl.getvalue(), upl.name, ctx, key)
             if path:
                 st.session_state.bundle = {"type": "video", "video_path": path, "key": key}
-    st.rerun()
-# ─── OUTPUT ────────────────────────────────────────────────────────────────
-if bundle := st.session_state.get("bundle"):
-    if bundle["type"] == "report":
-        st.subheader("📄 Generated Report")
-        with st.expander("View Report", expanded=True):
-            st.markdown(bundle["preview"], unsafe_allow_html=True)
-        c1, c2 = st.columns(2)
-        with c1:
-            st.download_button("Download PDF", bundle["pdf"],
-                               "business_report.pdf", "application/pdf",
-                               use_container_width=True)
-        with c2:
-            if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
-                txt = re.sub(r"<[^>]+>", "", bundle["report_md"])
-                audio, mime = deepgram_tts(txt)
-                st.audio(audio, format=mime) if audio else st.error("Narration failed.")
-    else:  # video
-        st.subheader("🎬 Generated Video Narrative")
-        vp = bundle["video_path"]
-        if Path(vp).exists():
-            with open(vp, "rb") as f:
-                st.video(f.read())
-            with open(vp, "rb") as f:
-                st.download_button("Download Video", f,
-                                   f"sozo_narrative_{bundle['key'][:8]}.mp4", "video/mp4")
-        else:
-            st.error("Video file missing – generation failed.")

 ##############################################################################
+# Sozo Business Studio · 10-Jul-2025 (Performance Fixed)                     #
+#  • Fixed report generation freezing issues                                  #
+#  • Optimized memory usage and resource management                           #
+#  • Added proper error handling and timeouts                                 #
+#  • Improved chart generation with fallback strategies                       #
+#  • Enhanced progress tracking and user feedback                             #
 ##############################################################################
 import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
+import time, gc, threading
 from pathlib import Path
+from typing import Tuple, Dict, List, Optional
+from concurrent.futures import ThreadPoolExecutor, TimeoutError
 import streamlit as st
 import pandas as pd
 from langchain_experimental.agents import create_pandas_dataframe_agent
 from langchain_google_genai import ChatGoogleGenerativeAI
 from google import genai
+from google.genai import types
 # ─── CONFIG ────────────────────────────────────────────────────────────────
 st.set_page_config(page_title="Sozo Business Studio", layout="wide")
 st.title("📊 Sozo Business Studio")
 st.caption("AI transforms business data into compelling narratives.")
+FPS, WIDTH, HEIGHT = 24, 1280, 720
 MAX_CHARTS, VIDEO_SCENES = 5, 5
+CHART_TIMEOUT = 30  # seconds
+REPORT_TIMEOUT = 120  # seconds
 API_KEY = os.getenv("GEMINI_API_KEY")
 if not API_KEY:
+    st.error("⚠️ GEMINI_API_KEY is not set.")
+    st.stop()
+try:
+    GEM = genai.Client(api_key=API_KEY)
+except Exception as e:
+    st.error(f"⚠️ Failed to initialize Gemini client: {e}")
+    st.stop()
+DG_KEY = os.getenv("DEEPGRAM_API_KEY")
 st.session_state.setdefault("bundle", None)
 sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
+# ─── MEMORY MANAGEMENT ─────────────────────────────────────────────────────
+def cleanup_matplotlib():
+    """Close every open matplotlib figure, then run a garbage-collection pass."""
+    # close('all') already destroys all figures. The former clf()/cla() calls
+    # are dropped: with no figure left, pyplot creates a fresh figure and axes
+    # just to clear them, so each cleanup left a new empty figure open.
+    plt.close('all')
+    gc.collect()
+def safe_temp_cleanup(temp_files: List[Path]):
+    """Best-effort removal of temporary files; any failure is ignored."""
+    for candidate in temp_files:
+        try:
+            # Nothing to delete when the path is already gone.
+            if not candidate.exists():
+                continue
+            candidate.unlink()
+        except Exception:
+            # Deliberately swallowed: cleanup must never break the caller.
+            continue
+# ─── ENHANCED HELPERS ──────────────────────────────────────────────────────
 def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
+    """Load CSV/Excel with enhanced error handling and size limits"""
     try:
+        # Check file size (limit to 50MB)
+        if len(buf) > 50 * 1024 * 1024:
+            return None, "File too large (max 50MB)"
         ext = Path(name).suffix.lower()
+        # Use smaller chunk size for large files
+        if ext in (".xlsx", ".xls"):
+            df = pd.read_excel(io.BytesIO(buf), engine='openpyxl' if ext == '.xlsx' else 'xlrd')
+        else:
+            df = pd.read_csv(io.BytesIO(buf), encoding='utf-8', on_bad_lines='skip')
+        # Basic data validation
         df.columns = df.columns.astype(str).str.strip()
+        df = df.dropna(how="all").reset_index(drop=True)
+        # Limit rows for performance
+        if len(df) > 10000:
+            df = df.head(10000)
+            st.warning("⚠️ Dataset truncated to 10,000 rows for performance")
         if df.empty or len(df.columns) == 0:
             raise ValueError("No usable data found")
         return df, None
     except Exception as e:
+        return None, f"Error loading file: {str(e)}"
 def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
+    """Convert for Streamlit Arrow renderer with memory optimization"""
+    # Create a copy with limited rows for preview
+    safe = df.head(1000).copy()
     for c in safe.columns:
         if safe[c].dtype.name in ("Int64", "Float64", "Boolean"):
             safe[c] = safe[c].astype(safe[c].dtype.name.lower())
     return safe
+@st.cache_data(show_spinner=False, ttl=3600)
 def deepgram_tts(txt: str) -> Tuple[bytes, str]:
+    """Cached audio narration with timeout"""
     if not DG_KEY or not txt:
         return None, None
     txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)[:1000]
     try:
         r = requests.post(
             "https://api.deepgram.com/v1/speak",
             params={"model": "aura-2-andromeda-en"},
             headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"},
+            json={"text": txt},
+            timeout=15  # Reduced timeout
+        )
         r.raise_for_status()
         return r.content, r.headers.get("Content-Type", "audio/mpeg")
     except Exception:
         return None, None
 def generate_silence_mp3(duration: float, out: Path):
+    """Generate silence with error handling"""
+    try:
+        subprocess.run(
+            ["ffmpeg", "-y", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono",
+             "-t", f"{duration:.3f}", "-q:a", "9", str(out)],
+            check=True, capture_output=True, timeout=30
+        )
+    except Exception as e:
+        st.warning(f"Failed to generate silence: {e}")
 def audio_duration(path: str) -> float:
+    """Get audio duration with fallback"""
     try:
         res = subprocess.run(
             ["ffprobe", "-v", "error", "-show_entries", "format=duration",
              "-of", "default=nw=1:nk=1", path],
+            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+            check=True, timeout=10
+        )
         return float(res.stdout.strip())
     except Exception:
         return 5.0
+# ─── CHART GENERATION WITH TIMEOUT ────────────────────────────────────────
 TAG_RE = re.compile(
     r'[<[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>"\'\]]+?)["\']?\s*[>\]]',
     re.I)
+def extract_chart_tags(t: str) -> List[str]:
+    """Return the chart descriptions found in ``t``, first occurrence only, in order."""
+    # A dict keeps insertion order, so it doubles as an ordered set here.
+    ordered: Dict[str, None] = {}
+    for match in TAG_RE.finditer(t or ""):
+        ordered.setdefault(match.group("d").strip(), None)
+    return list(ordered)
# Leading "Scene 3:" style label at the very start of a narration string.
re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I)


def clean_narration(txt: str) -> str:
    """Prepare narration text for speech.

    Removes, in order: a leading scene label, chart tags, parenthesised
    asides (with the whitespace before them), and runs of two or more
    whitespace characters, which collapse to one space. Falsy input gives "".
    """
    if not txt:
        return ""
    without_label = re_scene.sub("", txt)
    without_tags = TAG_RE.sub("", without_label)
    without_asides = re.sub(r"\s*\([^)]*\)", "", without_tags)
    return re.sub(r"\s{2,}", " ", without_asides).strip()
def generate_chart_with_timeout(agent, description: str, timeout: int = CHART_TIMEOUT) -> Optional[str]:
    """Ask the dataframe agent to draw a chart, giving up after `timeout` seconds.

    The prompt tells the agent to save the figure as ``chart.png``. This
    function returns only the agent's answer, so the caller has to look for
    the image file itself.

    Args:
        agent: Object exposing ``run(prompt)``.
        description: Chart description taken from a chart tag.
        timeout: Maximum number of seconds to wait for the agent.

    Returns:
        Whatever ``agent.run`` returned, or ``None`` on timeout or error.
    """
    # Before Python 3.11 the futures timeout is a different class from the
    # builtin TimeoutError, so both are caught below.
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    def chart_worker():
        cleanup_matplotlib()
        # Enhanced chart generation prompt
        chart_prompt = f"""
            Create a {description} chart using matplotlib with these requirements:
            1. Use plt.figure(figsize=(12, 8)) for consistent sizing
            2. Apply a clean, professional style: plt.style.use('seaborn-v0_8')
            3. Include proper title, axis labels, and legends
            4. Use professional color palette
            5. Ensure readable fonts (size 12+)
            6. Handle missing values by dropping or filling them
            7. Save with: plt.savefig('chart.png', dpi=300, bbox_inches='tight', facecolor='white')
            8. Always call plt.close() after saving
            Important: Only use columns that exist in the dataframe. If a column doesn't exist, use the closest available column.
            """
        # Exceptions propagate through the future so they are reported from
        # the calling thread instead of from this worker thread.
        return agent.run(chart_prompt)

    # Not used as a context manager: leaving a `with` block waits for the
    # worker to finish, which would defeat the timeout.
    executor = ThreadPoolExecutor(max_workers=1)
    try:
        future = executor.submit(chart_worker)
        return future.result(timeout=timeout)
    except (FuturesTimeoutError, TimeoutError):
        st.warning(f"Chart generation timed out after {timeout} seconds")
        return None
    except Exception as e:
        st.warning(f"Chart generation failed: {e}")
        return None
    finally:
        # A running thread cannot be killed; stop waiting for it and move on.
        executor.shutdown(wait=False)
        cleanup_matplotlib()
def create_fallback_chart(df: pd.DataFrame, description: str) -> Optional[str]:
    """Draw a basic chart from `df` when the agent produced none.

    The chart type depends on how many numeric columns `df` has: a scatter of
    the first two, a 20-bin histogram of the only one, or a text-only panel
    when there are none.

    Args:
        df: Data to plot.
        description: Text used in the chart title.

    Returns:
        Path of the PNG written to the system temp directory, or ``None`` if
        drawing or saving failed (a warning is shown in that case).
    """
    try:
        cleanup_matplotlib()
        fig, ax = plt.subplots(figsize=(12, 8))
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        if len(numeric_cols) >= 2:
            x_col, y_col = numeric_cols[0], numeric_cols[1]
            ax.scatter(df[x_col], df[y_col], alpha=0.6)
            ax.set_xlabel(x_col)
            ax.set_ylabel(y_col)
            ax.set_title(f"Scatter Plot: {description}")
        elif len(numeric_cols) == 1:
            only_col = numeric_cols[0]
            ax.hist(df[only_col].dropna(), bins=20, alpha=0.7)
            ax.set_xlabel(only_col)
            ax.set_ylabel('Frequency')
            ax.set_title(f"Distribution: {description}")
        else:
            ax.text(0.5, 0.5, f"Chart: {description}\nData available",
                    ha='center', va='center', fontsize=16)
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)
            ax.set_title(description)
        # Use the figure's own methods so the saved image is this figure even
        # if other code has changed pyplot's current figure.
        fig.tight_layout()
        temp_path = Path(tempfile.gettempdir()) / f"fallback_{uuid.uuid4()}.png"
        fig.savefig(temp_path, dpi=300, bbox_inches="tight", facecolor="white")
        plt.close(fig)
        return str(temp_path)
    except Exception as e:
        st.warning(f"Fallback chart creation failed: {e}")
        return None
    finally:
        cleanup_matplotlib()
+# ─── IMAGE GENERATION WITH FALLBACK ───────────────────────────────────────
 def placeholder_img() -> Image.Image:
+    """Create placeholder image"""
     return Image.new("RGB", (WIDTH, HEIGHT), (230, 230, 230))
def generate_image_from_prompt(prompt: str, timeout: int = 30) -> Image.Image:
    """Generate an illustration for `prompt`, falling back to a placeholder.

    Two Gemini image models are tried in order. Each is tried independently,
    so an error from the first does not prevent the second from being used.

    Args:
        prompt: Subject of the illustration; a fixed style prefix is added.
        timeout: Maximum number of seconds to wait for generation overall.

    Returns:
        The first image returned by a model, converted to RGB, or
        ``placeholder_img()`` when no image arrives, an error occurs, or the
        timeout elapses.
    """
    # Before Python 3.11 the futures timeout is a different class from the
    # builtin TimeoutError, so both are caught below.
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    full_prompt = "A clean business-presentation illustration: " + prompt
    model_names = (
        "gemini-2.0-flash-exp-image-generation",
        "gemini-2.0-flash-preview-image-generation",
    )

    def fetch(model_name):
        """Return the first inline image in the model's reply, or None."""
        res = GEM.models.generate_content(
            model=model_name,
            contents=full_prompt,
            # NOTE(review): the Gemini image-generation docs, as I recall
            # them, reject image-only output for these models and require
            # both modalities - confirm against the reference.
            config=types.GenerateContentConfig(response_modalities=["TEXT", "IMAGE"]),
        )
        candidates = getattr(res, "candidates", None) or []
        if not candidates:
            return None
        content = getattr(candidates[0], "content", None)
        for part in getattr(content, "parts", None) or []:
            inline = getattr(part, "inline_data", None)
            if inline is not None and getattr(inline, "data", None):
                return Image.open(io.BytesIO(inline.data)).convert("RGB")
        return None

    def image_worker():
        for model_name in model_names:
            try:
                img = fetch(model_name)
            except Exception:
                # Best effort: a failing model just means trying the next one.
                continue
            if img is not None:
                return img
        return placeholder_img()

    # Not used as a context manager: leaving a `with` block waits for the
    # worker to finish, which would defeat the timeout.
    executor = ThreadPoolExecutor(max_workers=1)
    try:
        return executor.submit(image_worker).result(timeout=timeout)
    except (FuturesTimeoutError, TimeoutError):
        st.warning(f"Image generation timed out after {timeout} seconds")
        return placeholder_img()
    except Exception:
        return placeholder_img()
    finally:
        executor.shutdown(wait=False)
+# ─── OPTIMIZED PDF GENERATION ─────────────────────────────────────────────
 class PDF(FPDF, HTMLMixin):
+    def header(self):
+        self.set_font('Arial', 'B', 16)
+        self.cell(0, 10, 'Sozo Business Report', 0, 1, 'C')
+        self.ln(5)
+    def footer(self):
+        self.set_y(-15)
+        self.set_font('Arial', 'I', 8)
+        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')
 def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
+    """Build PDF with error handling"""
+    try:
+        # Convert markdown to HTML with chart substitution
+        html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(
+            TAG_RE.sub(lambda m: f'<img src="{charts.get(m.group("d").strip(), "")}" width="400">', md)
         )
+        pdf = PDF()
+        pdf.set_auto_page_break(True, margin=15)
+        pdf.add_page()
+        pdf.set_font("Arial", "", 11)
+        # Simple text conversion (avoid complex HTML)
+        text_content = re.sub(r'<[^>]+>', '', html)
+        pdf.multi_cell(0, 6, text_content)
+        return bytes(pdf.output(dest="S"))
+    except Exception as e:
+        st.error(f"PDF generation failed: {e}")
+        # Return simple fallback PDF
+        pdf = PDF()
+        pdf.add_page()
+        pdf.set_font("Arial", "", 12)
+        pdf.multi_cell(0, 6, "Report generation encountered an error. Please try again.")
+        return bytes(pdf.output(dest="S"))
+# ─── OPTIMIZED REPORT GENERATION ──────────────────────────────────────────
def _inline_chart_images(md: str, charts: Dict[str, str]) -> str:
    """Swap each chart tag in `md` for a base64 ``<img>`` when its chart file is readable."""
    def to_img(match):
        path = charts.get(match.group("d").strip())
        if not path:
            return match.group(0)
        try:
            encoded = base64.b64encode(Path(path).read_bytes()).decode()
        except OSError:
            return match.group(0)
        return f'<img src="data:image/png;base64,{encoded}" style="max-width: 100%;">'

    # TAG_RE accepts every tag spelling extract_chart_tags accepts, so any
    # tag that produced a chart is also found here.
    return TAG_RE.sub(to_img, md)


def generate_report(buf: bytes, name: str, ctx: str, key: str) -> Optional[dict]:
    """Turn an uploaded dataset into a written report with charts and a PDF.

    Steps: load the data, ask the LLM for a markdown report based on a bounded
    summary of the data, generate up to MAX_CHARTS charts for the chart tags
    in that report (agent first, simple fallback second), then build the HTML
    preview and the PDF.

    Args:
        buf: Raw bytes of the uploaded file.
        name: Original file name, passed to the loader.
        ctx: Optional business context from the user.
        key: Cache key stored in the returned bundle.

    Returns:
        ``{"type": "report", "preview", "pdf", "report_md", "key"}`` on
        success, or ``None`` after showing an error.
    """
    import shutil
    # Before Python 3.11 the futures timeout is a different class from the
    # builtin TimeoutError, so both are caught below.
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    progress_bar = st.progress(0)
    status_text = st.empty()
    temp_files: List[Path] = []
    try:
        # Step 1: Load data
        status_text.text("Loading and validating data...")
        progress_bar.progress(0.1)
        df, err = load_dataframe_safely(buf, name)
        if err:
            st.error(err)
            return None

        # Step 2: Initialize LLM
        status_text.text("Initializing AI models...")
        progress_bar.progress(0.2)
        try:
            llm = ChatGoogleGenerativeAI(
                model="gemini-2.0-flash",
                google_api_key=API_KEY,
                temperature=0.1,
                request_timeout=60
            )
        except Exception as e:
            st.error(f"Failed to initialize AI model: {e}")
            return None

        # Step 3: Create a bounded summary of the data
        status_text.text("Analyzing data structure...")
        progress_bar.progress(0.3)
        ctx_dict = {
            "shape": df.shape,
            "columns": list(df.columns)[:20],
            "user_ctx": ctx or "General business analysis",
            # First 10 rows of the first 20 columns keeps the prompt small.
            "sample_data": df.iloc[:10, :20].to_dict('records'),
            "data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
        }
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        if 0 < len(numeric_cols) < 20:
            ctx_dict["numeric_summary"] = {
                col: {stat: float(val) for stat, val in stats.items()}
                for col, stats in df[numeric_cols].describe().to_dict().items()
            }

        # Step 4: Generate report
        status_text.text("Generating report content...")
        progress_bar.progress(0.4)
        cols = ", ".join(str(c) for c in ctx_dict["columns"][:10])
        # The model can only quote real figures if it is given some, so the
        # summary built above is sent with the prompt.
        data_json = json.dumps(
            {k: ctx_dict[k] for k in ("data_types", "numeric_summary", "sample_data") if k in ctx_dict},
            default=str,
        )
        report_prompt = f"""
        Analyze this business dataset and create a professional executive report.
        **Dataset:** {ctx_dict["shape"][0]} rows, {ctx_dict["shape"][1]} columns
        **Columns:** {cols}
        **Context:** {ctx_dict["user_ctx"]}
        **Data summary (JSON):**
        {data_json}
        **Requirements:**
        1. Write in professional, executive-level language
        2. Include 3-5 key insights with specific data points
        3. Provide actionable recommendations
        4. Use maximum 3 chart tags: `<generate_chart: "chart_type | description">`
        5. Valid chart types: bar, pie, line, scatter, hist
        6. Keep total length under 2000 words
        **Structure:**
        ## Executive Summary
        [Brief overview of key findings]
        ## Key Insights
        [3-5 actionable insights with data support]
        ## Recommendations
        [Specific, actionable recommendations]
        Focus on business impact and practical insights.
        """
        # Not used as a context manager: leaving a `with` block waits for the
        # worker to finish, which would defeat the timeout.
        executor = ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(lambda: llm.invoke(report_prompt).content)
            md = future.result(timeout=REPORT_TIMEOUT)
        except (FuturesTimeoutError, TimeoutError):
            st.error("Report generation timed out. Please try with a smaller dataset.")
            return None
        except Exception as e:
            st.error(f"Report generation failed: {e}")
            return None
        finally:
            executor.shutdown(wait=False)

        # Step 5: Generate charts
        status_text.text("Generating charts...")
        progress_bar.progress(0.6)
        chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
        charts: Dict[str, str] = {}
        if chart_descs:
            # Places where the agent may have saved its 'chart.png'.
            agent_outputs = [
                Path("chart.png"),
                Path(tempfile.gettempdir()) / "chart.png",
            ]
            try:
                agent = create_pandas_dataframe_agent(
                    llm=llm, df=df, verbose=False,
                    allow_dangerous_code=True,
                    max_iterations=3,
                    early_stopping_method="generate"
                )
                total = len(chart_descs)
                for i, desc in enumerate(chart_descs, start=1):
                    progress_bar.progress(0.6 + 0.3 * i / total)
                    status_text.text(f"Generating chart {i}/{total}: {desc[:50]}...")
                    # Delete leftovers so an old image is never mistaken for
                    # the chart being generated now.
                    for stale in agent_outputs:
                        try:
                            stale.unlink(missing_ok=True)
                        except OSError:
                            pass
                    generate_chart_with_timeout(agent, desc)
                    chart_path = next((p for p in agent_outputs if p.exists()), None)
                    if chart_path is None:
                        chart_path = create_fallback_chart(df, desc)
                    if chart_path and Path(chart_path).exists():
                        perm_path = Path(tempfile.gettempdir()) / f"chart_{uuid.uuid4()}.png"
                        # shutil.move also works when the working directory
                        # and the temp directory are on different filesystems.
                        shutil.move(str(chart_path), str(perm_path))
                        charts[desc] = str(perm_path)
                        temp_files.append(perm_path)
                    cleanup_matplotlib()
            except Exception as e:
                st.warning(f"Chart generation encountered issues: {e}")
                # Continue with whatever charts were produced

        # Step 6: Build preview and PDF
        status_text.text("Building PDF...")
        progress_bar.progress(0.9)
        try:
            preview = _inline_chart_images(md, charts)
            pdf_bytes = build_pdf(md, charts)
            progress_bar.progress(1.0)
            status_text.text("Report generated successfully!")
            return {
                "type": "report",
                "preview": preview,
                "pdf": pdf_bytes,
                "report_md": md,
                "key": key,
            }
        except Exception as e:
            st.error(f"PDF generation failed: {e}")
            return None
    except Exception as e:
        st.error(f"Report generation failed: {e}")
        return None
    finally:
        # Chart files are embedded in the preview/PDF by now (or the run
        # failed), so remove them on every exit path.
        safe_temp_cleanup(temp_files)
        progress_bar.empty()
        status_text.empty()
        cleanup_matplotlib()
        gc.collect()
+# ─── VIDEO GENERATION (SIMPLIFIED) ────────────────────────────────────────
 def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path, fps: int = FPS) -> str:
+    """Animate image with fade effect"""
     try:
+        frames = max(int(dur * fps), fps)
+        vid = cv2.VideoWriter(str(out), cv2.VideoWriter_fourcc(*"mp4v"), fps, (WIDTH, HEIGHT))
+        blank = np.full_like(img_cv2, 255)
+        for i in range(frames):
+            a = i / frames
+            blended = cv2.addWeighted(blank, 1 - a, img_cv2, a, 0)
+            vid.write(blended)
+        vid.release()
+        return str(out)
+    except Exception as e:
+        st.warning(f"Video animation failed: {e}")
+        return str(out)
def generate_video(buf: bytes, name: str, ctx: str, key: str) -> Optional[str]:
    """Produce a short fade-in clip for the uploaded dataset.

    Requires a working ``ffmpeg`` binary and a loadable data file. An
    illustration is generated from `ctx` and animated into a 10-second mp4
    named after `key` in the system temp directory.

    Returns:
        Path of the mp4 as a string, or ``None`` after showing an error.
    """
    # Check FFmpeg availability
    try:
        subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
    except Exception:
        st.error("🔴 FFmpeg not available — cannot render video.")
        return None

    # The dataframe itself is not used below; loading only validates the file.
    _frame, load_error = load_dataframe_safely(buf, name)
    if load_error:
        st.error(load_error)
        return None

    st.info("🎬 Video generation is simplified for better performance")
    try:
        topic = ctx or 'data analysis'
        illustration = generate_image_from_prompt(f"Business data visualization for {topic}")
        resized = illustration.resize((WIDTH, HEIGHT))
        bgr_frame = cv2.cvtColor(np.array(resized), cv2.COLOR_RGB2BGR)
        target = Path(tempfile.gettempdir()) / f"{key}.mp4"
        animate_image_fade(bgr_frame, 10.0, target)
        return str(target)
    except Exception as e:
        st.error(f"Video generation failed: {e}")
        return None
+# ─── STREAMLIT UI ─────────────────────────────────────────────────────────
def main():
    """Render the Streamlit page: inputs, generation trigger and results.

    The generated bundle is kept in ``st.session_state.bundle`` so it stays
    on screen across reruns (for example when a download button is clicked).
    """
    # Mode selection
    mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)

    # File upload
    upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
    if upl:
        with st.spinner("Loading data preview..."):
            df_prev, load_err = load_dataframe_safely(upl.getvalue(), upl.name)
        if load_err:
            st.error(f"Error loading file: {load_err}")
        else:
            with st.expander("📊 Data Preview", expanded=False):
                st.info(f"Shape: {df_prev.shape[0]} rows × {df_prev.shape[1]} columns")
                st.dataframe(arrow_df(df_prev), use_container_width=True)

    # Context input
    ctx = st.text_area(
        "Business context or specific instructions (optional)",
        help="Provide context about your data or specific analysis requirements"
    )

    # Generate button
    if st.button("🚀 Generate", type="primary", disabled=not upl):
        data = upl.getvalue()
        key = sha1_bytes(b"".join([data, mode.encode(), ctx.encode()]))
        if mode == "Report (PDF)":
            bundle = generate_report(data, upl.name, ctx, key)
            st.session_state.bundle = bundle
            succeeded = bundle is not None
        else:
            path = generate_video(data, upl.name, ctx, key)
            succeeded = bool(path)
            if succeeded:
                st.session_state.bundle = {"type": "video", "video_path": path, "key": key}
        # Rerun only after success: a rerun clears everything drawn in this
        # run, including the error messages that explain a failure.
        if succeeded:
            st.rerun()

    # Display results
    if bundle := st.session_state.get("bundle"):
        if bundle["type"] == "report":
            st.subheader("📄 Generated Report")
            with st.expander("📖 View Report", expanded=True):
                st.markdown(bundle["preview"], unsafe_allow_html=True)
            col1, col2 = st.columns(2)
            with col1:
                st.download_button(
                    "📥 Download PDF",
                    bundle["pdf"],
                    "business_report.pdf",
                    "application/pdf",
                    use_container_width=True
                )
            with col2:
                if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
                    with st.spinner("Generating narration..."):
                        txt = re.sub(r"<[^>]+>", "", bundle["report_md"])
                        audio, mime = deepgram_tts(txt)
                        if audio:
                            st.audio(audio, format=mime)
                        else:
                            st.error("Narration failed.")
        elif bundle["type"] == "video":
            st.subheader("🎬 Generated Video Narrative")
            vp = bundle["video_path"]
            if Path(vp).exists():
                # Read once and reuse the bytes for the player and the download.
                video_bytes = Path(vp).read_bytes()
                st.video(video_bytes)
                st.download_button(
                    "📥 Download Video",
                    video_bytes,
                    f"sozo_narrative_{bundle['key'][:8]}.mp4",
                    "video/mp4",
                    use_container_width=True
                )
            else:
                st.error("Video file missing – generation failed.")


if __name__ == "__main__":
    main()