rairo committed on
Commit
9823ff4
·
verified ·
1 Parent(s): 0c4e8d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -327
app.py CHANGED
@@ -1,18 +1,14 @@
1
- ###############################################################################
2
  # Sozo Business Studio · 10-Jul-2025
3
- # • Restores PDF branch alongside fixed Video branch
4
- # • Shared chart-tag grammar across both paths
5
- # • Narrator text cleans scene labels + chart talk
6
- # • Matplotlib animation starts from blank; artists returned (blit=True)
7
- # • Gemini Flash-preview image gen with placeholder fallback
8
- # • Silent-audio fallback keeps mux lengths equal
9
- # • NEW (2025-07-06): Lazy-loading of PDF charts + st.rerun()
10
  ##############################################################################
11
 
12
  import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
13
  from pathlib import Path
14
  from typing import Tuple, Dict, List
15
- from concurrent.futures import ThreadPoolExecutor
16
 
17
  import streamlit as st
18
  import pandas as pd
@@ -49,11 +45,8 @@ DG_KEY = os.getenv("DEEPGRAM_API_KEY") # optional narration
49
 
50
  sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
51
 
52
- # ─── LAZY-LOADING SCAFFOLDING ──────────────────────────────────────────────
53
- EXEC = ThreadPoolExecutor(max_workers=4) # parallel chart threads
54
- if "lazy_reports" not in st.session_state: # key → report dict
55
- st.session_state.lazy_reports = {}
56
- st.session_state.setdefault("bundle", None) # video branch
57
 
58
  # ─── HELPERS ───────────────────────────────────────────────────────────────
59
  def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
@@ -101,52 +94,22 @@ def deepgram_tts(txt: str) -> Tuple[bytes, str]:
101
 
102
  def generate_silence_mp3(duration: float, out: Path):
103
  subprocess.run(
104
- [
105
- "ffmpeg",
106
- "-y",
107
- "-f",
108
- "lavfi",
109
- "-i",
110
- "anullsrc=r=44100:cl=mono",
111
- "-t",
112
- f"{duration:.3f}",
113
- "-q:a",
114
- "9",
115
- str(out),
116
- ],
117
- check=True,
118
- capture_output=True,
119
  )
120
 
121
  def audio_duration(path: str) -> float:
122
  try:
123
  res = subprocess.run(
124
- [
125
- "ffprobe",
126
- "-v",
127
- "error",
128
- "-show_entries",
129
- "format=duration",
130
- "-of",
131
- "default=nw=1:nk=1",
132
- path,
133
- ],
134
- text=True,
135
- stdout=subprocess.PIPE,
136
- stderr=subprocess.PIPE,
137
- check=True,
138
  )
139
  return float(res.stdout.strip())
140
  except Exception:
141
  return 5.0
142
 
143
- TAG_RE = re.compile(
144
- r'[<[]\s*generate_?chart\s*[:=]?\s*[\"\'“”]?(?P<d>[^>\"\'”\]]+?)[\"\'“”]?\s*[>\]]',
145
- re.I,
146
- )
147
- extract_chart_tags = lambda t: list(
148
- dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or ""))
149
- )
150
 
151
  re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I)
152
  def clean_narration(txt: str) -> str:
@@ -166,8 +129,7 @@ def generate_image_from_prompt(prompt: str) -> Image.Image:
166
 
167
  def fetch(model_name):
168
  res = GEM.models.generate_content(
169
- model=model_name,
170
- contents=full_prompt,
171
  config=types.GenerateContentConfig(response_modalities=["IMAGE"]),
172
  )
173
  for part in res.candidates[0].content.parts:
@@ -182,11 +144,9 @@ def generate_image_from_prompt(prompt: str) -> Image.Image:
182
  return placeholder_img()
183
 
184
  # ─── PDF GENERATION ────────────────────────────────────────────────────────
185
- class PDF(FPDF, HTMLMixin):
186
- pass
187
 
188
  def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
189
- # For robust PDF creation, embed images as base64 data URIs
190
  def embed_chart_for_pdf(match):
191
  desc = match.group("d").strip()
192
  path = charts.get(desc)
@@ -235,33 +195,25 @@ def quick_chart(desc: str, df: pd.DataFrame, out: Path):
235
  fig.savefig(out, bbox_inches="tight", facecolor="white")
236
  plt.close(fig)
237
 
238
- # ─── REPORT (STEP 1) prepare markdown instantly ────────────────────────
239
- def prepare_report(buf: bytes, name: str, ctx: str):
 
 
 
 
240
  df, err = load_dataframe_safely(buf, name)
241
  if err:
242
  st.error(err)
243
- return None, None, None
244
 
245
- llm = ChatGoogleGenerativeAI(
246
- model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1
247
- )
248
 
249
- # ─── original enhanced context & prompt (UNTOUCHED) ───────────────────
250
  ctx_dict = {
251
- "shape": df.shape,
252
- "columns": list(df.columns),
253
- "user_ctx": ctx or "General business analysis",
254
  "full_dataframe": df.to_dict("records"),
255
- "data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
256
- "missing_values": {
257
- col: int(count) for col, count in df.isnull().sum().to_dict().items()
258
- },
259
- "numeric_summary": {
260
- col: {stat: float(val) for stat, val in stats.items()}
261
- for col, stats in df.describe().to_dict().items()
262
- }
263
- if len(df.select_dtypes(include=["number"]).columns) > 0
264
- else {},
265
  }
266
  cols = ", ".join(ctx_dict["columns"][:6])
267
 
@@ -307,76 +259,56 @@ def prepare_report(buf: bytes, name: str, ctx: str):
307
 
308
  Generate insights that would be valuable to C-level executives and department heads.
309
  """
310
- # ─── end original prompt ───────────────────────────────────────────────
311
-
312
  md = llm.invoke(report_prompt).content
313
  chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
314
- return df, md, chart_descs
315
 
316
- # ─── REPORT (STEP 2) — background worker per chart ───────────────────────
317
- def render_chart_worker(rep_key: str, desc: str):
318
- """Generate one chart (LLM + fallback)."""
319
- rep = st.session_state.lazy_reports[rep_key]
320
- df = rep["df"]
321
-
322
- img_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
323
- try:
324
- agent = create_pandas_dataframe_agent(
325
- llm=ChatGoogleGenerativeAI(
326
- model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1
327
- ),
328
- df=df,
329
- verbose=False,
330
- allow_dangerous_code=True,
331
- )
332
- chart_prompt = f"""
333
- Create a professional {desc} chart using matplotlib with these requirements:
334
- 1. Use a clean, business-appropriate style
335
- 2. Include proper title, axis labels, and legends
336
- 3. Apply appropriate color schemes (avoid rainbow colors)
337
- 4. Ensure text is readable (font size 10+)
338
- 5. Format numbers appropriately (e.g., currency, percentages)
339
- 6. Save the figure with high quality
340
- 7. Handle any missing or null values appropriately
341
- """
342
- agent.run(chart_prompt)
343
- if not img_path.exists():
344
- raise RuntimeError("LLM did not save figure")
345
- except Exception:
346
- try:
347
- quick_chart(desc, df, img_path)
348
- except Exception:
349
- img_path = None
 
350
 
351
- rep["charts"][desc] = str(img_path) if img_path and img_path.exists() else ""
352
- rep["pending"].discard(desc)
353
 
354
- if not rep["pending"]:
355
- rep["pdf"] = build_pdf(rep["md"], rep["charts"])
356
- rep["finished"] = True
357
- st.rerun()
358
 
359
- # ─── FIXED Helper: inline image or text placeholder for preview ───────────
360
- def _substitute_chart_tags_for_preview(rep, desc):
361
- """
362
- Returns an HTML <img> tag for a completed chart or a markdown placeholder.
363
- This function is used by re.sub to render the live report preview.
364
- The img tag styling is based on the working reference script for robustness.
365
- """
366
- path = rep["charts"].get(desc)
367
- if path and Path(path).exists():
368
- b64 = base64.b64encode(Path(path).read_bytes()).decode()
369
- # The style attribute is crucial for responsive rendering on all platforms.
370
- return f'<img src="data:image/png;base64,{b64}" style="max-width:100%;">'
371
- # A textual placeholder is safer and more informative than a broken/styled <img> tag.
372
- return f'\n\n> *⏳ Rendering chart: "{desc}"...*\n\n'
373
-
374
- # ─── ANIMATION HELPERS (unchanged) ────────────────────────────────────────
375
- def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path,
376
- fps: int = FPS) -> str:
377
  frames = max(int(dur * fps), fps)
378
- vid = cv2.VideoWriter(str(out), cv2.VideoWriter_fourcc(*"mp4v"),
379
- fps, (WIDTH, HEIGHT))
380
  blank = np.full_like(img_cv2, 255)
381
  for i in range(frames):
382
  a = i / frames
@@ -384,8 +316,7 @@ def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path,
384
  vid.release()
385
  return str(out)
386
 
387
- def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path,
388
- fps: int = FPS) -> str:
389
  """Render an animated chart whose clip length equals `dur`."""
390
  ctype, *rest = [s.strip().lower() for s in desc.split("|", 1)]
391
  ctype = ctype or "bar"
@@ -410,7 +341,6 @@ def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path,
410
  if ctype == "pie":
411
  wedges, _ = ax.pie(plot_df, labels=plot_df.index, startangle=90)
412
  ax.set_title(title)
413
-
414
  def init(): [w.set_alpha(0) for w in wedges]; return wedges
415
  def update(i):
416
  a = i / (frames - 1)
@@ -420,18 +350,15 @@ def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path,
420
  elif ctype == "bar":
421
  bars = ax.bar(plot_df.index, np.zeros_like(plot_df.values), color="#1f77b4")
422
  ax.set_ylim(0, plot_df.max() * 1.1); ax.set_title(title)
423
-
424
  def init(): return bars
425
  def update(i):
426
  a = i / (frames - 1)
427
- for b, h in zip(bars, plot_df.values):
428
- b.set_height(h * a)
429
  return bars
430
 
431
  elif ctype == "hist":
432
  _, _, patches = ax.hist(plot_df, bins=20, color="#1f77b4", alpha=0)
433
  ax.set_title(title)
434
-
435
  def init(): [p.set_alpha(0) for p in patches]; return patches
436
  def update(i):
437
  a = i / (frames - 1)
@@ -439,42 +366,30 @@ def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path,
439
  return patches
440
 
441
  elif ctype == "scatter":
442
- pts = ax.scatter(plot_df.iloc[:, 0], plot_df.iloc[:, 1],
443
- s=10, alpha=0)
444
  ax.set_title(title); ax.grid(alpha=.3)
445
-
446
  def init(): pts.set_alpha(0); return [pts]
447
- def update(i):
448
- pts.set_alpha(i / (frames - 1)); return [pts]
449
 
450
  else: # line
451
  line, = ax.plot([], [], lw=2)
452
- x_full = (plot_df.iloc[:, 0] if plot_df.shape[1] > 1
453
- else np.arange(len(plot_df)))
454
- y_full = (plot_df.iloc[:, 1] if plot_df.shape[1] > 1
455
- else plot_df.iloc[:, 0])
456
- ax.set_xlim(x_full.min(), x_full.max())
457
- ax.set_ylim(y_full.min(), y_full.max())
458
  ax.set_title(title); ax.grid(alpha=.3)
459
-
460
  def init(): line.set_data([], []); return [line]
461
  def update(i):
462
  k = max(2, int(len(x_full) * i / (frames - 1)))
463
  line.set_data(x_full[:k], y_full.iloc[:k])
464
  return [line]
465
 
466
- anim = FuncAnimation(fig, update, init_func=init,
467
- frames=frames, blit=True,
468
- interval=1000 / fps)
469
- anim.save(str(out),
470
- writer=FFMpegWriter(fps=fps, metadata={'artist':'Sozo'}),
471
- dpi=144)
472
  plt.close(fig)
473
  return str(out)
474
 
475
  def safe_chart(desc, df, dur, out):
476
- try:
477
- return animate_chart(desc, df, dur, out)
478
  except Exception:
479
  with plt.ioff():
480
  df.plot(ax=plt.gca())
@@ -484,33 +399,18 @@ def safe_chart(desc, df, dur, out):
484
  return animate_image_fade(img, dur, out)
485
 
486
  def concat_media(paths: List[str], out: Path, kind="video"):
487
- if not paths:
488
- return
489
  lst = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
490
  with lst.open("w") as f:
491
  for p in paths:
492
- if Path(p).exists():
493
- f.write(f"file '{Path(p).resolve()}'\n")
494
  subprocess.run(
495
- [
496
- "ffmpeg",
497
- "-y",
498
- "-f",
499
- "concat",
500
- "-safe",
501
- "0",
502
- "-i",
503
- str(lst),
504
- "-c:v" if kind == "video" else "-c:a",
505
- "copy",
506
- str(out),
507
- ],
508
- check=True,
509
- capture_output=True,
510
  )
511
  lst.unlink(missing_ok=True)
512
 
513
- # ─── VIDEO GENERATION (original prompt & logic) ────────────────────────────
514
  def build_story_prompt(ctx_dict):
515
  cols = ", ".join(ctx_dict["columns"][:6])
516
  return f"""
@@ -568,108 +468,54 @@ def build_story_prompt(ctx_dict):
568
  """
569
 
570
  def generate_video(buf: bytes, name: str, ctx: str, key: str):
571
- try:
572
- subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
573
- except Exception:
574
- st.error("🔴 FFmpeg not available — cannot render video.")
575
- return None
576
 
577
  df, err = load_dataframe_safely(buf, name)
578
- if err:
579
- st.error(err)
580
- return None
581
-
582
- llm = ChatGoogleGenerativeAI(
583
- model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.2
584
- )
585
 
 
586
  ctx_dict = {
587
- "shape": df.shape,
588
- "columns": list(df.columns),
589
- "user_ctx": ctx or "General business analysis",
590
  "full_dataframe": df.to_dict("records"),
591
  "data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
592
- "numeric_summary": {
593
- col: {stat: float(val) for stat, val in stats.items()}
594
- for col, stats in df.describe().to_dict().items()
595
- }
596
- if len(df.select_dtypes(include=["number"]).columns) > 0
597
- else {},
598
  }
599
-
600
  script = llm.invoke(build_story_prompt(ctx_dict)).content
601
  scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
602
 
603
  video_parts, audio_parts, temps = [], [], []
604
  for idx, sc in enumerate(scenes[:VIDEO_SCENES]):
605
- st.progress(
606
- (idx + 1) / VIDEO_SCENES,
607
- text=f"Rendering Scene {idx + 1}/{VIDEO_SCENES}",
608
- )
609
-
610
- descs = extract_chart_tags(sc)
611
- narrative = clean_narration(sc)
612
-
613
- # audio
614
  audio_bytes, _ = deepgram_tts(narrative)
615
  mp3 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
616
- if audio_bytes:
617
- mp3.write_bytes(audio_bytes)
618
- dur = audio_duration(str(mp3))
619
- else:
620
- dur = 5.0
621
- generate_silence_mp3(dur, mp3)
622
- audio_parts.append(str(mp3))
623
- temps.append(mp3)
624
 
625
- # visual
626
  mp4 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
627
- if descs:
628
- safe_chart(descs[0], df, dur, mp4)
629
  else:
630
  img = generate_image_from_prompt(narrative)
631
- img_cv = cv2.cvtColor(
632
- np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR
633
- )
634
  animate_image_fade(img_cv, dur, mp4)
635
- video_parts.append(str(mp4))
636
- temps.append(mp4)
637
 
638
- # concat
639
- silent_vid = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
640
  concat_media(video_parts, silent_vid, "video")
641
- audio_mix = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
642
  concat_media(audio_parts, audio_mix, "audio")
643
-
644
  final_vid = Path(tempfile.gettempdir()) / f"{key}.mp4"
645
  subprocess.run(
646
- [
647
- "ffmpeg",
648
- "-y",
649
- "-i",
650
- str(silent_vid),
651
- "-i",
652
- str(audio_mix),
653
- "-c:v",
654
- "copy",
655
- "-c:a",
656
- "aac",
657
- "-shortest",
658
- str(final_vid),
659
- ],
660
- check=True,
661
- capture_output=True,
662
  )
663
-
664
- for p in temps + [silent_vid, audio_mix]:
665
- p.unlink(missing_ok=True)
666
-
667
  return str(final_vid)
668
 
669
- # ─── UI ────────────────────────────────────────────────────────────────────
670
- mode = st.radio(
671
- "Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True
672
- )
673
 
674
  upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
675
  if upl:
@@ -679,81 +525,48 @@ if upl:
679
 
680
  ctx = st.text_area("Business context or specific instructions (optional)")
681
 
682
- # ─── Generate button ──────────────────────────────────────────────────────
683
  if st.button("🚀 Generate", type="primary", disabled=not upl):
684
  key = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode()]))
 
685
 
686
  if mode == "Report (PDF)":
687
- df, md, chart_descs = prepare_report(upl.getvalue(), upl.name, ctx)
688
- if df is None:
689
- st.stop()
690
-
691
- st.session_state.lazy_reports[key] = {
692
- "df": df,
693
- "md": md,
694
- "charts": {},
695
- "pending": set(chart_descs),
696
- "finished": False,
697
- }
698
- for d in chart_descs:
699
- EXEC.submit(render_chart_worker, key, d)
700
-
701
- st.rerun()
702
-
703
- else: # video branch
704
- st.session_state.bundle = None
705
- path = generate_video(upl.getvalue(), upl.name, ctx, key)
706
- if path:
707
- st.session_state.bundle = {"type": "video", "video_path": path, "key": key}
708
- st.rerun()
709
-
710
- # ─── OUTPUT (with fixed preview rendering) ────────────────────────────────
711
- # 1) live PDF reports (may be multiple)
712
- for rep_key, rep in st.session_state.lazy_reports.items():
713
- st.subheader("📄 Generated Report")
714
- with st.expander("View Report", expanded=True):
715
- # This robust method substitutes tags with base64 <img> tags for completed
716
- # charts or a text placeholder for pending ones. This ensures correct rendering
717
- # of the interleaved text and images, as guided by the working example.
718
- md_with_imgs = TAG_RE.sub(
719
- lambda m: _substitute_chart_tags_for_preview(rep, m.group("d").strip()), rep["md"]
720
- )
721
- st.markdown(md_with_imgs, unsafe_allow_html=True)
722
 
723
- if rep["finished"]:
724
  c1, c2 = st.columns(2)
725
  with c1:
726
  st.download_button(
727
- "Download PDF",
728
- rep["pdf"],
729
- f"business_report_{rep_key[:8]}.pdf",
730
- "application/pdf",
731
- use_container_width=True,
732
  )
733
  with c2:
734
- if DG_KEY and st.button("🔊 Narrate Summary", key=f"aud_{rep_key}"):
735
- txt = re.sub(r"<[^>]+>", "", rep["md"])
736
  audio, mime = deepgram_tts(txt)
737
- if audio:
738
- st.audio(audio, format=mime)
739
- else:
740
- st.error("Narration failed.")
741
- else:
742
- st.info("Charts are still rendering… feel free to keep browsing.")
743
-
744
- # 2) video branch output
745
- if (bundle := st.session_state.get("bundle")) and bundle.get("type") == "video":
746
- st.subheader("🎬 Generated Video Narrative")
747
- vp = bundle["video_path"]
748
- if Path(vp).exists():
749
- with open(vp, "rb") as f:
750
- st.video(f.read())
751
- with open(vp, "rb") as f:
752
- st.download_button(
753
- "Download Video",
754
- f,
755
- f"sozo_narrative_{bundle['key'][:8]}.mp4",
756
- "video/mp4",
757
- )
758
- else:
759
- st.error("Video file missing – generation failed.")
 
1
+ ##############################################################################
2
  # Sozo Business Studio · 10-Jul-2025
3
+ # • REFACTORED: Removed lazy-loading to ensure stability on Streamlit.
4
+ # • Report generation is now a single, synchronous process.
5
+ # • Unified output under a single `st.session_state.bundle` for both modes.
6
+ # • This is the complete, unabridged code with no functions skipped.
 
 
 
7
  ##############################################################################
8
 
9
  import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
10
  from pathlib import Path
11
  from typing import Tuple, Dict, List
 
12
 
13
  import streamlit as st
14
  import pandas as pd
 
45
 
46
  sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
47
 
48
+ # --- Simplified Session State (No Lazy Loading) ---
49
+ st.session_state.setdefault("bundle", None)
 
 
 
50
 
51
  # ─── HELPERS ───────────────────────────────────────────────────────────────
52
  def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
 
94
 
95
  def generate_silence_mp3(duration: float, out: Path):
96
  subprocess.run(
97
+ [ "ffmpeg", "-y", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono", "-t", f"{duration:.3f}", "-q:a", "9", str(out), ],
98
+ check=True, capture_output=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  )
100
 
101
  def audio_duration(path: str) -> float:
102
  try:
103
  res = subprocess.run(
104
+ [ "ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=nw=1:nk=1", path, ],
105
+ text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True,
 
 
 
 
 
 
 
 
 
 
 
 
106
  )
107
  return float(res.stdout.strip())
108
  except Exception:
109
  return 5.0
110
 
111
+ TAG_RE = re.compile( r'[<[]\s*generate_?chart\s*[:=]?\s*[\"\'“”]?(?P<d>[^>\"\'”\]]+?)[\"\'“”]?\s*[>\]]', re.I, )
112
+ extract_chart_tags = lambda t: list( dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")) )
 
 
 
 
 
113
 
114
  re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I)
115
  def clean_narration(txt: str) -> str:
 
129
 
130
  def fetch(model_name):
131
  res = GEM.models.generate_content(
132
+ model=model_name, contents=full_prompt,
 
133
  config=types.GenerateContentConfig(response_modalities=["IMAGE"]),
134
  )
135
  for part in res.candidates[0].content.parts:
 
144
  return placeholder_img()
145
 
146
  # ─── PDF GENERATION ────────────────────────────────────────────────────────
147
+ class PDF(FPDF, HTMLMixin): pass
 
148
 
149
  def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
 
150
  def embed_chart_for_pdf(match):
151
  desc = match.group("d").strip()
152
  path = charts.get(desc)
 
195
  fig.savefig(out, bbox_inches="tight", facecolor="white")
196
  plt.close(fig)
197
 
198
+ # ─── SYNCHRONOUS REPORT GENERATION (NO LAZY LOADING) ────────────────────────
199
+ def generate_report_bundle(buf: bytes, name: str, ctx: str, key: str):
200
+ """
201
+ Generates the full report and all assets in a single, synchronous pass.
202
+ """
203
+ # 1. Load data and generate markdown text
204
  df, err = load_dataframe_safely(buf, name)
205
  if err:
206
  st.error(err)
207
+ return None
208
 
209
+ llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)
 
 
210
 
 
211
  ctx_dict = {
212
+ "shape": df.shape, "columns": list(df.columns), "user_ctx": ctx or "General business analysis",
 
 
213
  "full_dataframe": df.to_dict("records"),
214
+ "data_types": {c: str(d) for c, d in df.dtypes.to_dict().items()},
215
+ "missing_values": {c: int(v) for c, v in df.isnull().sum().to_dict().items()},
216
+ "numeric_summary": {c: {s: float(v) for s, v in stats.items()} for c, stats in df.describe().to_dict().items()} if len(df.select_dtypes(include=["number"]).columns) > 0 else {},
 
 
 
 
 
 
 
217
  }
218
  cols = ", ".join(ctx_dict["columns"][:6])
219
 
 
259
 
260
  Generate insights that would be valuable to C-level executives and department heads.
261
  """
 
 
262
  md = llm.invoke(report_prompt).content
263
  chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
 
264
 
265
+ # 2. Generate all charts sequentially
266
+ chart_paths = {}
267
+ agent = create_pandas_dataframe_agent(llm=llm, df=df, verbose=False, allow_dangerous_code=True)
268
+ for desc in chart_descs:
269
+ with st.spinner(f"Generating chart: {desc}..."):
270
+ img_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
271
+ try:
272
+ chart_prompt = f"""
273
+ Create a professional {desc} chart using matplotlib with these requirements:
274
+ 1. Use a clean, business-appropriate style
275
+ 2. Include proper title, axis labels, and legends
276
+ 3. Apply appropriate color schemes (avoid rainbow colors)
277
+ 4. Ensure text is readable (font size 10+)
278
+ 5. Format numbers appropriately (e.g., currency, percentages)
279
+ 6. Save the figure with high quality
280
+ 7. Handle any missing or null values appropriately
281
+ """
282
+ agent.run(chart_prompt)
283
+ if not img_path.exists(): raise RuntimeError("LLM did not save figure")
284
+ except Exception:
285
+ try: quick_chart(desc, df, img_path)
286
+ except Exception: img_path = None
287
+ if img_path and img_path.exists():
288
+ chart_paths[desc] = str(img_path)
289
+
290
+ # 3. Assemble the final report bundle
291
+ pdf_bytes = build_pdf(md, chart_paths)
292
+
293
+ def _substitute_tags_for_preview(match):
294
+ desc = match.group("d").strip()
295
+ path = chart_paths.get(desc)
296
+ if path:
297
+ b64 = base64.b64encode(Path(path).read_bytes()).decode()
298
+ return f'<img src="data:image/png;base64,{b64}" style="max-width:100%;">'
299
+ return f"*Chart '{desc}' could not be generated.*"
300
 
301
+ preview_md = TAG_RE.sub(_substitute_tags_for_preview, md)
 
302
 
303
+ return {
304
+ "type": "report", "key": key, "preview_md": preview_md,
305
+ "pdf": pdf_bytes, "raw_md": md
306
+ }
307
 
308
+ # ─── ANIMATION HELPERS ────────────────────────────────────────
309
+ def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path, fps: int = FPS) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  frames = max(int(dur * fps), fps)
311
+ vid = cv2.VideoWriter(str(out), cv2.VideoWriter_fourcc(*"mp4v"), fps, (WIDTH, HEIGHT))
 
312
  blank = np.full_like(img_cv2, 255)
313
  for i in range(frames):
314
  a = i / frames
 
316
  vid.release()
317
  return str(out)
318
 
319
+ def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path, fps: int = FPS) -> str:
 
320
  """Render an animated chart whose clip length equals `dur`."""
321
  ctype, *rest = [s.strip().lower() for s in desc.split("|", 1)]
322
  ctype = ctype or "bar"
 
341
  if ctype == "pie":
342
  wedges, _ = ax.pie(plot_df, labels=plot_df.index, startangle=90)
343
  ax.set_title(title)
 
344
  def init(): [w.set_alpha(0) for w in wedges]; return wedges
345
  def update(i):
346
  a = i / (frames - 1)
 
350
  elif ctype == "bar":
351
  bars = ax.bar(plot_df.index, np.zeros_like(plot_df.values), color="#1f77b4")
352
  ax.set_ylim(0, plot_df.max() * 1.1); ax.set_title(title)
 
353
  def init(): return bars
354
  def update(i):
355
  a = i / (frames - 1)
356
+ for b, h in zip(bars, plot_df.values): b.set_height(h * a)
 
357
  return bars
358
 
359
  elif ctype == "hist":
360
  _, _, patches = ax.hist(plot_df, bins=20, color="#1f77b4", alpha=0)
361
  ax.set_title(title)
 
362
  def init(): [p.set_alpha(0) for p in patches]; return patches
363
  def update(i):
364
  a = i / (frames - 1)
 
366
  return patches
367
 
368
  elif ctype == "scatter":
369
+ pts = ax.scatter(plot_df.iloc[:, 0], plot_df.iloc[:, 1], s=10, alpha=0)
 
370
  ax.set_title(title); ax.grid(alpha=.3)
 
371
  def init(): pts.set_alpha(0); return [pts]
372
+ def update(i): pts.set_alpha(i / (frames - 1)); return [pts]
 
373
 
374
  else: # line
375
  line, = ax.plot([], [], lw=2)
376
+ x_full = (plot_df.iloc[:, 0] if plot_df.shape[1] > 1 else np.arange(len(plot_df)))
377
+ y_full = (plot_df.iloc[:, 1] if plot_df.shape[1] > 1 else plot_df.iloc[:, 0])
378
+ ax.set_xlim(x_full.min(), x_full.max()); ax.set_ylim(y_full.min(), y_full.max())
 
 
 
379
  ax.set_title(title); ax.grid(alpha=.3)
 
380
  def init(): line.set_data([], []); return [line]
381
  def update(i):
382
  k = max(2, int(len(x_full) * i / (frames - 1)))
383
  line.set_data(x_full[:k], y_full.iloc[:k])
384
  return [line]
385
 
386
+ anim = FuncAnimation(fig, update, init_func=init, frames=frames, blit=True, interval=1000 / fps)
387
+ anim.save(str(out), writer=FFMpegWriter(fps=fps, metadata={'artist':'Sozo'}), dpi=144)
 
 
 
 
388
  plt.close(fig)
389
  return str(out)
390
 
391
  def safe_chart(desc, df, dur, out):
392
+ try: return animate_chart(desc, df, dur, out)
 
393
  except Exception:
394
  with plt.ioff():
395
  df.plot(ax=plt.gca())
 
399
  return animate_image_fade(img, dur, out)
400
 
401
  def concat_media(paths: List[str], out: Path, kind="video"):
402
+ if not paths: return
 
403
  lst = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
404
  with lst.open("w") as f:
405
  for p in paths:
406
+ if Path(p).exists(): f.write(f"file '{Path(p).resolve()}'\n")
 
407
  subprocess.run(
408
+ [ "ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(lst), "-c:v" if kind == "video" else "-c:a", "copy", str(out), ],
409
+ check=True, capture_output=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
410
  )
411
  lst.unlink(missing_ok=True)
412
 
413
+ # ─── VIDEO GENERATION ────────────────────────────
414
  def build_story_prompt(ctx_dict):
415
  cols = ", ".join(ctx_dict["columns"][:6])
416
  return f"""
 
468
  """
469
 
470
  def generate_video(buf: bytes, name: str, ctx: str, key: str):
471
+ try: subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
472
+ except Exception: st.error("🔴 FFmpeg not available — cannot render video."); return None
 
 
 
473
 
474
  df, err = load_dataframe_safely(buf, name)
475
+ if err: st.error(err); return None
 
 
 
 
 
 
476
 
477
+ llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.2)
478
  ctx_dict = {
479
+ "shape": df.shape, "columns": list(df.columns), "user_ctx": ctx or "General business analysis",
 
 
480
  "full_dataframe": df.to_dict("records"),
481
  "data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
482
+ "numeric_summary": {col: {stat: float(val) for stat, val in stats.items()} for col, stats in df.describe().to_dict().items()} if len(df.select_dtypes(include=["number"]).columns) > 0 else {},
 
 
 
 
 
483
  }
 
484
  script = llm.invoke(build_story_prompt(ctx_dict)).content
485
  scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
486
 
487
  video_parts, audio_parts, temps = [], [], []
488
  for idx, sc in enumerate(scenes[:VIDEO_SCENES]):
489
+ st.progress((idx + 1) / VIDEO_SCENES, text=f"Rendering Scene {idx + 1}/{VIDEO_SCENES}")
490
+ descs, narrative = extract_chart_tags(sc), clean_narration(sc)
 
 
 
 
 
 
 
491
  audio_bytes, _ = deepgram_tts(narrative)
492
  mp3 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
493
+ if audio_bytes: mp3.write_bytes(audio_bytes); dur = audio_duration(str(mp3))
494
+ else: dur = 5.0; generate_silence_mp3(dur, mp3)
495
+ audio_parts.append(str(mp3)); temps.append(mp3)
 
 
 
 
 
496
 
 
497
  mp4 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
498
+ if descs: safe_chart(descs[0], df, dur, mp4)
 
499
  else:
500
  img = generate_image_from_prompt(narrative)
501
+ img_cv = cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
 
 
502
  animate_image_fade(img_cv, dur, mp4)
503
+ video_parts.append(str(mp4)); temps.append(mp4)
 
504
 
505
+ silent_vid, audio_mix = Path(tempfile.gettempdir())/f"{uuid.uuid4()}.mp4", Path(tempfile.gettempdir())/f"{uuid.uuid4()}.mp3"
 
506
  concat_media(video_parts, silent_vid, "video")
 
507
  concat_media(audio_parts, audio_mix, "audio")
 
508
  final_vid = Path(tempfile.gettempdir()) / f"{key}.mp4"
509
  subprocess.run(
510
+ [ "ffmpeg", "-y", "-i", str(silent_vid), "-i", str(audio_mix), "-c:v", "copy", "-c:a", "aac", "-shortest", str(final_vid), ],
511
+ check=True, capture_output=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
  )
513
+ for p in temps + [silent_vid, audio_mix]: p.unlink(missing_ok=True)
 
 
 
514
  return str(final_vid)
515
 
516
+
517
+ # ─── UI & MAIN WORKFLOW ──────────────────────────────────────────────────
518
+ mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
 
519
 
520
  upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
521
  if upl:
 
525
 
526
  ctx = st.text_area("Business context or specific instructions (optional)")
527
 
528
+ # ─── Generate button (with synchronous flow) ──────────────────────────
529
  if st.button("🚀 Generate", type="primary", disabled=not upl):
530
  key = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode()]))
531
+ st.session_state.bundle = None # Clear previous results
532
 
533
  if mode == "Report (PDF)":
534
+ with st.spinner("Generating full report and charts... Please wait."):
535
+ bundle = generate_report_bundle(upl.getvalue(), upl.name, ctx, key)
536
+ st.session_state.bundle = bundle
537
+ else: # Video branch (already synchronous)
538
+ # The video function already shows progress, so a top-level spinner is not needed.
539
+ bundle_path = generate_video(upl.getvalue(), upl.name, ctx, key)
540
+ if bundle_path:
541
+ st.session_state.bundle = {"type": "video", "video_path": bundle_path, "key": key}
542
+ st.rerun() # Rerun once to display the final state
543
+
544
+ # ─── UNIFIED OUTPUT AREA ─────────────────────────────────────────────────
545
+ if (bundle := st.session_state.get("bundle")):
546
+ if bundle.get("type") == "report":
547
+ st.subheader("📄 Generated Report")
548
+ with st.expander("View Report", expanded=True):
549
+ st.markdown(bundle["preview_md"], unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550
 
 
551
  c1, c2 = st.columns(2)
552
  with c1:
553
  st.download_button(
554
+ "Download PDF", bundle["pdf"], f"business_report_{bundle['key'][:8]}.pdf",
555
+ "application/pdf", use_container_width=True,
 
 
 
556
  )
557
  with c2:
558
+ if DG_KEY and st.button("🔊 Narrate Summary", key=f"aud_{bundle['key']}"):
559
+ txt = re.sub(r"<[^>]+>", "", bundle["raw_md"])
560
  audio, mime = deepgram_tts(txt)
561
+ if audio: st.audio(audio, format=mime)
562
+ else: st.error("Narration failed.")
563
+
564
+ elif bundle.get("type") == "video":
565
+ st.subheader("🎬 Generated Video Narrative")
566
+ vp = bundle["video_path"]
567
+ if Path(vp).exists():
568
+ with open(vp, "rb") as f: st.video(f.read())
569
+ with open(vp, "rb") as f:
570
+ st.download_button("Download Video", f, f"sozo_narrative_{bundle['key'][:8]}.mp4", "video/mp4")
571
+ else:
572
+ st.error("Video file missing – generation may have failed.")