rairo committed on
Commit
a9e09a8
·
verified ·
1 Parent(s): 0d73b2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +340 -180
app.py CHANGED
@@ -1,23 +1,33 @@
1
  ###############################################################################
2
  # Sozo Business Studio · AI transforms business data into compelling narratives
 
3
  ###############################################################################
4
  import os, re, json, hashlib, uuid, base64, io, tempfile, wave, requests, subprocess
5
  from pathlib import Path
6
 
 
7
  import streamlit as st
8
  import pandas as pd
9
  import numpy as np
 
10
  import matplotlib
11
  matplotlib.use("Agg")
12
  import matplotlib.pyplot as plt
 
 
13
  from fpdf import FPDF, HTMLMixin
14
  from markdown_it import MarkdownIt
15
  from PIL import Image
 
 
 
 
 
 
16
 
17
  from langchain_experimental.agents import create_pandas_dataframe_agent
18
  from langchain_google_genai import ChatGoogleGenerativeAI
19
  from google import genai
20
- import cv2 # Added for video processing
21
 
22
  # ─────────────────────────────────────────────────────────────────────────────
23
  # CONFIG & CONSTANTS
@@ -26,21 +36,20 @@ st.set_page_config(page_title="Sozo Business Studio", layout="wide")
26
  st.title("📊 Sozo Business Studio")
27
  st.caption("AI transforms business data into compelling narratives.")
28
 
29
- # --- Feature Caps ---
30
- MAX_CHARTS = 5
31
- VIDEO_SCENES = 5 # Number of scenes for the video
 
32
 
33
- # --- API Keys & Clients (Correct Initialization) ---
34
  API_KEY = os.getenv("GEMINI_API_KEY")
35
  if not API_KEY:
36
  st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
37
- # Use the Client pattern from the original script
38
- GEM = genai.Client(api_key=API_KEY)
39
 
40
- DG_KEY = os.getenv("DEEPGRAM_API_KEY") # Optional but needed for narration
41
 
42
- # --- Session State ---
43
- # Simplified state to hold the most recent generated output
44
  st.session_state.setdefault("bundle", None)
45
 
46
  # ─────────────────────────────────────────────────────────────────────────────
@@ -57,114 +66,239 @@ def validate_file_upload(f):
57
  errs.append("Unsupported file type")
58
  return errs
59
 
60
- def load_dataframe_safely(buf:bytes, name:str):
61
  try:
62
  ext = Path(name).suffix.lower()
63
- df = pd.read_excel(io.BytesIO(buf)) if ext in (".xlsx", ".xls") else pd.read_csv(io.BytesIO(buf))
64
- if df.empty or len(df.columns)==0: raise ValueError("File contains no data")
65
- df.columns=df.columns.astype(str).str.strip()
66
- df=df.dropna(how="all")
67
  if df.empty: raise ValueError("Rows all empty")
68
- return df,None
69
- except Exception as e: return None,str(e)
 
70
 
71
- def fix_bullet(t:str)->str:
72
- return re.sub(r"[\x80-\x9f]", "", t) if isinstance(t, str) else ""
73
 
74
- # ——— Arrow helpers ————————————————————————————————————————————————
75
- def arrow_df(df:pd.DataFrame)->pd.DataFrame:
76
- safe=df.copy()
77
  for c in safe.columns:
78
- if safe[c].dtype.name in ("Int64","Float64","Boolean"):
79
- safe[c]=safe[c].astype(safe[c].dtype.name.lower())
80
  return safe
81
 
82
- # ——— Text-to-Speech (Used by Both Features) ————————————————————————
83
  @st.cache_data(show_spinner=False)
84
- def deepgram_tts(text:str):
85
  if not DG_KEY or not text: return None, None
86
  text = re.sub(r"[^\w\s.,!?;:-]", "", text)[:1000]
87
  try:
88
- r = requests.post("https://api.deepgram.com/v1/speak",
89
- params={"model":"aura-asteria-en"},
90
- headers={"Authorization":f"Token {DG_KEY}", "Content-Type":"application/json"},
91
- json={"text":text}, timeout=30)
 
 
 
92
  r.raise_for_status()
93
  return r.content, r.headers.get("Content-Type", "audio/mpeg")
94
  except Exception:
95
  return None, None
96
 
97
- def pcm_to_wav(pcm,sr=24000,ch=1,w=2):
98
- buf=io.BytesIO()
99
- with wave.open(buf,'wb') as wf:
100
- wf.setnchannels(ch); wf.setsampwidth(w); wf.setframerate(sr); wf.writeframes(pcm)
101
- buf.seek(0); return buf.getvalue()
 
 
 
 
 
 
102
 
103
- # ——— Chart & Tag Helpers ———————————————————————————————————————————
 
 
104
  TAG_RE = re.compile(r'[<\[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>\]\'"”’]+?)["\']?\s*[>\]]', re.I)
105
  extract_chart_tags = lambda t: list(dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")))
106
- def repl_tags(txt:str,mp:dict,str_fn):
 
107
  return TAG_RE.sub(lambda m: str_fn(mp[m.group("d").strip()]) if m.group("d").strip() in mp else m.group(0), txt)
108
 
109
  # ─────────────────────────────────────────────────────────────────────────────
110
- # FEATURE 1: REPORT GENERATION
111
  # ─────────────────────────────────────────────────────────────────────────────
112
- class PDF(FPDF,HTMLMixin): pass
113
 
114
  def build_pdf(md, charts):
115
- md = fix_bullet(md).replace("•", "*")
116
- md = repl_tags(md, charts, lambda p: f'<img src="{p}">')
117
- html = MarkdownIt("commonmark", {"breaks":True}).enable("table").render(md)
118
- pdf = PDF(); pdf.set_auto_page_break(True, margin=15)
119
  pdf.add_page()
120
- pdf.set_font("Arial", "B", 18)
121
- pdf.cell(0, 12, "AI-Generated Business Report", ln=True); pdf.ln(3)
122
- pdf.set_font("Arial", "", 11)
123
- pdf.write_html(html)
124
  return bytes(pdf.output(dest="S"))
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  def generate_report_assets(key, buf, name, ctx):
127
  df, err = load_dataframe_safely(buf, name)
128
  if err: st.error(err); return None
129
- llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=API_KEY, temperature=0.1)
130
- ctx_dict = {"shape": df.shape, "columns": list(df.columns), "user_ctx": ctx or "General business analysis"}
131
 
132
- report_md = llm.invoke(f"""You are a senior business analyst. Write an executive-level Markdown report
 
 
 
 
 
 
133
  with insights & recommendations. Use chart tags like <generate_chart: "description"> where helpful.
134
- Data Context: {json.dumps(ctx_dict, indent=2)}""").content
 
135
 
136
- chart_descs = extract_chart_tags(report_md)[:MAX_CHARTS]
137
- chart_paths = {}
138
  if chart_descs:
139
- ag = create_pandas_dataframe_agent(llm=llm, df=df, verbose=False, allow_dangerous_code=True)
 
140
  for d in chart_descs:
141
  with st.spinner(f"Generating chart: {d}"):
142
  with plt.ioff():
143
  try:
144
  ag.run(f"Create a {d} with Matplotlib and save.")
145
- fig = plt.gcf()
146
  if fig.axes:
147
  p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
148
  fig.savefig(p, dpi=300, bbox_inches="tight", facecolor="white")
149
  chart_paths[d] = str(p)
150
  plt.close("all")
151
- except: plt.close("all")
 
 
 
 
 
 
152
 
153
- md = fix_bullet(report_md)
154
- pdf = build_pdf(md, chart_paths)
155
- preview = repl_tags(md, chart_paths, lambda p: f'<img src="data:image/png;base64,{base64.b64encode(Path(p).read_bytes()).decode()}" style="max-width:100%;">')
156
-
157
- return {"type": "report", "preview": preview, "pdf": pdf, "report_md": md, "key": key}
158
 
159
  # ─────────────────────────────────────────────────────────────────────────────
160
- # FEATURE 2: VIDEO GENERATION
161
  # ─────────────────────────────────────────────────────────────────────────────
162
  def generate_image_from_prompt(prompt, style):
163
- """Generates an illustrative image using the Gemini Client."""
164
  try:
165
  full_prompt = f"A professional, clean, illustrative image for a business presentation: {prompt}, in the style of {style}."
166
- # Use the globally defined GEM client, as per the original script's pattern
167
- response = GEM.generate_content(
168
  contents=full_prompt,
169
  model="models/gemini-1.5-flash-latest",
170
  generation_config={"response_mime_type": "image/png"}
@@ -173,46 +307,23 @@ def generate_image_from_prompt(prompt, style):
173
  return Image.open(io.BytesIO(img_bytes)).convert("RGB")
174
  except Exception as e:
175
  st.warning(f"Illustrative image generation failed: {e}. Using placeholder.")
176
- return Image.new('RGB', (1024, 768), color = (230, 230, 230))
177
-
178
- def create_silent_video(images, durations, output_path):
179
- width, height = 1280, 720
180
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
181
- video = cv2.VideoWriter(output_path, fourcc, 24, (width, height))
182
-
183
- for img, duration in zip(images, durations):
184
- # Resize image and convert to BGR for OpenCV
185
- frame = np.array(img.resize((width, height)))
186
- frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
187
- for _ in range(int(duration * 24)): # 24 fps
188
- video.write(frame_bgr)
189
- video.release()
190
- return output_path
191
-
192
- def combine_video_audio(video_path, audio_paths, output_path):
193
- concat_list_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
194
- with open(concat_list_path, 'w') as f:
195
- for af in audio_paths:
196
- f.write(f"file '{Path(af).resolve()}'\n")
197
-
198
- concat_audio_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
199
- subprocess.run(['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_list_path), '-c', 'copy', str(concat_audio_path)], check=True, capture_output=True)
200
-
201
- subprocess.run(['ffmpeg', '-y', '-i', video_path, '-i', str(concat_audio_path), '-c:v', 'copy', '-c:a', 'aac', '-shortest', output_path], check=True, capture_output=True)
202
-
203
- concat_list_path.unlink(missing_ok=True)
204
- concat_audio_path.unlink(missing_ok=True)
205
- return output_path
206
-
207
- def get_audio_duration(audio_file):
208
- try:
209
- result = subprocess.run(['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', audio_file],
210
- stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True)
211
- return float(result.stdout.strip())
212
- except Exception:
213
- return 5.0 # Default duration
214
-
215
- def generate_video_assets(key, buf, name, ctx, style):
216
  try:
217
  subprocess.run(['ffmpeg', '-version'], check=True, capture_output=True)
218
  except (FileNotFoundError, subprocess.CalledProcessError):
@@ -221,89 +332,131 @@ def generate_video_assets(key, buf, name, ctx, style):
221
 
222
  df, err = load_dataframe_safely(buf, name)
223
  if err: st.error(err); return None
224
- llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=API_KEY, temperature=0.2)
225
- ctx_dict = {"shape": df.shape, "columns": list(df.columns), "user_ctx": ctx or "General business analysis"}
 
 
 
226
 
227
  story_prompt = f"""Create a script for a short business video with exactly {VIDEO_SCENES} scenes.
228
  For each scene:
229
- 1. Write a concise narration (1-2 sentences).
230
  2. If the data can be visualized for this scene, add a chart tag like <generate_chart: "bar chart of sales by region">.
231
  3. Separate each scene with the marker `[SCENE_BREAK]`.
232
  Data Context: {json.dumps(ctx_dict, indent=2)}"""
233
-
234
- with st.spinner("Generating video script..."):
235
  full_script = llm.invoke(story_prompt).content
236
- scenes = [s.strip() for s in full_script.split("[SCENE_BREAK]")]
 
 
 
 
237
 
238
- visuals, audio_paths, temp_files = [], [], []
239
  try:
240
- ag = create_pandas_dataframe_agent(llm=llm, df=df, verbose=False, allow_dangerous_code=True)
241
  for i, scene_text in enumerate(scenes[:VIDEO_SCENES]):
242
- progress = (i + 1) / VIDEO_SCENES
243
- st.progress(progress, text=f"Processing Scene {i+1}/{VIDEO_SCENES}...")
244
-
245
  chart_descs = extract_chart_tags(scene_text)
246
- narrative = repl_tags(scene_text, {}, lambda _: "").strip()
247
-
248
- if narrative: # Only process scenes with text
249
- # 1. Generate Visual
250
- if chart_descs:
251
- with plt.ioff():
252
- try:
253
- ag.run(f"Create a {chart_descs[0]} with Matplotlib and save.")
254
- fig = plt.gcf()
255
- if fig.axes:
256
- p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
257
- fig.savefig(p, dpi=200, bbox_inches="tight", facecolor="white")
258
- visuals.append(Image.open(p).convert("RGB"))
259
- temp_files.append(p)
260
- else: raise ValueError("No chart produced")
261
- except Exception:
262
- visuals.append(generate_image_from_prompt(narrative, style))
263
- finally: plt.close("all")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  else:
265
- visuals.append(generate_image_from_prompt(narrative, style))
266
-
267
- # 2. Generate Audio
268
- audio_content, _ = deepgram_tts(narrative)
269
- if audio_content:
270
- audio_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
271
- audio_path.write_bytes(audio_content)
272
- audio_paths.append(str(audio_path))
273
- temp_files.append(audio_path)
274
-
275
- if not visuals or not audio_paths:
276
- st.error("Could not generate any scenes for the video. Please try a different context or file.")
277
- return None
278
-
279
- st.progress(1.0, text="Assembling video...")
280
- durations = [get_audio_duration(ap) for ap in audio_paths]
281
- silent_video_path = str(Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4")
282
- final_video_path = str(Path(tempfile.gettempdir()) / f"{key}.mp4")
283
-
284
- create_silent_video(visuals, durations, silent_video_path)
285
- temp_files.append(Path(silent_video_path))
286
- combine_video_audio(silent_video_path, audio_paths, final_video_path)
287
-
288
- return {"type": "video", "video_path": final_video_path, "key": key}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  finally:
290
- for f in temp_files: f.unlink(missing_ok=True) # Cleanup all temp files
 
 
291
 
292
  # ─────────────────────────────────────────────────────────────────────────────
293
  # UI & MAIN WORKFLOW
294
  # ─────────────────────────────────────────────────────────────────────────────
295
  mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
296
 
297
- # --- Conditional UI ---
298
- video_style = "professional illustration"
 
299
  if mode == "Video Narrative":
300
  with st.sidebar:
301
  st.subheader("🎬 Video Options")
302
- video_style = st.selectbox("Visual Style",
303
- ["professional illustration", "minimalist infographic", "photorealistic", "cinematic", "data visualization aesthetic"])
304
- st.info("The AI will generate charts from your data where possible, and illustrative images for other scenes.")
 
 
305
 
306
- # --- Common UI ---
307
  upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
308
  if upl:
309
  df_prev, _ = load_dataframe_safely(upl.getvalue(), upl.name)
@@ -314,22 +467,26 @@ ctx = st.text_area("Business context or specific instructions (optional)")
314
 
315
  if st.button("🚀 Generate", type="primary"):
316
  if not upl:
317
- st.warning("Please upload a file first.")
318
- st.stop()
319
-
320
- bkey = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode(), video_style.encode()]))
321
-
 
322
  if mode == "Report (PDF)":
323
- with st.spinner("Generating report and charts..."):
324
  bundle = generate_report_assets(bkey, upl.getvalue(), upl.name, ctx)
325
- else: # Video Narrative
326
- bundle = generate_video_assets(bkey, upl.getvalue(), upl.name, ctx, video_style)
327
-
 
328
  st.session_state.bundle = bundle
329
  st.rerun()
330
 
331
- # --- Display Area (handles state correctly after rerun) ---
332
- if "bundle" in st.session_state and st.session_state.bundle:
 
 
333
  bundle = st.session_state.bundle
334
 
335
  if bundle.get("type") == "report":
@@ -337,13 +494,14 @@ if "bundle" in st.session_state and st.session_state.bundle:
337
  with st.expander("View Report", expanded=True):
338
  if bundle["preview"]:
339
  st.markdown(bundle["preview"], unsafe_allow_html=True)
340
-
341
  c1, c2 = st.columns(2)
342
  with c1:
343
- st.download_button("Download PDF", bundle["pdf"], "business_report.pdf", "application/pdf", use_container_width=True)
 
 
344
  with c2:
345
  if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
346
- report_text = re.sub(r'<[^>]+>', '', bundle["report_md"]) # Basic HTML strip
347
  audio, mime = deepgram_tts(report_text)
348
  if audio:
349
  st.audio(audio, format=mime)
@@ -359,6 +517,8 @@ if "bundle" in st.session_state and st.session_state.bundle:
359
  with open(video_path, "rb") as f:
360
  st.video(f.read())
361
  with open(video_path, "rb") as f:
362
- st.download_button("Download Video", f, f"sozo_narrative_{bundle['key'][:8]}.mp4", "video/mp4")
 
 
363
  else:
364
  st.error("Video file could not be found or generation failed.")
 
1
  ###############################################################################
2
  # Sozo Business Studio · AI transforms business data into compelling narratives
3
+ # (video branch now supports animated charts)
4
  ###############################################################################
5
  import os, re, json, hashlib, uuid, base64, io, tempfile, wave, requests, subprocess
6
  from pathlib import Path
7
 
8
+ # ─── Third-party ──────────────────────────────────────────────────────────────
9
  import streamlit as st
10
  import pandas as pd
11
  import numpy as np
12
+
13
  import matplotlib
14
  matplotlib.use("Agg")
15
  import matplotlib.pyplot as plt
16
+ from matplotlib.animation import FuncAnimation, FFMpegWriter
17
+
18
  from fpdf import FPDF, HTMLMixin
19
  from markdown_it import MarkdownIt
20
  from PIL import Image
21
+ import cv2 # video processing
22
+ try:
23
+ import bar_chart_race as bcr # optional helper
24
+ HAS_BCR = True
25
+ except ImportError:
26
+ HAS_BCR = False
27
 
28
  from langchain_experimental.agents import create_pandas_dataframe_agent
29
  from langchain_google_genai import ChatGoogleGenerativeAI
30
  from google import genai
 
31
 
32
  # ─────────────────────────────────────────────────────────────────────────────
33
  # CONFIG & CONSTANTS
 
36
  st.title("📊 Sozo Business Studio")
37
  st.caption("AI transforms business data into compelling narratives.")
38
 
39
+ FPS = 24 # video frames per second
40
+ MAX_CHARTS = 5 # per report
41
+ VIDEO_SCENES = 5 # per video
42
+ WIDTH, HEIGHT = 1280, 720 # video resolution
43
 
44
+ # --- API Keys ---
45
  API_KEY = os.getenv("GEMINI_API_KEY")
46
  if not API_KEY:
47
  st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
48
+ GEM = genai.Client(api_key=API_KEY)
 
49
 
50
+ DG_KEY = os.getenv("DEEPGRAM_API_KEY") # optional (narration)
51
 
52
+ # --- Session State shortcut ---
 
53
  st.session_state.setdefault("bundle", None)
54
 
55
  # ─────────────────────────────────────────────────────────────────────────────
 
66
  errs.append("Unsupported file type")
67
  return errs
68
 
69
+ def load_dataframe_safely(buf: bytes, name: str):
70
  try:
71
  ext = Path(name).suffix.lower()
72
+ df = pd.read_excel(io.BytesIO(buf)) if ext in (".xlsx", ".xls") else pd.read_csv(io.BytesIO(buf))
73
+ if df.empty or len(df.columns) == 0: raise ValueError("File contains no data")
74
+ df.columns = df.columns.astype(str).str.strip()
75
+ df = df.dropna(how="all")
76
  if df.empty: raise ValueError("Rows all empty")
77
+ return df, None
78
+ except Exception as e:
79
+ return None, str(e)
80
 
81
+ def fix_bullet(t: str) -> str:
82
+ return re.sub(r"[\x80-\x9f]", "", t) if isinstance(t, str) else t
83
 
84
+ def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
85
+ safe = df.copy()
 
86
  for c in safe.columns:
87
+ if safe[c].dtype.name in ("Int64", "Float64", "Boolean"):
88
+ safe[c] = safe[c].astype(safe[c].dtype.name.lower())
89
  return safe
90
 
91
+ # ─── DeepGram TTS ────────────────────────────────────────────────────────────
92
  @st.cache_data(show_spinner=False)
93
+ def deepgram_tts(text: str):
94
  if not DG_KEY or not text: return None, None
95
  text = re.sub(r"[^\w\s.,!?;:-]", "", text)[:1000]
96
  try:
97
+ r = requests.post(
98
+ "https://api.deepgram.com/v1/speak",
99
+ params={"model": "aura-asteria-en"},
100
+ headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"},
101
+ json={"text": text},
102
+ timeout=30,
103
+ )
104
  r.raise_for_status()
105
  return r.content, r.headers.get("Content-Type", "audio/mpeg")
106
  except Exception:
107
  return None, None
108
 
109
+ def get_audio_duration(audio_file):
110
+ """Return duration (seconds) of an audio file via ffprobe (fallback 5 s)."""
111
+ try:
112
+ out = subprocess.run(
113
+ ['ffprobe', '-v', 'error', '-show_entries', 'format=duration',
114
+ '-of', 'default=noprint_wrappers=1:nokey=1', audio_file],
115
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True
116
+ ).stdout.strip()
117
+ return float(out)
118
+ except Exception:
119
+ return 5.0
120
 
121
+ # ─────────────────────────────────────────────────────────────────────────────
122
+ # MARKDOWN TAG UTILS
123
+ # ─────────────────────────────────────────────────────────────────────────────
124
  TAG_RE = re.compile(r'[<\[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>\]\'"”’]+?)["\']?\s*[>\]]', re.I)
125
  extract_chart_tags = lambda t: list(dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")))
126
+ def repl_tags(txt: str, mp: dict, str_fn):
127
+ """Replace generated-chart tags with something else (pdf/img injection)."""
128
  return TAG_RE.sub(lambda m: str_fn(mp[m.group("d").strip()]) if m.group("d").strip() in mp else m.group(0), txt)
129
 
130
  # ─────────────────────────────────────────────────────────────────────────────
131
+ # PDF GENERATION (unchanged)
132
  # ─────────────────────────────────────────────────────────────────────────────
133
+ class PDF(FPDF, HTMLMixin): pass
134
 
135
  def build_pdf(md, charts):
136
+ md = fix_bullet(md).replace("•", "*")
137
+ md = repl_tags(md, charts, lambda p: f'<img src="{p}">')
138
+ html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(md)
139
+ pdf = PDF(); pdf.set_auto_page_break(True, margin=15)
140
  pdf.add_page()
141
+ pdf.set_font("Arial", "B", 18); pdf.cell(0, 12, "AI-Generated Business Report", ln=True); pdf.ln(3)
142
+ pdf.set_font("Arial", "", 11); pdf.write_html(html)
 
 
143
  return bytes(pdf.output(dest="S"))
144
 
145
+ # ─────────────────────────────────────────────────────────────────────────────
146
+ # VIDEO-ONLY ANIMATION HELPERS
147
+ # ─────────────────────────────────────────────────────────────────────────────
148
+ def animate_image_fade(img_cv2: np.ndarray, duration: float, out_path: Path, fps: int = FPS):
149
+ """Simple fade-in from white background to the provided image."""
150
+ frames = max(int(duration * fps), fps) # at least 1 s
151
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
152
+ video = cv2.VideoWriter(str(out_path), fourcc, fps, (WIDTH, HEIGHT))
153
+ blank = np.full_like(img_cv2, 255)
154
+
155
+ for i in range(frames):
156
+ alpha = i / frames
157
+ frame = cv2.addWeighted(blank, 1 - alpha, img_cv2, alpha, 0)
158
+ video.write(frame)
159
+ video.release()
160
+ return str(out_path)
161
+
162
+ def animate_chart(desc: str, df: pd.DataFrame, duration: float, out_path: Path, fps: int = FPS) -> tuple[str, str]:
163
+ """
164
+ Build an animated chart clip matching *desc*.
165
+ Returns (mp4_path, preview_png_path).
166
+ Falls back to simple fade-in if animation fails.
167
+ """
168
+ try:
169
+ # VERY rough heuristic parser
170
+ desc_low = desc.lower()
171
+ if ("bar race" in desc_low or "race" in desc_low) and HAS_BCR:
172
+ # --------------- bar chart race ---------------------------------
173
+ tmp_csv = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.csv"
174
+ df.to_csv(tmp_csv, index=False)
175
+ bcr.bar_chart_race(
176
+ input_filename=tmp_csv,
177
+ output_filename=str(out_path),
178
+ n_bars=10,
179
+ period_length=duration / df.shape[0] if df.shape[0] else 0.5,
180
+ steps_per_period=3,
181
+ dpi=144,
182
+ fig=(WIDTH / 100, HEIGHT / 100),
183
+ bar_label_font=4,
184
+ fixed_order=False,
185
+ interpolate_period=False,
186
+ period_template='{x:.0f}',
187
+ )
188
+ tmp_csv.unlink(missing_ok=True)
189
+ # grab first frame for preview
190
+ cap = cv2.VideoCapture(str(out_path))
191
+ ok, frame = cap.read(); cap.release()
192
+ if ok:
193
+ preview = Path(out_path.with_suffix(".png"))
194
+ cv2.imwrite(str(preview), frame)
195
+ return str(out_path), str(preview)
196
+ raise RuntimeError("Could not capture preview")
197
+ else:
198
+ # --------------- generic line/bar growth using FuncAnimation ----
199
+ # Pick numeric columns
200
+ num_cols = df.select_dtypes(include=['number']).columns.tolist()
201
+ if len(num_cols) < 1:
202
+ raise ValueError("No numeric data to plot")
203
+ col_y = num_cols[0]
204
+ col_x = num_cols[1] if len(num_cols) > 1 else None
205
+ fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
206
+
207
+ if "bar" in desc_low:
208
+ bars = ax.bar([], [])
209
+ def update(frame_idx):
210
+ frac = frame_idx / frames
211
+ upto = int(len(df) * frac) or 1
212
+ ydata = df[col_y].iloc[:upto]
213
+ xdata = df[col_x].iloc[:upto] if col_x else np.arange(upto)
214
+ ax.clear()
215
+ ax.bar(xdata, ydata, color="#1f77b4")
216
+ ax.set_title(desc); ax.grid(True, alpha=0.3)
217
+ frames = max(int(duration * fps), fps)
218
+ anim = FuncAnimation(fig, update, frames=frames, blit=False)
219
+ else:
220
+ line, = ax.plot([], [], lw=2)
221
+ ax.set_xlim(df.index.min(), df.index.max() or len(df))
222
+ ax.set_ylim(df[col_y].min(), df[col_y].max())
223
+ ax.set_title(desc); ax.grid(True, alpha=0.3)
224
+ def update(frame_idx):
225
+ upto = int(len(df) * frame_idx / frames) or 1
226
+ line.set_data(df.index[:upto], df[col_y].iloc[:upto])
227
+ return line,
228
+ frames = max(int(duration * fps), fps)
229
+ anim = FuncAnimation(fig, update, frames=frames, blit=True)
230
+
231
+ writer = FFMpegWriter(fps=fps, metadata=dict(artist='Sozo Studio'))
232
+ anim.save(str(out_path), writer=writer, dpi=144)
233
+ preview = Path(out_path.with_suffix(".png"))
234
+ fig.savefig(preview, bbox_inches="tight", facecolor="white")
235
+ plt.close('all')
236
+ return str(out_path), str(preview)
237
+ except Exception as e:
238
+ # Fallback: simple fade-in on static chart generated by agent
239
+ with st.spinner(f"Animation fallback due to {e}. Generating static image."):
240
+ fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
241
+ df.plot(ax=ax); ax.set_title(desc); ax.grid(alpha=0.3)
242
+ png_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
243
+ fig.savefig(png_path, bbox_inches="tight", facecolor="white"); plt.close('all')
244
+ img = cv2.imread(str(png_path)); img = cv2.resize(img, (WIDTH, HEIGHT))
245
+ mp4_path = Path(out_path)
246
+ animate_image_fade(img, duration, mp4_path, fps=fps)
247
+ return str(mp4_path), str(png_path)
248
+
249
+ # ─────────────────────────────────────────────────────────────────────────────
250
+ # REPORT GENERATION (unchanged)
251
+ # ─────────────────────────────────────────────────────────────────────────────
252
  def generate_report_assets(key, buf, name, ctx):
253
  df, err = load_dataframe_safely(buf, name)
254
  if err: st.error(err); return None
 
 
255
 
256
+ llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",
257
+ google_api_key=API_KEY, temperature=0.1)
258
+ ctx_dict = {"shape": df.shape, "columns": list(df.columns),
259
+ "user_ctx": ctx or "General business analysis"}
260
+
261
+ report_md = llm.invoke(
262
+ f"""You are a senior business analyst. Write an executive-level Markdown report
263
  with insights & recommendations. Use chart tags like <generate_chart: "description"> where helpful.
264
+ Data Context: {json.dumps(ctx_dict, indent=2)}"""
265
+ ).content
266
 
267
+ chart_descs = extract_chart_tags(report_md)[:MAX_CHARTS]
268
+ chart_paths = {}
269
  if chart_descs:
270
+ ag = create_pandas_dataframe_agent(llm=llm, df=df, verbose=False,
271
+ allow_dangerous_code=True)
272
  for d in chart_descs:
273
  with st.spinner(f"Generating chart: {d}"):
274
  with plt.ioff():
275
  try:
276
  ag.run(f"Create a {d} with Matplotlib and save.")
277
+ fig = plt.gcf()
278
  if fig.axes:
279
  p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
280
  fig.savefig(p, dpi=300, bbox_inches="tight", facecolor="white")
281
  chart_paths[d] = str(p)
282
  plt.close("all")
283
+ except:
284
+ plt.close("all")
285
+
286
+ md = fix_bullet(report_md)
287
+ pdf = build_pdf(md, chart_paths)
288
+ preview = repl_tags(md, chart_paths,
289
+ lambda p: f'<img src="data:image/png;base64,{base64.b64encode(Path(p).read_bytes()).decode()}" style="max-width:100%;">')
290
 
291
+ return {"type": "report", "preview": preview, "pdf": pdf,
292
+ "report_md": md, "key": key}
 
 
 
293
 
294
  # ─────────────────────────────────────────────────────────────────────────────
295
+ # VIDEO GENERATION (animated charts!)
296
  # ─────────────────────────────────────────────────────────────────────────────
297
  def generate_image_from_prompt(prompt, style):
298
+ """Image placeholder using Gemini; falls back to gray canvas on error."""
299
  try:
300
  full_prompt = f"A professional, clean, illustrative image for a business presentation: {prompt}, in the style of {style}."
301
+ response = GEM.generate_content(
 
302
  contents=full_prompt,
303
  model="models/gemini-1.5-flash-latest",
304
  generation_config={"response_mime_type": "image/png"}
 
307
  return Image.open(io.BytesIO(img_bytes)).convert("RGB")
308
  except Exception as e:
309
  st.warning(f"Illustrative image generation failed: {e}. Using placeholder.")
310
+ return Image.new('RGB', (WIDTH, HEIGHT), color=(230, 230, 230))
311
+
312
+ def concat_media(inputs, output_path, media_type="video"):
313
+ """Concat list of mp4 or mp3 files using ffmpeg demuxer (copy, no re-encode)."""
314
+ concat_list = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
315
+ with open(concat_list, 'w') as f:
316
+ for item in inputs:
317
+ f.write(f"file '{Path(item).resolve()}'\n")
318
+ codec_copy = 'copy'
319
+ what = '-c:v' if media_type == "video" else '-c:a'
320
+ subprocess.run(['ffmpeg', '-y', '-f', 'concat', '-safe', '0',
321
+ '-i', str(concat_list), what, codec_copy, str(output_path)],
322
+ check=True, capture_output=True)
323
+ concat_list.unlink(missing_ok=True)
324
+
325
+ def generate_video_assets(key, buf, name, ctx, style, animate_charts=True):
326
+ # --- environment check ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  try:
328
  subprocess.run(['ffmpeg', '-version'], check=True, capture_output=True)
329
  except (FileNotFoundError, subprocess.CalledProcessError):
 
332
 
333
  df, err = load_dataframe_safely(buf, name)
334
  if err: st.error(err); return None
335
+
336
+ llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",
337
+ google_api_key=API_KEY, temperature=0.2)
338
+ ctx_dict = {"shape": df.shape, "columns": list(df.columns),
339
+ "user_ctx": ctx or "General business analysis"}
340
 
341
  story_prompt = f"""Create a script for a short business video with exactly {VIDEO_SCENES} scenes.
342
  For each scene:
343
+ 1. Write a concise narration (1-2 sentences).
344
  2. If the data can be visualized for this scene, add a chart tag like <generate_chart: "bar chart of sales by region">.
345
  3. Separate each scene with the marker `[SCENE_BREAK]`.
346
  Data Context: {json.dumps(ctx_dict, indent=2)}"""
347
+
348
+ with st.spinner("Generating video script"):
349
  full_script = llm.invoke(story_prompt).content
350
+ scenes = [s.strip() for s in full_script.split("[SCENE_BREAK]") if s.strip()]
351
+
352
+ video_clips, audio_paths, temp_files = [], [], []
353
+ ag = create_pandas_dataframe_agent(llm=llm, df=df,
354
+ verbose=False, allow_dangerous_code=True)
355
 
 
356
  try:
 
357
  for i, scene_text in enumerate(scenes[:VIDEO_SCENES]):
358
+ st.progress((i + 1) / VIDEO_SCENES, text=f"Processing Scene {i+1}/{VIDEO_SCENES}…")
359
+
 
360
  chart_descs = extract_chart_tags(scene_text)
361
+ narrative = repl_tags(scene_text, {}, lambda _: "").strip()
362
+
363
+ # 1. Generate Audio (always)
364
+ audio_content, _ = deepgram_tts(narrative)
365
+ if audio_content:
366
+ audio_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
367
+ audio_path.write_bytes(audio_content)
368
+ audio_paths.append(str(audio_path))
369
+ temp_files.append(audio_path)
370
+ duration = get_audio_duration(str(audio_path))
371
+ else:
372
+ duration = 5.0 # fallback
373
+
374
+ # 2. Generate Visual (clip)
375
+ if chart_descs:
376
+ d = chart_descs[0]
377
+ with plt.ioff():
378
+ try:
379
+ ag.run(f"Create a {d} with Matplotlib and save.")
380
+ fig = plt.gcf()
381
+ if not fig.axes: raise ValueError("No axes")
382
+ static_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
383
+ fig.savefig(static_png, dpi=300, bbox_inches="tight", facecolor="white")
384
+ plt.close("all")
385
+ except Exception:
386
+ plt.close("all")
387
+ # fallback to illustrative image
388
+ static_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
389
+ generate_image_from_prompt(narrative, style).save(static_png)
390
+
391
+ # Animate?
392
+ if animate_charts:
393
+ clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
394
+ img = cv2.imread(str(static_png)); img = cv2.resize(img, (WIDTH, HEIGHT))
395
+ animate_image_fade(img, duration, clip_path)
396
+ video_clips.append(str(clip_path))
397
+ temp_files.extend([static_png, clip_path])
398
  else:
399
+ # Just still → Ken-Burns fade to duration seconds
400
+ clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
401
+ img = cv2.imread(str(static_png)); img = cv2.resize(img, (WIDTH, HEIGHT))
402
+ animate_image_fade(img, duration, clip_path) # still a clip
403
+ video_clips.append(str(clip_path))
404
+ temp_files.extend([static_png, clip_path])
405
+
406
+ else:
407
+ # No chart; illustrative image
408
+ static_img = generate_image_from_prompt(narrative, style)
409
+ static_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
410
+ static_img.save(static_png)
411
+ clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
412
+ img = cv2.cvtColor(np.array(static_img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
413
+ animate_image_fade(img, duration, clip_path)
414
+ video_clips.append(str(clip_path))
415
+ temp_files.extend([static_png, clip_path])
416
+
417
+ # --- Assemble video ---
418
+ st.progress(1.0, text="Assembling video…")
419
+ silent_video_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
420
+ concat_media(video_clips, silent_video_path, media_type="video")
421
+
422
+ # --- Concat audio ---
423
+ audio_concat_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
424
+ concat_media(audio_paths, audio_concat_path, media_type="audio")
425
+
426
+ # --- Merge AV streams ---
427
+ final_video_path = Path(tempfile.gettempdir()) / f"{key}.mp4"
428
+ subprocess.run(['ffmpeg', '-y',
429
+ '-i', str(silent_video_path),
430
+ '-i', str(audio_concat_path),
431
+ '-c:v', 'copy', '-c:a', 'aac',
432
+ '-shortest', str(final_video_path)],
433
+ check=True, capture_output=True)
434
+
435
+ return {"type": "video", "video_path": str(final_video_path), "key": key}
436
+
437
  finally:
438
+ # clean-up temps except final video
439
+ for f in temp_files:
440
+ f.unlink(missing_ok=True)
441
 
442
# NOTE(review): this span is unified-diff residue — '+' markers and bare
# old-file line numbers (444, 445, …) are interleaved with the code lines.
# Comments below annotate the Python lines only; no code byte was altered.
  # ─────────────────────────────────────────────────────────────────────────────
444
  # UI & MAIN WORKFLOW
445
  # ─────────────────────────────────────────────────────────────────────────────
446
  mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
447

448
+ # Video options
449
# Defaults ensure video_style / animate_charts_on are bound even when the
# "Video Narrative" sidebar below is never rendered (Report mode).
+ video_style = "professional illustration"
450
+ animate_charts_on = True
451
  if mode == "Video Narrative":
452
  with st.sidebar:
453
  st.subheader("🎬 Video Options")
454
+ video_style = st.selectbox("Visual Style",
455
+ ["professional illustration", "minimalist infographic",
456
+ "photorealistic", "cinematic", "data visualization aesthetic"])
457
+ animate_charts_on = st.toggle("Animate Charts", value=True)
458
+ st.caption("Disabling animation uses static slides with a quick fade-in.")
459

460
+ # Common file uploader
461
  upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
462
  if upl:
463
  df_prev, _ = load_dataframe_safely(upl.getvalue(), upl.name)
# NOTE(review): original lines 464-466 are not visible in this chunk —
# presumably a data preview plus the `ctx` text input; `ctx` is read below,
# so confirm it is defined there.

467

468
  if st.button("🚀 Generate", type="primary"):
469
  if not upl:
470
+ st.warning("Please upload a file first."); st.stop()
471
+
472
# Cache key: hash of the file bytes plus every option that affects output,
# so changing any setting produces a fresh bundle key.
+ bkey = sha1_bytes(b"".join([upl.getvalue(), mode.encode(),
473
+ ctx.encode(), video_style.encode(),
474
+ str(animate_charts_on).encode()]))
475
+
476
  if mode == "Report (PDF)":
477
+ with st.spinner("Generating report and charts"):
478
  bundle = generate_report_assets(bkey, upl.getvalue(), upl.name, ctx)
479
+ else: # Video
480
+ bundle = generate_video_assets(bkey, upl.getvalue(), upl.name, ctx,
481
+ video_style, animate_charts=animate_charts_on)
482
+
483
  st.session_state.bundle = bundle
484
# Rerun so the display area below picks up the freshly stored bundle.
  st.rerun()
485
485
 
486
# NOTE(review): unified-diff residue continues here; the bare numbers are
# old-file line indices, not code. Comments only — code bytes untouched.
+ # ─────────────────────────────────────────────────────────────────────────────
487
+ # DISPLAY AREA
488
+ # ─────────────────────────────────────────────────────────────────────────────
489
+ if st.session_state.get("bundle"):
490
  bundle = st.session_state.bundle
491

492
# Report bundles expose "preview" (rendered markup), "pdf" (bytes for the
# download button) and the raw markdown under "report_md".
  if bundle.get("type") == "report":

494
  with st.expander("View Report", expanded=True):
495
  if bundle["preview"]:
496
  st.markdown(bundle["preview"], unsafe_allow_html=True)

497
  c1, c2 = st.columns(2)
498
  with c1:
499
+ st.download_button("Download PDF", bundle["pdf"],
500
+ "business_report.pdf", "application/pdf",
501
+ use_container_width=True)
502
  with c2:
503
# Narration is only offered when a Deepgram API key is configured.
  if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
504
# Strip HTML tags so the TTS engine reads plain text, not markup.
+ report_text = re.sub(r'<[^>]+>', '', bundle["report_md"])
505
  audio, mime = deepgram_tts(report_text)
506
  if audio:
507
  st.audio(audio, format=mime)
# NOTE(review): original lines 508-516 are not visible in this chunk —
# presumably the `type == "video"` branch header plus the existence check
# that defines `video_path`; verify `video_path` is bound there.

517
  with open(video_path, "rb") as f:
518
  st.video(f.read())
519
# Reopened for the download widget; the player above consumed the first read.
  with open(video_path, "rb") as f:
520
+ st.download_button("Download Video", f,
521
+ f"sozo_narrative_{bundle['key'][:8]}.mp4",
522
+ "video/mp4")
523
  else:
524
  st.error("Video file could not be found or generation failed.")