Adk-Analyst2

Sleeping

App Files Community

rairo committed on Jul 5, 2025

Commit

bfcb421

verified ·

1 Parent(s): c4cc4e0

Update app.py

Browse files

Files changed (1) hide show

app.py +352 -157

app.py CHANGED Viewed

@@ -1,10 +1,16 @@
 ##############################################################################
-# Sozo Business Studio · 07-Jul-2025 update                                  #
-# Fix image-animation issues, clean narrator text, drop visual-style UI      #
 ##############################################################################
 import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
 from pathlib import Path
 from typing import Tuple, Dict, List
 import streamlit as st
 import pandas as pd
 import numpy as np
@@ -19,15 +25,17 @@ import cv2
 from langchain_experimental.agents import create_pandas_dataframe_agent
 from langchain_google_genai import ChatGoogleGenerativeAI
-from google import genai, genai as _g
-from google.genai import types  # for GenerateContentConfig
-# ─── CONFIG ────────────────────────────────────────────────────────────────
 st.set_page_config(page_title="Sozo Business Studio", layout="wide")
 st.title("📊 Sozo Business Studio")
 st.caption("AI transforms business data into compelling narratives.")
-FPS, WIDTH, HEIGHT  = 24, 1280, 720
 MAX_CHARTS, VIDEO_SCENES = 5, 5
 API_KEY = os.getenv("GEMINI_API_KEY")
@@ -35,11 +43,13 @@ if not API_KEY:
     st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
 GEM = genai.Client(api_key=API_KEY)
-DG_KEY = os.getenv("DEEPGRAM_API_KEY")
 st.session_state.setdefault("bundle", None)
 sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
-# ─── HELPERS ───────────────────────────────────────────────────────────────
 def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
     try:
         ext = Path(name).suffix.lower()
@@ -49,12 +59,23 @@ def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
         if df.empty or len(df.columns) == 0:
             raise ValueError("No usable data found")
         return df, None
-    except Exception as e: return None, str(e)
 @st.cache_data(show_spinner=False)
 def deepgram_tts(txt: str) -> Tuple[bytes, str]:
-    if not DG_KEY or not txt: return None, None
-    txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)[:1000]
     try:
         r = requests.post(
             "https://api.deepgram.com/v1/speak",
@@ -63,176 +84,350 @@ def deepgram_tts(txt: str) -> Tuple[bytes, str]:
             json={"text": txt}, timeout=30)
         r.raise_for_status()
         return r.content, r.headers.get("Content-Type", "audio/mpeg")
-    except Exception: return None, None
-def silence_mp3(dur: float, path: Path):
-    subprocess.run(["ffmpeg", "-y", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono",
-                    "-t", f"{dur:.3f}", "-q:a", "9", str(path)],
-                   check=True, capture_output=True)
-def audio_len(p: str) -> float:
     try:
-        out = subprocess.run(["ffprobe","-v","error","-show_entries","format=duration",
-                              "-of","default=nw=1:nk=1", p],
-                             stdout=subprocess.PIPE,text=True,check=True).stdout.strip()
-        return float(out)
-    except Exception: return 5.0
-TAG_RE = re.compile(r'[<[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>"\'\]]+?)["\']?\s*[>\]]', re.I)
-extract_chart_tags = lambda t: list(dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")))
 re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I)
-def clean_narr(text: str) -> str:
-    text = re_scene.sub("", text)
-    text = TAG_RE.sub("", text)
-    text = re.sub(r"\s*\([^)]*\)", "", text)       # remove parentheticals
-    text = re.sub(r"\s{2,}", " ", text).strip()
-    return text
-# ─── PDF helper unchanged – omitted for brevity (keep from previous script) ─
-# ─── IMAGE PLACEHOLDER (rarely used now) ───────────────────────────────────
 def placeholder_img() -> Image.Image:
-    return Image.new("RGB", (WIDTH, HEIGHT), (230,230,230))
-# ─── CHART ANIMATION (init_func+artists) ───────────────────────────────────
-def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path) -> str:
-    ctype,*rest=[s.strip().lower() for s in desc.split("|",1)]; ctype=ctype or"bar"
-    ttl=rest[0] if rest else desc
-    if ctype=="pie":
-        cat=df.select_dtypes(exclude="number").columns[0]
-        num=df.select_dtypes(include="number").columns[0]
-        pdf=df.groupby(cat)[num].sum().sort_values(ascending=False).head(8)
-    elif ctype in("bar","hist"):
-        num=df.select_dtypes(include="number").columns[0]
-        pdf=df[num]
-    else:
-        cols=df.select_dtypes(include="number").columns[:2]
-        pdf=df[list(cols)].sort_index()
-    fig,ax=plt.subplots(figsize=(WIDTH/100,HEIGHT/100),dpi=100)
-    frames=max(10,min(30,int(dur*FPS)))
-    if ctype=="pie":
-        wedges,_=ax.pie(pdf,labels=pdf.index,startangle=90);ax.set_title(ttl)
-        def init(): [w.set_alpha(0) for w in wedges]; return wedges
-        def update(i): a=i/frames;[w.set_alpha(a) for w in wedges]; return wedges
-    elif ctype=="bar":
-        bars=ax.bar(pdf.index,np.zeros_like(pdf.values),color="#1f77b4");ax.set_ylim(0,pdf.max()*1.1);ax.set_title(ttl)
-        def init(): return bars
-        def update(i): f=i/frames;[b.set_height(h*f) for b,h in zip(bars,pdf.values)]; return bars
-    elif ctype=="hist":
-        n,bins,patch=ax.hist(pdf,bins=20,color="#1f77b4",alpha=0);ax.set_title(ttl)
-        def init(): [p.set_alpha(0) for p in patch]; return patch
-        def update(i): a=i/frames;[p.set_alpha(a) for p in patch]; return patch
-    elif ctype=="scatter":
-        pts=ax.scatter(pdf.iloc[:,0],pdf.iloc[:,1],s=10,alpha=0);ax.set_title(ttl);ax.grid(alpha=.3)
-        def init(): pts.set_alpha(0); return [pts]
-        def update(i): pts.set_alpha(i/frames); return [pts]
     else:  # line
-        line,=ax.plot([],[],lw=2);x=pdf.iloc[:,0] if pdf.shape[1]>1 else np.arange(len(pdf))
-        y=pdf.iloc[:,1] if pdf.shape[1]>1 else pdf.iloc[:,0]
-        ax.set_xlim(x.min(),x.max());ax.set_ylim(y.min(),y.max());ax.set_title(ttl);ax.grid(alpha=.3)
-        def init(): line.set_data([],[]); return [line]
-        def update(i): k=max(2,int(len(x)*i/frames)); line.set_data(x[:k],y.iloc[:k]); return [line]
-    anim=FuncAnimation(fig,update,init_func=init,frames=frames,blit=True,interval=1000/FPS)
-    anim.save(str(out),writer=FFMpegWriter(fps=FPS,metadata={'artist':'Sozo'}),dpi=144)
-    plt.close(fig); return str(out)
-def safe_chart(desc,df,dur,out):
-    try: return animate_chart(desc,df,dur,out)
     except Exception:
-        with plt.ioff(): df.plot(ax=plt.gca()); p=Path(tempfile.gettempdir())/f"{uuid.uuid4()}.png"
-        plt.savefig(p); plt.close(); img=cv2.resize(cv2.imread(str(p)),(WIDTH,HEIGHT))
-        blank=placeholder_img(); cv2.imwrite(str(p),cv2.cvtColor(np.array(blank),cv2.COLOR_RGB2BGR))
-        return animate_image_fade(img,dur,out)
-def animate_image_fade(img_cv2,dur,out,fps=FPS):
-    frames=max(int(dur*fps),fps); video=cv2.VideoWriter(str(out),cv2.VideoWriter_fourcc(*"mp4v"),fps,(WIDTH,HEIGHT))
-    blank=np.full_like(img_cv2,255)
-    for i in range(frames):
-        a=i/frames; video.write(cv2.addWeighted(blank,1-a,img_cv2,a,0))
-    video.release(); return str(out)
 def concat_media(paths: List[str], out: Path, kind="video"):
-    lst=Path(tempfile.gettempdir())/f"{uuid.uuid4()}.txt"
     with lst.open("w") as f:
         for p in paths:
-            if Path(p).exists(): f.write(f"file '{Path(p).resolve()}'\n")
-    subprocess.run(["ffmpeg","-y","-f","concat","-safe","0","-i",str(lst),
-                    "-c:v" if kind=="video" else "-c:a","copy",str(out)],
-                   check=True,capture_output=True)
     lst.unlink(missing_ok=True)
-# ─── REPORT & VIDEO generators (prompt tweaks) ─────────────────────────────
-def story_prompt(ctx_dict):
-    cols=", ".join(ctx_dict["columns"][:6])
     return (
-      f"Create a script for a short business video with exactly {VIDEO_SCENES} scenes.\n"
-      "Each scene **must** follow this template:\n"
-      "• 1–2 sentences of narration (no scene labels, no chart descriptions).\n"
-      '• Exactly one chart tag such as <generate_chart: "bar | total revenue by month">.\n'
-      "Valid chart types: bar, pie, line, scatter, hist.\n"
-      f"Use columns ({cols}) from the dataset; pick sensible aggregations.\n"
-      "Do **not** mention the tag or chart in the narration.\n"
-      "Separate scenes with [SCENE_BREAK]."
     )
-def build_story(df,ctx):
-    llm=ChatGoogleGenerativeAI(model="gemini-2.0-flash",google_api_key=API_KEY,temperature=0.2)
-    ctx_dict={"shape":df.shape,"columns":list(df.columns),"user_ctx":ctx or"General business analysis"}
-    return llm.invoke(story_prompt(ctx_dict)).content
-# UI ========================================================================
-upl=st.file_uploader("Upload CSV or Excel",type=["csv","xlsx","xls"])
 if upl:
-    df,_=load_dataframe_safely(upl.getvalue(),upl.name)
-    with st.expander("Data preview"): st.dataframe(df.head())
-ctx=st.text_area("Business context or specific instructions (optional)")
-if st.button("🚀 Generate video",type="primary",disabled=not upl):
-    key=sha1_bytes(b"".join([upl.getvalue(),ctx.encode()]))
-    df,_=load_dataframe_safely(upl.getvalue(),upl.name)
-    # 1⎯ Build script --------------------------------------------------------
-    script=build_story(df,ctx)
-    scenes=[s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
-    vid_parts,aud_parts,tmp=[],[],[]
-    for idx,sc in enumerate(scenes[:VIDEO_SCENES]):
-        st.progress((idx+1)/VIDEO_SCENES,text=f"Scene {idx+1}/{VIDEO_SCENES}")
-        descs=extract_chart_tags(sc)
-        narr = clean_narr(sc)
-        aud_b, _ = deepgram_tts(narr)
-        mp3 = Path(tempfile.gettempdir())/f"{uuid.uuid4()}.mp3"
-        if aud_b: mp3.write_bytes(aud_b); dur=audio_len(str(mp3))
-        else: dur=5.0; silence_mp3(dur,mp3)
-        aud_parts.append(str(mp3)); tmp.append(mp3)
-        mp4 = Path(tempfile.gettempdir())/f"{uuid.uuid4()}.mp4"
-        if descs: safe_chart(descs[0],df,dur,mp4)
-        else: img=cv2.cvtColor(np.array(placeholder_img()),cv2.COLOR_RGB2BGR); animate_image_fade(img,dur,mp4)
-        vid_parts.append(str(mp4)); tmp.append(mp4)
-    silent=Path(tempfile.gettempdir())/f"{uuid.uuid4()}.mp4"
-    concat_media(vid_parts,silent,"video")
-    mix=Path(tempfile.gettempdir())/f"{uuid.uuid4()}.mp3"
-    concat_media(aud_parts,mix,"audio")
-    final=Path(tempfile.gettempdir())/f"{key}.mp4"
-    subprocess.run(["ffmpeg","-y","-i",str(silent),"-i",str(mix),"-c:v","copy","-c:a","aac",
-                    "-shortest",str(final)],check=True,capture_output=True)
-    for p in tmp+[silent,mix]: p.unlink(missing_ok=True)
-    st.session_state.bundle={"video":str(final),"key":key}; st.rerun()
-# ─── OUTPUT ────────────────────────────────────────────────────────────────
-if "bundle" in st.session_state:
-    v=st.session_state.bundle["video"]
-    st.video(open(v,"rb").read())
-    st.download_button("Download video",open(v,"rb"),
-                       f"sozo_{st.session_state.bundle['key'][:8]}.mp4","video/mp4")

 ##############################################################################
+# Sozo Business Studio · 09-Jul-2025                                        #
+#  • Clean narrator text (no scene labels / chart talk)                      #
+#  • Enforce chart-tag-driven visuals (bar, pie, line, scatter, hist)        #
+#  • Fix image generation (Gemini Flash preview) & placeholder fallback      #
+#  • Animation starts blank; artists returned for blit=True                  #
+#  • Silent-audio fallback keeps mux lengths equal                           #
 ##############################################################################
 import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
 from pathlib import Path
 from typing import Tuple, Dict, List
 import streamlit as st
 import pandas as pd
 import numpy as np
 from langchain_experimental.agents import create_pandas_dataframe_agent
 from langchain_google_genai import ChatGoogleGenerativeAI
+from google import genai
+from google.genai import types   # GenerateContentConfig for image calls
+# ────────────────────────────────────────────────────────────────────────────
+# CONFIG
+# ────────────────────────────────────────────────────────────────────────────
 st.set_page_config(page_title="Sozo Business Studio", layout="wide")
 st.title("📊 Sozo Business Studio")
 st.caption("AI transforms business data into compelling narratives.")
+FPS, WIDTH, HEIGHT = 24, 1280, 720
 MAX_CHARTS, VIDEO_SCENES = 5, 5
 API_KEY = os.getenv("GEMINI_API_KEY")
     st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
 GEM = genai.Client(api_key=API_KEY)
+DG_KEY = os.getenv("DEEPGRAM_API_KEY")  # optional narration
 st.session_state.setdefault("bundle", None)
 sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
+# ────────────────────────────────────────────────────────────────────────────
+# HELPERS
+# ────────────────────────────────────────────────────────────────────────────
 def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
     try:
         ext = Path(name).suffix.lower()
         if df.empty or len(df.columns) == 0:
             raise ValueError("No usable data found")
         return df, None
+    except Exception as e:
+        return None, str(e)
+def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
+    safe = df.copy()
+    for c in safe.columns:
+        if safe[c].dtype.name in ("Int64", "Float64", "Boolean"):
+            safe[c] = safe[c].astype(safe[c].dtype.name.lower())
+    return safe
 @st.cache_data(show_spinner=False)
 def deepgram_tts(txt: str) -> Tuple[bytes, str]:
+    if not DG_KEY or not txt:
+        return None, None
+    txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)[:1000]  # Deepgram text hygiene
     try:
         r = requests.post(
             "https://api.deepgram.com/v1/speak",
             json={"text": txt}, timeout=30)
         r.raise_for_status()
         return r.content, r.headers.get("Content-Type", "audio/mpeg")
+    except Exception:
+        return None, None
+def generate_silence_mp3(duration: float, out: Path):
+    subprocess.run(
+        ["ffmpeg", "-y", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono",
+         "-t", f"{duration:.3f}", "-q:a", "9", str(out)],
+        check=True, capture_output=True)
+def audio_duration(path: str) -> float:
     try:
+        res = subprocess.run(
+            ["ffprobe", "-v", "error", "-show_entries", "format=duration",
+             "-of", "default=nw=1:nk=1", path],
+            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
+        return float(res.stdout.strip())
+    except Exception:
+        return 5.0
+TAG_RE = re.compile(
+    r'[<[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>"\'\]]+?)["\']?\s*[>\]]',
+    re.I)
+extract_chart_tags = lambda t: list(dict.fromkeys(m.group("d").strip()
+                                                  for m in TAG_RE.finditer(t or "")))
 re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I)
+def clean_narration(txt: str) -> str:
+    txt = re_scene.sub("", txt)
+    txt = TAG_RE.sub("", txt)
+    txt = re.sub(r"\s*\([^)]*\)", "", txt)          # remove parentheticals
+    txt = re.sub(r"\s{2,}", " ", txt).strip()
+    return txt
+# ─── PDF GENERATION (unchanged logic) ───────────────────────────────────────
+class PDF(FPDF, HTMLMixin):
+    pass
+def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
+    html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(
+        TAG_RE.sub(lambda m: f'<img src="{charts.get(m.group("d").strip(), "")}">', md)
+    )
+    pdf = PDF()
+    pdf.set_auto_page_break(True, margin=15)
+    pdf.add_page()
+    pdf.set_font("Arial", "B", 18)
+    pdf.cell(0, 12, "AI-Generated Business Report", ln=True)
+    pdf.ln(3)
+    pdf.set_font("Arial", "", 11)
+    pdf.write_html(html)
+    return bytes(pdf.output(dest="S"))
+# ─── IMAGE GENERATION & PLACEHOLDER ────────────────────────────────────────
 def placeholder_img() -> Image.Image:
+    return Image.new("RGB", (WIDTH, HEIGHT), (230, 230, 230))
+def generate_image_from_prompt(prompt: str) -> Image.Image:
+    model_main = "gemini-2.0-flash-exp-image-generation"
+    model_fallback = "gemini-2.0-flash-preview-image-generation"
+    full_prompt = ("A clean business-presentation illustration: " + prompt)
+    def fetch(model_name):
+        res = GEM.models.generate_content(
+            model=model_name,
+            contents=full_prompt,
+            config=types.GenerateContentConfig(response_modalities=["IMAGE"]),
+        )
+        for part in res.candidates[0].content.parts:
+            if getattr(part, "inline_data", None):
+                return Image.open(io.BytesIO(part.inline_data.data)).convert("RGB")
+        return None
+    try:
+        img = fetch(model_main) or fetch(model_fallback)
+        return img if img else placeholder_img()
+    except Exception:
+        return placeholder_img()
+# ─── ANIMATION HELPERS ─────────────────────────────────────────────────────
+def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path, fps: int = FPS) -> str:
+    frames = max(int(dur * fps), fps)
+    vid = cv2.VideoWriter(str(out), cv2.VideoWriter_fourcc(*"mp4v"), fps, (WIDTH, HEIGHT))
+    blank = np.full_like(img_cv2, 255)
+    for i in range(frames):
+        a = i / frames
+        vid.write(cv2.addWeighted(blank, 1 - a, img_cv2, a, 0))
+    vid.release()
+    return str(out)
+def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path, fps: int = FPS) -> str:
+    ctype, *rest = [s.strip().lower() for s in desc.split("|", 1)]
+    ctype = ctype or "bar"
+    title = rest[0] if rest else desc
+    # aggregate or prepare data
+    if ctype == "pie":
+        cat = df.select_dtypes(exclude="number").columns[0]
+        num = df.select_dtypes(include="number").columns[0]
+        pdf = df.groupby(cat)[num].sum().sort_values(ascending=False).head(8)
+    elif ctype in ("bar", "hist"):
+        num = df.select_dtypes(include="number").columns[0]
+        pdf = df[num]
+    else:  # line/scatter
+        cols = df.select_dtypes(include="number").columns[:2]
+        pdf = df[list(cols)].sort_index()
+    fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
+    frames = max(10, min(30, int(dur * fps)))
+    if ctype == "pie":
+        wedges, _ = ax.pie(pdf, labels=pdf.index, startangle=90)
+        ax.set_title(title)
+        def init():
+            for w in wedges: w.set_alpha(0)
+            return wedges
+        def update(i):
+            a = i / frames
+            for w in wedges: w.set_alpha(a)
+            return wedges
+    elif ctype == "bar":
+        bars = ax.bar(pdf.index, np.zeros_like(pdf.values), color="#1f77b4")
+        ax.set_ylim(0, pdf.max() * 1.1)
+        ax.set_title(title)
+        def init():
+            return bars
+        def update(i):
+            f = i / frames
+            for b, h in zip(bars, pdf.values):
+                b.set_height(h * f)
+            return bars
+    elif ctype == "hist":
+        _, _, patches = ax.hist(pdf, bins=20, color="#1f77b4", alpha=0)
+        ax.set_title(title)
+        def init():
+            for p in patches: p.set_alpha(0)
+            return patches
+        def update(i):
+            a = i / frames
+            for p in patches: p.set_alpha(a)
+            return patches
+    elif ctype == "scatter":
+        pts = ax.scatter(pdf.iloc[:, 0], pdf.iloc[:, 1], s=10, alpha=0)
+        ax.set_title(title)
+        ax.grid(alpha=0.3)
+        def init():
+            pts.set_alpha(0)
+            return [pts]
+        def update(i):
+            pts.set_alpha(i / frames)
+            return [pts]
     else:  # line
+        line, = ax.plot([], [], lw=2)
+        x_full = pdf.iloc[:, 0] if pdf.shape[1] > 1 else np.arange(len(pdf))
+        y_full = pdf.iloc[:, 1] if pdf.shape[1] > 1 else pdf.iloc[:, 0]
+        ax.set_xlim(x_full.min(), x_full.max())
+        ax.set_ylim(y_full.min(), y_full.max())
+        ax.set_title(title)
+        ax.grid(alpha=0.3)
+        def init():
+            line.set_data([], [])
+            return [line]
+        def update(i):
+            k = max(2, int(len(x_full) * i / frames))
+            line.set_data(x_full[:k], y_full.iloc[:k])
+            return [line]
+    anim = FuncAnimation(
+        fig, update, init_func=init, frames=frames,
+        blit=True, interval=1000 / fps)
+    anim.save(str(out), writer=FFMpegWriter(fps=fps, metadata={'artist': 'Sozo'}), dpi=144)
+    plt.close(fig)
+    return str(out)
+def safe_chart(desc, df, dur, out):
+    try:
+        return animate_chart(desc, df, dur, out)
     except Exception:
+        with plt.ioff():
+            df.plot(ax=plt.gca())
+            tmp_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
+            plt.savefig(tmp_png, bbox_inches="tight")
+            plt.close()
+        img = cv2.resize(cv2.imread(str(tmp_png)), (WIDTH, HEIGHT))
+        return animate_image_fade(img, dur, out)
 def concat_media(paths: List[str], out: Path, kind="video"):
+    if not paths:
+        return
+    lst = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
     with lst.open("w") as f:
         for p in paths:
+            if Path(p).exists():
+                f.write(f"file '{Path(p).resolve()}'\n")
+    subprocess.run(
+        ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(lst),
+         "-c:v" if kind == "video" else "-c:a", "copy", str(out)],
+        check=True, capture_output=True)
     lst.unlink(missing_ok=True)
+# ────────────────────────────────────────────────────────────────────────────
+# PROMPT HELPERS
+# ────────────────────────────────────────────────────────────────────────────
+def build_story_prompt(ctx_dict):
+    cols = ", ".join(ctx_dict["columns"][:6])
     return (
+        f"Create a script for a short business video with exactly {VIDEO_SCENES} scenes.\n"
+        "Each scene must include:\n"
+        "• 1–2 sentences of narration (no scene labels, no chart descriptions).\n"
+        '• Exactly one chart tag, e.g. <generate_chart: "bar | total revenue by month">.\n'
+        "Valid chart types: bar, pie, line, scatter, hist.\n"
+        f"Use the dataset columns ({cols}) with sensible aggregations.\n"
+        "Separate scenes with [SCENE_BREAK]."
     )
+# ────────────────────────────────────────────────────────────────────────────
+# VIDEO GENERATION
+# ────────────────────────────────────────────────────────────────────────────
+def generate_video(buf: bytes, name: str, ctx: str, key: str):
+    try:
+        subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
+    except Exception:
+        st.error("🔴 FFmpeg not available — cannot render video."); return None
+    df, err = load_dataframe_safely(buf, name)
+    if err:
+        st.error(err); return None
+    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
+                                 google_api_key=API_KEY, temperature=0.2)
+    ctx_dict = {
+        "shape": df.shape,
+        "columns": list(df.columns),
+        "user_ctx": ctx or "General business analysis",
+    }
+    script = llm.invoke(build_story_prompt(ctx_dict)).content
+    scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
+    video_parts, audio_parts, temps = [], [], []
+    for idx, sc in enumerate(scenes[:VIDEO_SCENES]):
+        st.progress((idx + 1) / VIDEO_SCENES,
+                    text=f"Rendering Scene {idx + 1}/{VIDEO_SCENES}")
+        descs = extract_chart_tags(sc)
+        narrative = clean_narration(sc)
+        # ----- audio ---------------------------------------------------------
+        audio_bytes, _ = deepgram_tts(narrative)
+        mp3_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
+        if audio_bytes:
+            mp3_path.write_bytes(audio_bytes)
+            dur = audio_duration(str(mp3_path))
+        else:
+            dur = 5.0
+            generate_silence_mp3(dur, mp3_path)
+        audio_parts.append(str(mp3_path)); temps.append(mp3_path)
+        # ----- visual --------------------------------------------------------
+        mp4_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
+        if descs:
+            safe_chart(descs[0], df, dur, mp4_path)
+        else:
+            img = generate_image_from_prompt(narrative)
+            img_cv = cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
+            animate_image_fade(img_cv, dur, mp4_path)
+        video_parts.append(str(mp4_path)); temps.append(mp4_path)
+    # ----- concatenate -------------------------------------------------------
+    silent_vid = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
+    concat_media(video_parts, silent_vid, "video")
+    audio_mix = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
+    concat_media(audio_parts, audio_mix, "audio")
+    final_vid = Path(tempfile.gettempdir()) / f"{key}.mp4"
+    subprocess.run(
+        ["ffmpeg", "-y", "-i", str(silent_vid), "-i", str(audio_mix),
+         "-c:v", "copy", "-c:a", "aac", "-shortest", str(final_vid)],
+        check=True, capture_output=True)
+    for p in temps + [silent_vid, audio_mix]:
+        p.unlink(missing_ok=True)
+    return str(final_vid)
+# ────────────────────────────────────────────────────────────────────────────
+# UI
+# ────────────────────────────────────────────────────────────────────────────
+upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
 if upl:
+    df_preview, _ = load_dataframe_safely(upl.getvalue(), upl.name)
+    with st.expander("📊 Data Preview"):
+        st.dataframe(arrow_df(df_preview.head()))
+ctx = st.text_area("Business context or specific instructions (optional)")
+if st.button("🚀 Generate Video", type="primary", disabled=not upl):
+    key = sha1_bytes(b"".join([upl.getvalue(), ctx.encode()]))
+    st.session_state.bundle = None
+    with st.spinner("Generating…"):
+        path = generate_video(upl.getvalue(), upl.name, ctx, key)
+        if path:
+            st.session_state.bundle = {"video_path": path, "key": key}
+    st.rerun()
+# ────────────────────────────────────────────────────────────────────────────
+# OUTPUT
+# ────────────────────────────────────────────────────────────────────────────
+if bundle := st.session_state.get("bundle"):
+    vp = bundle["video_path"]
+    if Path(vp).exists():
+        with open(vp, "rb") as f:
+            st.video(f.read())
+        with open(vp, "rb") as f:
+            st.download_button("Download Video", f,
+                               f"sozo_narrative_{bundle['key'][:8]}.mp4",
+                               "video/mp4")
+    else:
+        st.error("Video file missing – generation failed.")