rairo committed on
Commit
c4cc4e0
·
verified ·
1 Parent(s): 0bf64a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -526
app.py CHANGED
@@ -1,22 +1,10 @@
1
  ##############################################################################
2
- # Sozo Business Studio · AI transforms business data into compelling stories #
3
- # (video branch-with-animation PDF branch untouched) #
4
  ##############################################################################
5
- # DROP-IN REPLACEMENT 07-Jul-2025
6
- #
7
- # ▸ Fixes
8
- # 1. Correct Gemini image-generation call (fallback placeholder kept)
9
- # 2. Narration text now strips scene labels & chart tags
10
- # 3. Animation initialises from blank frame and returns artists for blit
11
- # 4. Robust graceful-failure path keeps video & audio lengths aligned
12
- #
13
- # ────────────────────────────────────────────────────────────────────────────
14
-
15
- import os, re, json, hashlib, uuid, base64, io, tempfile, wave, requests, subprocess
16
  from pathlib import Path
17
  from typing import Tuple, Dict, List
18
-
19
- # ─── Third-party ────────────────────────────────────────────────────────────
20
  import streamlit as st
21
  import pandas as pd
22
  import numpy as np
@@ -29,576 +17,222 @@ from markdown_it import MarkdownIt
29
  from PIL import Image
30
  import cv2
31
 
32
- try:
33
- # optional helper for bar-race
34
- import bar_chart_race as bcr
35
- HAS_BCR = True
36
- except ImportError:
37
- HAS_BCR = False
38
-
39
  from langchain_experimental.agents import create_pandas_dataframe_agent
40
  from langchain_google_genai import ChatGoogleGenerativeAI
41
- from google import genai
42
- from google.genai import types # needed only for image generation call
43
 
44
- # ────────────────────────────────────────────────────────────────────────────
45
- # CONFIG & CONSTANTS
46
- # ────────────────────────────────────────────────────────────────────────────
47
  st.set_page_config(page_title="Sozo Business Studio", layout="wide")
48
  st.title("📊 Sozo Business Studio")
49
  st.caption("AI transforms business data into compelling narratives.")
50
 
51
- FPS, WIDTH, HEIGHT = 24, 1280, 720 # video parameters
52
  MAX_CHARTS, VIDEO_SCENES = 5, 5
53
 
54
  API_KEY = os.getenv("GEMINI_API_KEY")
55
  if not API_KEY:
56
  st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
 
57
 
58
- GEM = genai.Client(api_key=API_KEY) # keep original client usage
59
-
60
- DG_KEY = os.getenv("DEEPGRAM_API_KEY") # optional (narration)
61
-
62
  st.session_state.setdefault("bundle", None)
63
-
64
  sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
65
 
66
- # ────────────────────────────────────────────────────────────────────────────
67
- # BASIC HELPERS
68
- # ────────────────────────────────────────────────────────────────────────────
69
  def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
70
- """Attempt CSV/Excel load - return (df, err) tuple."""
71
  try:
72
  ext = Path(name).suffix.lower()
73
- df = pd.read_excel(io.BytesIO(buf)) if ext in (".xlsx", ".xls") else pd.read_csv(io.BytesIO(buf))
74
  df.columns = df.columns.astype(str).str.strip()
75
  df = df.dropna(how="all")
76
  if df.empty or len(df.columns) == 0:
77
  raise ValueError("No usable data found")
78
  return df, None
79
- except Exception as e:
80
- return None, str(e)
81
-
82
-
83
- def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
84
- """Return a Streamlit-friendly df with nullable dtypes for Arrow."""
85
- safe = df.copy()
86
- for c in safe.columns:
87
- if safe[c].dtype.name in ("Int64", "Float64", "Boolean"):
88
- safe[c] = safe[c].astype(safe[c].dtype.name.lower())
89
- return safe
90
-
91
 
92
  @st.cache_data(show_spinner=False)
93
- def deepgram_tts(text: str) -> Tuple[bytes, str]:
94
- """Call Deepgram TTS, return (audio_bytes, mime) or (None, None)."""
95
- if not DG_KEY or not text:
96
- return None, None
97
- text = re.sub(r"[^\w\s.,!?;:-]", "", text)[:1000] # Deepgram max tokens
98
  try:
99
  r = requests.post(
100
  "https://api.deepgram.com/v1/speak",
101
  params={"model": "aura-asteria-en"},
102
  headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"},
103
- json={"text": text},
104
- timeout=30,
105
- )
106
  r.raise_for_status()
107
  return r.content, r.headers.get("Content-Type", "audio/mpeg")
108
- except Exception:
109
- return None, None
110
-
111
-
112
- def generate_silence(duration: float, out_path: Path) -> None:
113
- """Generate a silent MP3 of exact duration using ffmpeg."""
114
- subprocess.run(
115
- ["ffmpeg", "-y", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono",
116
- "-t", f"{duration:.3f}", "-q:a", "9", str(out_path)],
117
- check=True, capture_output=True
118
- )
119
 
 
 
 
 
120
 
121
- def get_audio_duration(mp3_path: str) -> float:
122
- """Return duration seconds via ffprobe; fallback 5.0."""
123
  try:
124
- out = subprocess.run(
125
- ["ffprobe", "-v", "error", "-show_entries", "format=duration",
126
- "-of", "default=noprint_wrappers=1:nokey=1", mp3_path],
127
- text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True
128
- ).stdout.strip()
129
  return float(out)
130
- except Exception:
131
- return 5.0
132
-
133
 
134
- TAG_RE = re.compile(r'[<[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>"\'\]]+?)["\']?\s*[>\]]', re.I)
135
  extract_chart_tags = lambda t: list(dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")))
136
-
137
- def repl_tags(txt: str, mp: Dict[str, str], fn):
138
- """Replace chart tags using map + fn()."""
139
- return TAG_RE.sub(lambda m: fn(mp[m.group("d").strip()]) if m.group("d").strip() in mp else m.group(0), txt)
140
-
141
-
142
- # ────────────────────────────────────────────────────────────────────────────
143
- # PDF GENERATION (UNCHANGED)
144
- # ────────────────────────────────────────────────────────────────────────────
145
- class PDF(FPDF, HTMLMixin):
146
- pass
147
-
148
- def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
149
- html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(
150
- repl_tags(md.replace("•", "*"), charts, lambda p: f'<img src="{p}">')
151
- )
152
- pdf = PDF(); pdf.set_auto_page_break(True, margin=15)
153
- pdf.add_page(); pdf.set_font("Arial", "B", 18)
154
- pdf.cell(0, 12, "AI-Generated Business Report", ln=True); pdf.ln(3)
155
- pdf.set_font("Arial", "", 11); pdf.write_html(html)
156
- return bytes(pdf.output(dest="S"))
157
-
158
-
159
- # ────────────────────────────────────────────────────────────────────────────
160
- # IMAGE GENERATION
161
- # ────────────────────────────────────────────────────────────────────────────
162
- def generate_image_from_prompt(prompt: str, style: str) -> Image.Image:
163
- """
164
- Use Gemini native image generation; fallback to placeholder.
165
- Keeps default model name but gracefully tries preview model if needed.
166
- """
167
- model_name = "gemini-2.0-flash-exp-image-generation" # ✳ keep original
168
- full_prompt = (
169
- "A professional, clean, illustrative image for a business presentation: "
170
- f"{prompt}, in the style of {style}."
171
- )
172
-
173
- def _decode(parts):
174
- for part in parts:
175
- if getattr(part, "inline_data", None) is not None:
176
- return Image.open(io.BytesIO(part.inline_data.data)).convert("RGB")
177
- return None
178
-
179
- try:
180
- response = GEM.models.generate_content(
181
- model=model_name,
182
- contents=full_prompt,
183
- config=types.GenerateContentConfig(response_modalities=["IMAGE"]),
184
- )
185
- img = _decode(response.candidates[0].content.parts)
186
- if img:
187
- return img
188
- except Exception as e:
189
- # try preview SKU once
190
- try:
191
- response = GEM.models.generate_content(
192
- model="gemini-2.0-flash-preview-image-generation",
193
- contents=full_prompt,
194
- config=types.GenerateContentConfig(response_modalities=["IMAGE"]),
195
- )
196
- img = _decode(response.candidates[0].content.parts)
197
- if img:
198
- return img
199
- except Exception:
200
- st.warning(f"Illustrative image generation failed: {e}. Using placeholder.")
201
-
202
- return Image.new("RGB", (WIDTH, HEIGHT), color=(230, 230, 230))
203
-
204
-
205
- # ────────────────────────────────────────────────────────────────────────────
206
- # NARRATION CLEAN-UP
207
- # ────────────────────────────────────────────────────────────────────────────
208
  re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I)
209
 
210
- def clean_narration(text: str) -> str:
211
- """Strip scene labels, chart tags, and excess whitespace."""
212
  text = re_scene.sub("", text)
213
  text = TAG_RE.sub("", text)
 
214
  text = re.sub(r"\s{2,}", " ", text).strip()
215
  return text
216
 
 
217
 
218
- # ────────────────────────────────────────────────────────────────────────────
219
- # GENERIC ANIMATION HELPERS (VIDEO PATH ONLY)
220
- # ────────────────────────────────────────────────────────────────────────────
221
- def animate_image_fade(img_cv2: np.ndarray, duration: float, out_path: Path, fps: int = FPS) -> str:
222
- frames = max(int(duration * fps), fps) # at least 1 s
223
- video = cv2.VideoWriter(str(out_path), cv2.VideoWriter_fourcc(*"mp4v"), fps, (WIDTH, HEIGHT))
224
- blank = np.full_like(img_cv2, 255)
225
- for i in range(frames):
226
- alpha = i / frames
227
- frame = cv2.addWeighted(blank, 1 - alpha, img_cv2, alpha, 0)
228
- video.write(frame)
229
- video.release()
230
- return str(out_path)
231
-
232
-
233
- def animate_chart(desc: str, df: pd.DataFrame, duration: float, out_path: Path, fps: int = FPS) -> str:
234
- """
235
- Build static figure then animate reveal (≤30 frames). Returns MP4 path.
236
- Guaranteed to succeed; will raise to caller if fatal.
237
- """
238
- chart_type, *rest = [s.strip().lower() for s in desc.split("|", 1)]
239
- chart_type = chart_type or "line"
240
- title = rest[0] if rest else desc
241
-
242
- # === Prepare aggregated data ============================================
243
- if chart_type == "pie":
244
- cat = df.select_dtypes(exclude="number").columns[0]
245
- num = df.select_dtypes(include="number").columns[0]
246
- plot_df = df.groupby(cat)[num].sum().sort_values(ascending=False).head(8)
247
- elif chart_type in ("bar", "hist"):
248
- num = df.select_dtypes(include="number").columns[0]
249
- plot_df = df[num]
250
- else: # line / scatter
251
- nums = df.select_dtypes(include="number").columns[:2]
252
- plot_df = df[list(nums)].sort_index()
253
-
254
- # === Build figure =======================================================
255
- fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
256
- frames = max(10, min(30, int(duration * fps)))
257
- artists = []
258
-
259
- if chart_type == "pie":
260
- wedges, _ = ax.pie(plot_df, labels=plot_df.index, startangle=90)
261
- ax.set_title(title)
262
-
263
- def init():
264
- for w in wedges:
265
- w.set_alpha(0)
266
- return wedges
267
-
268
- def update(i):
269
- alpha = i / frames
270
- for w in wedges:
271
- w.set_alpha(alpha)
272
- return wedges
273
-
274
- elif chart_type == "bar":
275
- bars = ax.bar(plot_df.index, np.zeros_like(plot_df.values), color="#1f77b4")
276
- ax.set_ylim(0, plot_df.max() * 1.1); ax.set_title(title)
277
-
278
- def init():
279
- return bars
280
-
281
- def update(i):
282
- frac = i / frames
283
- for b, h in zip(bars, plot_df.values):
284
- b.set_height(h * frac)
285
- return bars
286
-
287
- elif chart_type == "hist":
288
- n, bins, patches = ax.hist(plot_df, bins=20, color="#1f77b4", alpha=0)
289
- ax.set_title(title)
290
-
291
- def init():
292
- for p in patches: p.set_alpha(0)
293
- return patches
294
-
295
- def update(i):
296
- alpha = i / frames
297
- for p in patches: p.set_alpha(alpha)
298
- return patches
299
-
300
- elif chart_type == "scatter":
301
- pts = ax.scatter(plot_df.iloc[:, 0], plot_df.iloc[:, 1], s=10, alpha=0)
302
- ax.set_title(title); ax.grid(alpha=0.3)
303
-
304
- def init():
305
- pts.set_alpha(0); return [pts]
306
-
307
- def update(i):
308
- pts.set_alpha(i / frames)
309
- return [pts]
310
 
311
- else: # line
312
- line, = ax.plot([], [], lw=2)
313
- x_full = plot_df.iloc[:, 0] if chart_type == "line" and plot_df.shape[1] > 1 else np.arange(len(plot_df))
314
- y_full = plot_df.iloc[:, 1] if plot_df.shape[1] > 1 else plot_df.iloc[:, 0]
315
- ax.set_xlim(x_full.min(), x_full.max()); ax.set_ylim(y_full.min(), y_full.max())
316
- ax.set_title(title); ax.grid(alpha=0.3)
317
-
318
- def init():
319
- line.set_data([], [])
320
- return [line]
321
-
322
- def update(i):
323
- k = max(2, int(len(x_full) * i / frames))
324
- line.set_data(x_full[:k], y_full.iloc[:k])
325
- return [line]
326
-
327
- anim = FuncAnimation(
328
- fig, update, frames=frames, init_func=init,
329
- blit=True, interval=1000 / fps
330
- )
331
- anim.save(str(out_path), writer=FFMpegWriter(fps=fps, metadata={'artist': 'Sozo'}), dpi=144)
332
- plt.close(fig)
333
- return str(out_path)
334
 
 
 
 
 
 
 
 
 
 
 
335
 
336
- def safe_animate_chart(desc: str, df: pd.DataFrame, duration: float, out_path: Path, fps: int = FPS) -> str:
337
- """Wrapper that falls back to static-fade if chart animation fails."""
338
- try:
339
- return animate_chart(desc, df, duration, out_path, fps)
340
- except Exception:
341
- with plt.ioff():
342
- df.plot(ax=plt.gca())
343
- tmp_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
344
- plt.savefig(tmp_png, bbox_inches="tight"); plt.close()
345
- img = cv2.resize(cv2.imread(str(tmp_png)), (WIDTH, HEIGHT))
346
- return animate_image_fade(img, duration, out_path, fps)
347
-
348
-
349
- def concat_media(inputs: List[str], output: Path, kind: str = "video") -> None:
350
- """FFmpeg safe concat for audio or video."""
351
- if not inputs:
352
- return
353
- lst = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
354
- with lst.open("w") as f:
355
- for p in inputs:
356
- if Path(p).exists():
357
- f.write(f"file '{Path(p).resolve()}'\n")
358
- subprocess.run(
359
- ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(lst),
360
- "-c:v" if kind == "video" else "-c:a", "copy", str(output)],
361
- check=True, capture_output=True
362
- )
363
- lst.unlink(missing_ok=True)
364
-
365
-
366
- # ────────────────────────────────────────────────────────────────────────────
367
- # REPORT GENERATION (unchanged model names)
368
- # ────────────────────────────────────────────────────────────────────────────
369
- def generate_report_assets(key, buf, name, ctx):
370
- df, err = load_dataframe_safely(buf, name)
371
- if err:
372
- st.error(err); return None
373
-
374
- llm = ChatGoogleGenerativeAI(
375
- model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1
376
- )
377
 
378
- ctx_dict = {
379
- "shape": df.shape,
380
- "columns": list(df.columns),
381
- "user_ctx": ctx or "General business analysis",
382
- }
383
-
384
- report_prompt = (
385
- "You are a senior business analyst. Write an executive-level Markdown report "
386
- "with insights & recommendations.\n"
387
- 'When you need a visual, insert a tag like <generate_chart: "pie | sales by region"> '
388
- "(chart_type first, then a description). "
389
- "Valid chart_type values: line, bar, scatter, pie, hist.\n"
390
- f"Data Context: {json.dumps(ctx_dict, indent=2)}"
391
- )
392
 
393
- md = llm.invoke(report_prompt).content
394
-
395
- # ---------------------------------------------------------------- charts
396
- chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
397
- charts: Dict[str, str] = {}
398
-
399
- if chart_descs:
400
- agent = create_pandas_dataframe_agent(
401
- llm=llm, df=df, verbose=False, allow_dangerous_code=True
402
- )
403
- for d in chart_descs:
404
- with st.spinner(f"Generating chart: {d}"):
405
- with plt.ioff():
406
- try:
407
- agent.run(f"Create a {d} with Matplotlib and save.")
408
- fig = plt.gcf()
409
- if fig.axes:
410
- p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
411
- fig.savefig(p, dpi=300, bbox_inches="tight", facecolor="white")
412
- charts[d] = str(p)
413
- plt.close("all")
414
- except Exception:
415
- plt.close("all")
416
-
417
- preview = repl_tags(
418
- md, charts,
419
- lambda p: f'<img src="data:image/png;base64,{base64.b64encode(Path(p).read_bytes()).decode()}">'
420
- )
421
- pdf = build_pdf(md, charts)
422
 
423
- return {
424
- "type": "report",
425
- "preview": preview,
426
- "pdf": pdf,
427
- "report_md": md,
428
- "key": key,
429
- }
430
 
 
 
 
 
431
 
432
- # ────────────────────────────────────────────────────────────────────────────
433
- # VIDEO GENERATION (animated charts)
434
- # ────────────────────────────────────────────────────────────────────────────
435
- def generate_video_assets(key, buf, name, ctx, style, animate_charts: bool = True):
436
- try:
437
- subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
 
 
 
 
 
 
 
438
  except Exception:
439
- st.error("🔴 FFmpeg not available — cannot render video.")
440
- return None
441
-
442
- df, err = load_dataframe_safely(buf, name)
443
- if err:
444
- st.error(err); return None
445
-
446
- llm = ChatGoogleGenerativeAI(
447
- model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.2
448
- )
 
449
 
450
- ctx_dict = {
451
- "shape": df.shape,
452
- "columns": list(df.columns),
453
- "user_ctx": ctx or "General business analysis",
454
- }
455
-
456
- story_prompt = (
457
- f"Create a script for a short business video with exactly {VIDEO_SCENES} scenes.\n"
458
- "For each scene:\n"
459
- "1. Provide 1–2 sentences of narration.\n"
460
- '2. If a visual is helpful, add <generate_chart: "bar | monthly revenue"> (chart_type first).\n'
461
- "3. Separate scenes with [SCENE_BREAK].\n"
462
- f"Data Context: {json.dumps(ctx_dict, indent=2)}"
463
- )
464
 
465
- script = llm.invoke(story_prompt).content
466
- scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
467
- video_parts: List[str] = []
468
- audio_parts: List[str] = []
469
- temps: List[Path] = []
470
-
471
- for idx, scene in enumerate(scenes[:VIDEO_SCENES]):
472
- st.progress((idx + 1) / VIDEO_SCENES, text=f"Processing Scene {idx+1}/{VIDEO_SCENES}…")
473
-
474
- chart_tags = extract_chart_tags(scene)
475
- narrative = clean_narration(repl_tags(scene, {}, lambda _: "")).strip()
476
-
477
- # ─────────────── audio ────────────────────────────
478
- audio_bytes, _ = deepgram_tts(narrative)
479
- audio_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
480
-
481
- if audio_bytes:
482
- audio_path.write_bytes(audio_bytes)
483
- duration = get_audio_duration(str(audio_path))
484
- else:
485
- duration = 5.0
486
- generate_silence(duration, audio_path)
487
-
488
- audio_parts.append(str(audio_path)); temps.append(audio_path)
489
-
490
- # ─────────────── visual ───────────────────────────
491
- clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
492
- if chart_tags and animate_charts:
493
- safe_animate_chart(chart_tags[0], df, duration, clip_path, FPS)
494
- else:
495
- img = generate_image_from_prompt(narrative, style)
496
- png_tmp = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
497
- img.save(png_tmp); temps.append(png_tmp)
498
- animate_image_fade(
499
- cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR),
500
- duration, clip_path, FPS
501
- )
502
- video_parts.append(str(clip_path)); temps.append(clip_path)
503
-
504
- # ───────── concatenate ───────────────────────────────
505
- silent_vid = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
506
- concat_media(video_parts, silent_vid, "video")
507
- audio_mix = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
508
- concat_media(audio_parts, audio_mix, "audio")
509
-
510
- final_vid = Path(tempfile.gettempdir()) / f"{key}.mp4"
511
- subprocess.run(
512
- ["ffmpeg", "-y", "-i", str(silent_vid), "-i", str(audio_mix),
513
- "-c:v", "copy", "-c:a", "aac", "-shortest", str(final_vid)],
514
- check=True, capture_output=True,
515
  )
516
 
517
- # cleanup tmp
518
- for p in temps:
519
- p.unlink(missing_ok=True)
520
- silent_vid.unlink(missing_ok=True); audio_mix.unlink(missing_ok=True)
521
-
522
- return {"type": "video", "video_path": str(final_vid), "key": key}
523
-
524
-
525
- # ────────────────────────────────────────────────────────────────────────────
526
- # UI
527
- # ────────────────────────────────────────────────────────────────────────────
528
- mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
529
- video_style, animate_charts_flag = "professional illustration", True
530
-
531
- if mode == "Video Narrative":
532
- with st.sidebar:
533
- st.subheader("🎬 Video Options")
534
- video_style = st.selectbox(
535
- "Visual Style",
536
- ["professional illustration", "minimalist infographic", "photorealistic",
537
- "cinematic", "data visualization aesthetic"]
538
- )
539
- animate_charts_flag = st.toggle("Animate Charts", value=True)
540
- st.caption("Disable to use static slides with a simple fade-in.")
541
-
542
- upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
543
 
 
 
544
  if upl:
545
- df_sample, _ = load_dataframe_safely(upl.getvalue(), upl.name)
546
- with st.expander("📊 Data Preview"):
547
- st.dataframe(arrow_df(df_sample.head()))
548
-
549
- ctx = st.text_area("Business context or specific instructions (optional)")
550
-
551
- if st.button("🚀 Generate", type="primary"):
552
- if not upl:
553
- st.warning("Please upload a file first."); st.stop()
554
-
555
- bkey = sha1_bytes(b"".join([
556
- upl.getvalue(), mode.encode(), ctx.encode(),
557
- video_style.encode(), str(animate_charts_flag).encode()
558
- ]))
559
-
560
- if mode == "Report (PDF)":
561
- with st.spinner("Generating report…"):
562
- st.session_state.bundle = generate_report_assets(bkey, upl.getvalue(), upl.name, ctx)
563
- else:
564
- st.session_state.bundle = generate_video_assets(
565
- bkey, upl.getvalue(), upl.name, ctx, video_style, animate_charts_flag
566
- )
567
- st.rerun()
568
-
569
- # ────────────────────────────────────────────────────────────────────────────
570
- # OUTPUT
571
- # ────────────────────────────────────────────────────────────────────────────
572
- if st.session_state.get("bundle"):
573
- bundle = st.session_state.bundle
574
-
575
- if bundle.get("type") == "report":
576
- st.subheader("📄 Generated Report")
577
- with st.expander("View Report", expanded=True):
578
- st.markdown(bundle["preview"], unsafe_allow_html=True)
579
-
580
- c1, c2 = st.columns(2)
581
- with c1:
582
- st.download_button(
583
- "Download PDF", bundle["pdf"], "business_report.pdf",
584
- "application/pdf", use_container_width=True
585
- )
586
- with c2:
587
- if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
588
- txt = re.sub(r"<[^>]+>", "", bundle["report_md"])
589
- audio, mime = deepgram_tts(txt)
590
- st.audio(audio, format=mime) if audio else st.error("Narration failed.")
591
-
592
- elif bundle.get("type") == "video":
593
- st.subheader("🎬 Generated Video Narrative")
594
- vp = bundle["video_path"]
595
- if Path(vp).exists():
596
- with open(vp, "rb") as f:
597
- st.video(f.read())
598
- with open(vp, "rb") as f:
599
- st.download_button(
600
- "Download Video", f,
601
- f"sozo_narrative_{bundle['key'][:8]}.mp4", "video/mp4"
602
- )
603
- else:
604
- st.error("Video file missing – generation failed.")
 
1
  ##############################################################################
2
+ # Sozo Business Studio · 07-Jul-2025 update #
3
+ # Fix image-animation issues, clean narrator text, drop visual-style UI #
4
  ##############################################################################
5
+ import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
 
 
 
 
 
 
 
 
 
 
6
  from pathlib import Path
7
  from typing import Tuple, Dict, List
 
 
8
  import streamlit as st
9
  import pandas as pd
10
  import numpy as np
 
17
  from PIL import Image
18
  import cv2
19
 
 
 
 
 
 
 
 
20
  from langchain_experimental.agents import create_pandas_dataframe_agent
21
  from langchain_google_genai import ChatGoogleGenerativeAI
22
+ from google import genai, genai as _g
23
+ from google.genai import types # for GenerateContentConfig
24
 
25
# ─── CONFIG ────────────────────────────────────────────────────────────────
st.set_page_config(page_title="Sozo Business Studio", layout="wide")
st.title("📊 Sozo Business Studio")
st.caption("AI transforms business data into compelling narratives.")

FPS, WIDTH, HEIGHT = 24, 1280, 720        # output video geometry
MAX_CHARTS, VIDEO_SCENES = 5, 5           # caps on generated visuals/scenes

API_KEY = os.getenv("GEMINI_API_KEY")
if not API_KEY:
    st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
GEM = genai.Client(api_key=API_KEY)

DG_KEY = os.getenv("DEEPGRAM_API_KEY")    # optional — narration degrades to silence

st.session_state.setdefault("bundle", None)


def sha1_bytes(b: bytes) -> str:
    """Hex SHA-1 digest of *b* — used as the cache/output file key."""
    return hashlib.sha1(b).hexdigest()
41
 
42
+ # ─── HELPERS ───────────────────────────────────────────────────────────────
 
 
43
def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
    """Parse *buf* as CSV or Excel, chosen by *name*'s extension.

    Returns (df, None) on success or (None, error_message) on failure;
    column names are stripped of whitespace and all-NaN rows dropped.
    """
    try:
        reader = pd.read_excel if Path(name).suffix.lower() in (".xlsx", ".xls") else pd.read_csv
        df = reader(io.BytesIO(buf))
        df.columns = df.columns.astype(str).str.strip()
        df = df.dropna(how="all")
        if df.empty or len(df.columns) == 0:
            raise ValueError("No usable data found")
        return df, None
    except Exception as e:
        return None, str(e)
 
 
 
 
 
 
 
 
 
 
 
53
 
54
@st.cache_data(show_spinner=False)
def deepgram_tts(txt: str) -> Tuple[bytes, str]:
    """Synthesize *txt* with Deepgram TTS.

    Returns (audio_bytes, mime_type), or (None, None) when the API key is
    missing, *txt* is empty, or the request fails for any reason.
    """
    if not DG_KEY or not txt:
        return None, None
    # Deepgram rejects exotic characters and over-long input — sanitize first.
    txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)[:1000]
    try:
        resp = requests.post(
            "https://api.deepgram.com/v1/speak",
            params={"model": "aura-asteria-en"},
            headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"},
            json={"text": txt},
            timeout=30,
        )
        resp.raise_for_status()
        return resp.content, resp.headers.get("Content-Type", "audio/mpeg")
    except Exception:
        return None, None
 
 
 
 
 
 
 
 
 
 
67
 
68
def silence_mp3(dur: float, path: Path):
    """Write *dur* seconds of silent mono MP3 to *path* via ffmpeg."""
    cmd = [
        "ffmpeg", "-y",
        "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono",
        "-t", f"{dur:.3f}", "-q:a", "9",
        str(path),
    ]
    subprocess.run(cmd, check=True, capture_output=True)
72
 
73
def audio_len(p: str) -> float:
    """Return the duration of audio file *p* in seconds via ffprobe.

    Falls back to 5.0 when ffprobe is unavailable or the probe fails, so
    scene timing stays sane instead of crashing the render.
    """
    try:
        proc = subprocess.run(
            ["ffprobe", "-v", "error",
             "-show_entries", "format=duration",
             "-of", "default=nw=1:nk=1", p],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,  # suppress probe noise on the console
            text=True,
            check=True,
        )
        return float(proc.stdout.strip())
    except Exception:
        return 5.0
 
 
80
 
81
# Matches chart tags such as <generate_chart: "bar | revenue"> or
# [generate_chart= pie | share]; the description is captured in group "d".
TAG_RE = re.compile(r'[<[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>"\'\]]+?)["\']?\s*[>\]]', re.I)


def extract_chart_tags(t):
    """Return the unique chart descriptions found in *t*, in first-seen order."""
    return list(dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")))


# Leading "Scene 3:" style labels the LLM tends to emit despite instructions.
re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I)


def clean_narr(text: str) -> str:
    """Strip scene labels, chart tags, parentheticals and excess whitespace
    so only speakable narration remains."""
    text = re_scene.sub("", text)
    text = TAG_RE.sub("", text)
    text = re.sub(r"\s*\([^)]*\)", "", text)  # remove stage-direction asides
    text = re.sub(r"\s{2,}", " ", text).strip()
    return text
91
 
92
+ # ─── PDF helper unchanged – omitted for brevity (keep from previous script) ─
93
 
94
# ─── IMAGE PLACEHOLDER (rarely used now) ───────────────────────────────────
def placeholder_img() -> Image.Image:
    """Return a plain light-grey frame used when no chart/visual is available."""
    return Image.new("RGB", (WIDTH, HEIGHT), (230, 230, 230))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
# ─── CHART ANIMATION (init_func+artists) ───────────────────────────────────
def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path) -> str:
    """Render an animated chart described by *desc* ("type | title") to *out*.

    Supported types: pie, bar, hist, scatter, line (empty type segment
    defaults to bar).  Raises to the caller on failure — use safe_chart()
    for the fallback path.  Returns str(out).
    """
    ctype, *rest = [s.strip().lower() for s in desc.split("|", 1)]
    ctype = ctype or "bar"
    ttl = rest[0] if rest else desc

    # ── aggregate data for the chosen chart type ────────────────────────────
    if ctype == "pie":
        cat = df.select_dtypes(exclude="number").columns[0]
        num = df.select_dtypes(include="number").columns[0]
        data = df.groupby(cat)[num].sum().sort_values(ascending=False).head(8)
    elif ctype in ("bar", "hist"):
        num = df.select_dtypes(include="number").columns[0]
        data = df[num]
    else:  # line / scatter need up to two numeric columns
        cols = df.select_dtypes(include="number").columns[:2]
        data = df[list(cols)].sort_index()

    fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
    frames = max(10, min(30, int(dur * FPS)))  # clamp for render speed

    # Each branch defines init()/update() returning the artists for blitting.
    if ctype == "pie":
        wedges, _ = ax.pie(data, labels=data.index, startangle=90)
        ax.set_title(ttl)

        def init():
            for w in wedges:
                w.set_alpha(0)
            return wedges

        def update(i):
            alpha = i / frames
            for w in wedges:
                w.set_alpha(alpha)
            return wedges

    elif ctype == "bar":
        bars = ax.bar(data.index, np.zeros_like(data.values), color="#1f77b4")
        ax.set_ylim(0, data.max() * 1.1)
        ax.set_title(ttl)

        def init():
            return bars

        def update(i):
            frac = i / frames
            for b, h in zip(bars, data.values):
                b.set_height(h * frac)
            return bars

    elif ctype == "hist":
        _, _, patches = ax.hist(data, bins=20, color="#1f77b4", alpha=0)
        ax.set_title(ttl)

        def init():
            for p in patches:
                p.set_alpha(0)
            return patches

        def update(i):
            alpha = i / frames
            for p in patches:
                p.set_alpha(alpha)
            return patches

    elif ctype == "scatter":
        pts = ax.scatter(data.iloc[:, 0], data.iloc[:, 1], s=10, alpha=0)
        ax.set_title(ttl)
        ax.grid(alpha=.3)

        def init():
            pts.set_alpha(0)
            return [pts]

        def update(i):
            pts.set_alpha(i / frames)
            return [pts]

    else:  # line
        line, = ax.plot([], [], lw=2)
        x = data.iloc[:, 0] if data.shape[1] > 1 else np.arange(len(data))
        y = data.iloc[:, 1] if data.shape[1] > 1 else data.iloc[:, 0]
        ax.set_xlim(x.min(), x.max())
        ax.set_ylim(y.min(), y.max())
        ax.set_title(ttl)
        ax.grid(alpha=.3)

        def init():
            line.set_data([], [])
            return [line]

        def update(i):
            k = max(2, int(len(x) * i / frames))  # progressive reveal
            line.set_data(x[:k], y.iloc[:k])
            return [line]

    anim = FuncAnimation(fig, update, init_func=init, frames=frames,
                         blit=True, interval=1000 / FPS)
    anim.save(str(out), writer=FFMpegWriter(fps=FPS, metadata={'artist': 'Sozo'}), dpi=144)
    plt.close(fig)
    return str(out)
147
+
148
def safe_chart(desc, df, dur, out):
    """Animate *desc*; on any failure fall back to a fade-in of a static plot.

    animate_chart() may raise for odd data/descriptions; the fallback renders
    a plain ``df.plot()`` snapshot and fades it in so the clip length stays
    aligned with the narration audio.
    """
    try:
        return animate_chart(desc, df, dur, out)
    except Exception:
        tmp_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
        with plt.ioff():
            df.plot(ax=plt.gca())
            plt.savefig(tmp_png, bbox_inches="tight")
            plt.close()
        img = cv2.resize(cv2.imread(str(tmp_png)), (WIDTH, HEIGHT))
        # Previously the PNG was overwritten with a blank placeholder after
        # being read — dead work; just remove the temp file instead.
        tmp_png.unlink(missing_ok=True)
        return animate_image_fade(img, dur, out)
155
+
156
def animate_image_fade(img_cv2, dur, out, fps=FPS):
    """Write an MP4 to *out* fading *img_cv2* in from white over *dur* seconds."""
    n_frames = max(int(dur * fps), fps)  # at least one second of video
    writer = cv2.VideoWriter(str(out), cv2.VideoWriter_fourcc(*"mp4v"), fps, (WIDTH, HEIGHT))
    white = np.full_like(img_cv2, 255)
    for i in range(n_frames):
        alpha = i / n_frames
        writer.write(cv2.addWeighted(white, 1 - alpha, img_cv2, alpha, 0))
    writer.release()
    return str(out)
162
 
163
def concat_media(paths: List[str], out: Path, kind="video"):
    """Concatenate the media files in *paths* into *out* via ffmpeg's concat demuxer.

    *kind* selects which stream is stream-copied ("video" → -c:v, else -c:a).
    No-op when *paths* is empty — previously an empty concat list was fed to
    ffmpeg, which made it fail and raised CalledProcessError.
    """
    if not paths:
        return
    lst = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
    with lst.open("w") as f:
        for p in paths:
            if Path(p).exists():
                f.write(f"file '{Path(p).resolve()}'\n")
    try:
        subprocess.run(
            ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(lst),
             "-c:v" if kind == "video" else "-c:a", "copy", str(out)],
            check=True, capture_output=True,
        )
    finally:
        lst.unlink(missing_ok=True)  # clean up even when ffmpeg fails
 
 
 
 
 
172
 
173
# ─── REPORT & VIDEO generators (prompt tweaks) ─────────────────────────────
def story_prompt(ctx_dict):
    """Build the LLM prompt requesting a VIDEO_SCENES-scene video script.

    Only the first six column names are surfaced to keep the prompt short.
    """
    cols = ", ".join(ctx_dict["columns"][:6])
    lines = [
        f"Create a script for a short business video with exactly {VIDEO_SCENES} scenes.",
        "Each scene **must** follow this template:",
        "• 1–2 sentences of narration (no scene labels, no chart descriptions).",
        '• Exactly one chart tag such as <generate_chart: "bar | total revenue by month">.',
        "Valid chart types: bar, pie, line, scatter, hist.",
        f"Use columns ({cols}) from the dataset; pick sensible aggregations.",
        "Do **not** mention the tag or chart in the narration.",
        "Separate scenes with [SCENE_BREAK].",
    ]
    return "\n".join(lines)
186
 
187
def build_story(df, ctx):
    """Ask Gemini for the scene script covering *df* under user context *ctx*."""
    model = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
                                   google_api_key=API_KEY, temperature=0.2)
    meta = {
        "shape": df.shape,
        "columns": list(df.columns),
        "user_ctx": ctx or "General business analysis",
    }
    return model.invoke(story_prompt(meta)).content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
# UI ========================================================================
upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
if upl:
    df, err = load_dataframe_safely(upl.getvalue(), upl.name)
    if err:
        st.error(err)
    else:
        with st.expander("Data preview"):
            st.dataframe(df.head())

ctx = st.text_area("Business context or specific instructions (optional)")
if st.button("🚀 Generate video", type="primary", disabled=not upl):
    key = sha1_bytes(b"".join([upl.getvalue(), ctx.encode()]))
    df, err = load_dataframe_safely(upl.getvalue(), upl.name)
    if err:  # don't proceed with df=None — everything below would crash
        st.error(err); st.stop()

    # 1⎯ Build script --------------------------------------------------------
    script = build_story(df, ctx)
    scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
    vid_parts, aud_parts, tmp = [], [], []

    for idx, sc in enumerate(scenes[:VIDEO_SCENES]):
        st.progress((idx + 1) / VIDEO_SCENES, text=f"Scene {idx+1}/{VIDEO_SCENES}")
        descs = extract_chart_tags(sc)
        narr = clean_narr(sc)

        # ── audio: real narration or exact-length silence ───────────────────
        aud_b, _ = deepgram_tts(narr)
        mp3 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
        if aud_b:
            mp3.write_bytes(aud_b)
            dur = audio_len(str(mp3))
        else:
            dur = 5.0
            silence_mp3(dur, mp3)
        aud_parts.append(str(mp3)); tmp.append(mp3)

        # ── visual: animated chart, else a fading placeholder ───────────────
        mp4 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
        if descs:
            safe_chart(descs[0], df, dur, mp4)
        else:
            img = cv2.cvtColor(np.array(placeholder_img()), cv2.COLOR_RGB2BGR)
            animate_image_fade(img, dur, mp4)
        vid_parts.append(str(mp4)); tmp.append(mp4)

    # 2⎯ Concatenate clips + audio, then mux together -------------------------
    silent = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
    concat_media(vid_parts, silent, "video")
    mix = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
    concat_media(aud_parts, mix, "audio")
    final = Path(tempfile.gettempdir()) / f"{key}.mp4"
    subprocess.run(["ffmpeg", "-y", "-i", str(silent), "-i", str(mix),
                    "-c:v", "copy", "-c:a", "aac", "-shortest", str(final)],
                   check=True, capture_output=True)
    for p in tmp + [silent, mix]:
        p.unlink(missing_ok=True)
    st.session_state.bundle = {"video": str(final), "key": key}
    st.rerun()

# ─── OUTPUT ────────────────────────────────────────────────────────────────
# session_state.setdefault("bundle", None) above means the KEY always exists,
# so membership ('"bundle" in st.session_state') is always True and indexing
# None crashed on first load — test the value instead.
if st.session_state.get("bundle"):
    v = st.session_state.bundle["video"]
    if Path(v).exists():
        data = Path(v).read_bytes()  # read once; no leaked open() handles
        st.video(data)
        st.download_button("Download video", data,
                           f"sozo_{st.session_state.bundle['key'][:8]}.mp4", "video/mp4")
    else:
        st.error("Video file missing – generation failed.")