Adk-Analyst2

Sleeping

App Files Files Community

rairo committed on Jul 5, 2025

Commit

37dc133

verified ·

1 Parent(s): b2f699b

Update app.py

Browse files

Files changed (1) hide show

app.py +231 -348

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 ###############################################################################
 #  Sozo Business Studio · AI transforms business data into compelling narratives
-#  (video branch now supports animated charts)
 ###############################################################################
 import os, re, json, hashlib, uuid, base64, io, tempfile, wave, requests, subprocess
 from pathlib import Path
@@ -9,25 +9,24 @@ from pathlib import Path
 import streamlit as st
 import pandas as pd
 import numpy as np
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 from matplotlib.animation import FuncAnimation, FFMpegWriter
 from fpdf import FPDF, HTMLMixin
 from markdown_it import MarkdownIt
 from PIL import Image
-import cv2                                                   # video processing
-try:
-    import bar_chart_race as bcr                             # optional helper
     HAS_BCR = True
 except ImportError:
     HAS_BCR = False
 from langchain_experimental.agents import create_pandas_dataframe_agent
 from langchain_google_genai import ChatGoogleGenerativeAI
-from google import genai
 # ─────────────────────────────────────────────────────────────────────────────
 # CONFIG & CONSTANTS
@@ -36,62 +35,45 @@ st.set_page_config(page_title="Sozo Business Studio", layout="wide")
 st.title("📊 Sozo Business Studio")
 st.caption("AI transforms business data into compelling narratives.")
-FPS            = 24                     # video frames per second
-MAX_CHARTS     = 5                      # per report
-VIDEO_SCENES   = 5                      # per video
-WIDTH, HEIGHT  = 1280, 720              # video resolution
-# --- API Keys ---
 API_KEY = os.getenv("GEMINI_API_KEY")
 if not API_KEY:
     st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
-GEM  = genai.Client(api_key=API_KEY)
-DG_KEY = os.getenv("DEEPGRAM_API_KEY")  # optional (narration)
-# --- Session State shortcut ---
 st.session_state.setdefault("bundle", None)
 # ─────────────────────────────────────────────────────────────────────────────
-# HELPERS
 # ─────────────────────────────────────────────────────────────────────────────
-sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
-def validate_file_upload(f):
-    errs=[]
-    if f is None: errs.append("No file uploaded")
-    elif f.size==0: errs.append("File is empty")
-    elif f.size>50*1024*1024: errs.append("File >50 MB")
-    if f and Path(f.name).suffix.lower() not in (".csv",".xlsx",".xls"):
-        errs.append("Unsupported file type")
-    return errs
 def load_dataframe_safely(buf: bytes, name: str):
     try:
         ext = Path(name).suffix.lower()
         df  = pd.read_excel(io.BytesIO(buf)) if ext in (".xlsx", ".xls") else pd.read_csv(io.BytesIO(buf))
-        if df.empty or len(df.columns) == 0: raise ValueError("File contains no data")
         df.columns = df.columns.astype(str).str.strip()
         df = df.dropna(how="all")
-        if df.empty: raise ValueError("Rows all empty")
         return df, None
     except Exception as e:
         return None, str(e)
-def fix_bullet(t: str) -> str:
-    return re.sub(r"[\x80-\x9f]", "", t) if isinstance(t, str) else t
-def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
     safe = df.copy()
     for c in safe.columns:
         if safe[c].dtype.name in ("Int64", "Float64", "Boolean"):
             safe[c] = safe[c].astype(safe[c].dtype.name.lower())
     return safe
-# —── DeepGram TTS ────────────────────────────────────────────────────────────
 @st.cache_data(show_spinner=False)
 def deepgram_tts(text: str):
-    if not DG_KEY or not text: return None, None
     text = re.sub(r"[^\w\s.,!?;:-]", "", text)[:1000]
     try:
         r = requests.post(
@@ -106,148 +88,139 @@ def deepgram_tts(text: str):
     except Exception:
         return None, None
-def get_audio_duration(audio_file):
-    """Return duration (seconds) of an audio file via ffprobe (fallback 5 s)."""
     try:
         out = subprocess.run(
-            ['ffprobe', '-v', 'error', '-show_entries', 'format=duration',
-             '-of', 'default=noprint_wrappers=1:nokey=1', audio_file],
-            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True
         ).stdout.strip()
         return float(out)
     except Exception:
         return 5.0
-# ─────────────────────────────────────────────────────────────────────────────
-# MARKDOWN TAG UTILS
-# ─────────────────────────────────────────────────────────────────────────────
 TAG_RE = re.compile(r'[<\[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>\]\'"”’]+?)["\']?\s*[>\]]', re.I)
 extract_chart_tags = lambda t: list(dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")))
-def repl_tags(txt: str, mp: dict, str_fn):
-    """Replace generated-chart tags with something else (pdf/img injection)."""
-    return TAG_RE.sub(lambda m: str_fn(mp[m.group("d").strip()]) if m.group("d").strip() in mp else m.group(0), txt)
 # ─────────────────────────────────────────────────────────────────────────────
-# PDF GENERATION (unchanged)
 # ─────────────────────────────────────────────────────────────────────────────
 class PDF(FPDF, HTMLMixin): pass
 def build_pdf(md, charts):
-    md   = fix_bullet(md).replace("•", "*")
-    md   = repl_tags(md, charts, lambda p: f'<img src="{p}">')
-    html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(md)
-    pdf  = PDF(); pdf.set_auto_page_break(True, margin=15)
-    pdf.add_page()
-    pdf.set_font("Arial", "B", 18); pdf.cell(0, 12, "AI-Generated Business Report", ln=True); pdf.ln(3)
-    pdf.set_font("Arial", "", 11);  pdf.write_html(html)
     return bytes(pdf.output(dest="S"))
 # ─────────────────────────────────────────────────────────────────────────────
-# VIDEO-ONLY ANIMATION HELPERS
 # ─────────────────────────────────────────────────────────────────────────────
-def animate_image_fade(img_cv2: np.ndarray, duration: float, out_path: Path, fps: int = FPS):
-    """Simple fade-in from white background to the provided image."""
-    frames  = max(int(duration * fps), fps)   # at least 1 s
-    fourcc  = cv2.VideoWriter_fourcc(*'mp4v')
-    video   = cv2.VideoWriter(str(out_path), fourcc, fps, (WIDTH, HEIGHT))
-    blank   = np.full_like(img_cv2, 255)
     for i in range(frames):
-        alpha  = i / frames
-        frame  = cv2.addWeighted(blank, 1 - alpha, img_cv2, alpha, 0)
         video.write(frame)
     video.release()
     return str(out_path)
-def animate_chart(desc: str, df: pd.DataFrame, duration: float, out_path: Path, fps: int = FPS) -> tuple[str, str]:
     """
-    Build an animated chart clip matching *desc*.
-    Returns (mp4_path, preview_png_path).
-    Falls back to simple fade-in if animation fails.
     """
     try:
-        # VERY rough heuristic parser
         desc_low = desc.lower()
-        if ("bar race" in desc_low or "race" in desc_low) and HAS_BCR:
-            # --------------- bar chart race ---------------------------------
-            tmp_csv = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.csv"
-            df.to_csv(tmp_csv, index=False)
             bcr.bar_chart_race(
-                input_filename=tmp_csv,
-                output_filename=str(out_path),
-                n_bars=10,
-                period_length=duration / df.shape[0] if df.shape[0] else 0.5,
-                steps_per_period=3,
-                dpi=144,
-                fig=(WIDTH / 100, HEIGHT / 100),
-                bar_label_font=4,
-                fixed_order=False,
-                interpolate_period=False,
-                period_template='{x:.0f}',
             )
-            tmp_csv.unlink(missing_ok=True)
-            # grab first frame for preview
-            cap = cv2.VideoCapture(str(out_path))
-            ok, frame = cap.read(); cap.release()
-            if ok:
-                preview = Path(out_path.with_suffix(".png"))
-                cv2.imwrite(str(preview), frame)
-                return str(out_path), str(preview)
-            raise RuntimeError("Could not capture preview")
         else:
-            # --------------- generic line/bar growth using FuncAnimation ----
-            # Pick numeric columns
-            num_cols = df.select_dtypes(include=['number']).columns.tolist()
-            if len(num_cols) < 1:
-                raise ValueError("No numeric data to plot")
-            col_y   = num_cols[0]
-            col_x   = num_cols[1] if len(num_cols) > 1 else None
-            fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
-            if "bar" in desc_low:
-                bars = ax.bar([], [])
-                def update(frame_idx):
-                    frac = frame_idx / frames
-                    upto = int(len(df) * frac) or 1
-                    ydata = df[col_y].iloc[:upto]
-                    xdata = df[col_x].iloc[:upto] if col_x else np.arange(upto)
-                    ax.clear()
-                    ax.bar(xdata, ydata, color="#1f77b4")
-                    ax.set_title(desc); ax.grid(True, alpha=0.3)
-                frames = max(int(duration * fps), fps)
-                anim   = FuncAnimation(fig, update, frames=frames, blit=False)
-            else:
-                line, = ax.plot([], [], lw=2)
-                ax.set_xlim(df.index.min(), df.index.max() or len(df))
-                ax.set_ylim(df[col_y].min(), df[col_y].max())
-                ax.set_title(desc); ax.grid(True, alpha=0.3)
-                def update(frame_idx):
-                    upto = int(len(df) * frame_idx / frames) or 1
-                    line.set_data(df.index[:upto], df[col_y].iloc[:upto])
-                    return line,
-                frames = max(int(duration * fps), fps)
-                anim   = FuncAnimation(fig, update, frames=frames, blit=True)
-            writer = FFMpegWriter(fps=fps, metadata=dict(artist='Sozo Studio'))
-            anim.save(str(out_path), writer=writer, dpi=144)
-            preview = Path(out_path.with_suffix(".png"))
-            fig.savefig(preview, bbox_inches="tight", facecolor="white")
-            plt.close('all')
-            return str(out_path), str(preview)
     except Exception as e:
-        # Fallback: simple fade-in on static chart generated by agent
-        with st.spinner(f"Animation fallback due to {e}. Generating static image."):
             fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
             df.plot(ax=ax); ax.set_title(desc); ax.grid(alpha=0.3)
-            png_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
-            fig.savefig(png_path, bbox_inches="tight", facecolor="white"); plt.close('all')
-            img      = cv2.imread(str(png_path)); img = cv2.resize(img, (WIDTH, HEIGHT))
-            mp4_path = Path(out_path)
-            animate_image_fade(img, duration, mp4_path, fps=fps)
-            return str(mp4_path), str(png_path)
 # ─────────────────────────────────────────────────────────────────────────────
-# REPORT GENERATION (unchanged)
 # ─────────────────────────────────────────────────────────────────────────────
 def generate_report_assets(key, buf, name, ctx):
     df, err = load_dataframe_safely(buf, name)
@@ -257,78 +230,46 @@ def generate_report_assets(key, buf, name, ctx):
                                  google_api_key=API_KEY, temperature=0.1)
     ctx_dict = {"shape": df.shape, "columns": list(df.columns),
                 "user_ctx": ctx or "General business analysis"}
-    report_md = llm.invoke(
-        f"""You are a senior business analyst. Write an executive-level Markdown report
-with insights & recommendations. Use chart tags like <generate_chart: "description"> where helpful.
-Data Context: {json.dumps(ctx_dict, indent=2)}"""
     ).content
-    chart_descs  = extract_chart_tags(report_md)[:MAX_CHARTS]
-    chart_paths  = {}
     if chart_descs:
-        ag = create_pandas_dataframe_agent(llm=llm, df=df, verbose=False,
-                                           allow_dangerous_code=True)
         for d in chart_descs:
             with st.spinner(f"Generating chart: {d}"):
                 with plt.ioff():
                     try:
-                        ag.run(f"Create a {d} with Matplotlib and save.")
-                        fig   = plt.gcf()
                         if fig.axes:
                             p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
                             fig.savefig(p, dpi=300, bbox_inches="tight", facecolor="white")
-                            chart_paths[d] = str(p)
-                        plt.close("all")
-                    except:
                         plt.close("all")
-    md      = fix_bullet(report_md)
-    pdf     = build_pdf(md, chart_paths)
-    preview = repl_tags(md, chart_paths,
-                        lambda p: f'<img src="data:image/png;base64,{base64.b64encode(Path(p).read_bytes()).decode()}" style="max-width:100%;">')
-    return {"type": "report", "preview": preview, "pdf": pdf,
-            "report_md": md, "key": key}
 # ─────────────────────────────────────────────────────────────────────────────
-# VIDEO GENERATION  (animated charts!)
 # ─────────────────────────────────────────────────────────────────────────────
-def generate_image_from_prompt(prompt, style):
-    """Image placeholder using Gemini; falls back to gray canvas on error."""
-    try:
-        full_prompt = f"A professional, clean, illustrative image for a business presentation: {prompt}, in the style of {style}."
-        response    = GEM.generate_content(
-            contents=full_prompt,
-            model="models/gemini-1.5-flash-latest",
-            generation_config={"response_mime_type": "image/png"}
-        )
-        img_bytes = response.parts[0].blob.data
-        return Image.open(io.BytesIO(img_bytes)).convert("RGB")
-    except Exception as e:
-        st.warning(f"Illustrative image generation failed: {e}. Using placeholder.")
-        return Image.new('RGB', (WIDTH, HEIGHT), color=(230, 230, 230))
-def concat_media(inputs, output_path, media_type="video"):
-    """Concat list of mp4 or mp3 files using ffmpeg demuxer (copy, no re-encode)."""
-    concat_list = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
-    with open(concat_list, 'w') as f:
-        for item in inputs:
-            f.write(f"file '{Path(item).resolve()}'\n")
-    codec_copy = 'copy'
-    what       = '-c:v' if media_type == "video" else '-c:a'
-    subprocess.run(['ffmpeg', '-y', '-f', 'concat', '-safe', '0',
-                    '-i', str(concat_list), what, codec_copy, str(output_path)],
-                   check=True, capture_output=True)
-    concat_list.unlink(missing_ok=True)
 def generate_video_assets(key, buf, name, ctx, style, animate_charts=True):
-    # --- environment check ---
     try:
-        subprocess.run(['ffmpeg', '-version'], check=True, capture_output=True)
-    except (FileNotFoundError, subprocess.CalledProcessError):
-        st.error("🔴 FFmpeg is not installed or not in your system's PATH. Video generation is not possible.")
-        return None
     df, err = load_dataframe_safely(buf, name)
     if err: st.error(err); return None
@@ -337,188 +278,130 @@ def generate_video_assets(key, buf, name, ctx, style, animate_charts=True):
                                  google_api_key=API_KEY, temperature=0.2)
     ctx_dict = {"shape": df.shape, "columns": list(df.columns),
                 "user_ctx": ctx or "General business analysis"}
-    story_prompt = f"""Create a script for a short business video with exactly {VIDEO_SCENES} scenes.
-For each scene:
-1. Write a concise narration (1–2 sentences).
-2. If the data can be visualized for this scene, add a chart tag like <generate_chart: "bar chart of sales by region">.
-3. Separate each scene with the marker `[SCENE_BREAK]`.
-Data Context: {json.dumps(ctx_dict, indent=2)}"""
-    with st.spinner("Generating video script…"):
-        full_script = llm.invoke(story_prompt).content
-    scenes = [s.strip() for s in full_script.split("[SCENE_BREAK]") if s.strip()]
-    video_clips, audio_paths, temp_files = [], [], []
-    ag = create_pandas_dataframe_agent(llm=llm, df=df,
-                                       verbose=False, allow_dangerous_code=True)
-    try:
-        for i, scene_text in enumerate(scenes[:VIDEO_SCENES]):
-            st.progress((i + 1) / VIDEO_SCENES, text=f"Processing Scene {i+1}/{VIDEO_SCENES}…")
-            chart_descs = extract_chart_tags(scene_text)
-            narrative   = repl_tags(scene_text, {}, lambda _: "").strip()
-            # 1. Generate Audio (always)
-            audio_content, _ = deepgram_tts(narrative)
-            if audio_content:
-                audio_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
-                audio_path.write_bytes(audio_content)
-                audio_paths.append(str(audio_path))
-                temp_files.append(audio_path)
-                duration = get_audio_duration(str(audio_path))
-            else:
-                duration = 5.0  # fallback
-            # 2. Generate Visual (clip)
-            if chart_descs:
-                d    = chart_descs[0]
-                with plt.ioff():
-                    try:
-                        ag.run(f"Create a {d} with Matplotlib and save.")
-                        fig   = plt.gcf()
-                        if not fig.axes: raise ValueError("No axes")
-                        static_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
-                        fig.savefig(static_png, dpi=300, bbox_inches="tight", facecolor="white")
-                        plt.close("all")
-                    except Exception:
-                        plt.close("all")
-                        # fallback to illustrative image
-                        static_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
-                        generate_image_from_prompt(narrative, style).save(static_png)
-                # Animate?
-                if animate_charts:
-                    clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
-                    img       = cv2.imread(str(static_png)); img = cv2.resize(img, (WIDTH, HEIGHT))
-                    animate_image_fade(img, duration, clip_path)
-                    video_clips.append(str(clip_path))
-                    temp_files.extend([static_png, clip_path])
-                else:
-                    # Just still → Ken-Burns fade to duration seconds
-                    clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
-                    img       = cv2.imread(str(static_png)); img = cv2.resize(img, (WIDTH, HEIGHT))
-                    animate_image_fade(img, duration, clip_path)          # still a clip
-                    video_clips.append(str(clip_path))
-                    temp_files.extend([static_png, clip_path])
-            else:
-                # No chart; illustrative image
-                static_img = generate_image_from_prompt(narrative, style)
-                static_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
-                static_img.save(static_png)
-                clip_path  = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
-                img        = cv2.cvtColor(np.array(static_img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
-                animate_image_fade(img, duration, clip_path)
-                video_clips.append(str(clip_path))
-                temp_files.extend([static_png, clip_path])
-        # --- Assemble video ---
-        st.progress(1.0, text="Assembling video…")
-        silent_video_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
-        concat_media(video_clips, silent_video_path, media_type="video")
-        # --- Concat audio ---
-        audio_concat_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
-        concat_media(audio_paths, audio_concat_path, media_type="audio")
-        # --- Merge AV streams ---
-        final_video_path = Path(tempfile.gettempdir()) / f"{key}.mp4"
-        subprocess.run(['ffmpeg', '-y',
-                        '-i', str(silent_video_path),
-                        '-i', str(audio_concat_path),
-                        '-c:v', 'copy', '-c:a', 'aac',
-                        '-shortest', str(final_video_path)],
-                       check=True, capture_output=True)
-        return {"type": "video", "video_path": str(final_video_path), "key": key}
-    finally:
-        # clean-up temps except final video
-        for f in temp_files:
-            f.unlink(missing_ok=True)
 # ─────────────────────────────────────────────────────────────────────────────
-# UI & MAIN WORKFLOW
 # ─────────────────────────────────────────────────────────────────────────────
 mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
-# Video options
-video_style       = "professional illustration"
-animate_charts_on = True
 if mode == "Video Narrative":
     with st.sidebar:
         st.subheader("🎬 Video Options")
-        video_style = st.selectbox("Visual Style",
             ["professional illustration", "minimalist infographic",
-             "photorealistic", "cinematic", "data visualization aesthetic"])
-        animate_charts_on = st.toggle("Animate Charts", value=True)
-        st.caption("Disabling animation uses static slides with a quick fade-in.")
-# Common file uploader
 upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
 if upl:
-    df_prev, _ = load_dataframe_safely(upl.getvalue(), upl.name)
     with st.expander("📊 Data Preview"):
-        st.dataframe(arrow_df(df_prev.head()))
 ctx = st.text_area("Business context or specific instructions (optional)")
 if st.button("🚀 Generate", type="primary"):
     if not upl:
         st.warning("Please upload a file first."); st.stop()
-    bkey = sha1_bytes(b"".join([upl.getvalue(), mode.encode(),
-                                ctx.encode(), video_style.encode(),
-                                str(animate_charts_on).encode()]))
     if mode == "Report (PDF)":
-        with st.spinner("Generating report and charts…"):
-            bundle = generate_report_assets(bkey, upl.getvalue(), upl.name, ctx)
-    else:  # Video
-        bundle = generate_video_assets(bkey, upl.getvalue(), upl.name, ctx,
-                                       video_style, animate_charts=animate_charts_on)
-    st.session_state.bundle = bundle
     st.rerun()
 # ─────────────────────────────────────────────────────────────────────────────
-# DISPLAY AREA
 # ─────────────────────────────────────────────────────────────────────────────
 if st.session_state.get("bundle"):
     bundle = st.session_state.bundle
     if bundle.get("type") == "report":
         st.subheader("📄 Generated Report")
         with st.expander("View Report", expanded=True):
-            if bundle["preview"]:
-                st.markdown(bundle["preview"], unsafe_allow_html=True)
-                c1, c2 = st.columns(2)
-                with c1:
-                    st.download_button("Download PDF", bundle["pdf"],
-                                       "business_report.pdf", "application/pdf",
-                                       use_container_width=True)
-                with c2:
-                    if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
-                        report_text = re.sub(r'<[^>]+>', '', bundle["report_md"])
-                        audio, mime = deepgram_tts(report_text)
-                        if audio:
-                            st.audio(audio, format=mime)
-                        else:
-                            st.error("Narration failed.")
-            else:
-                st.warning("No report content was generated.")
     elif bundle.get("type") == "video":
         st.subheader("🎬 Generated Video Narrative")
-        video_path = bundle.get("video_path")
-        if video_path and Path(video_path).exists():
-            with open(video_path, "rb") as f:
                 st.video(f.read())
-            with open(video_path, "rb") as f:
                 st.download_button("Download Video", f,
-                                   f"sozo_narrative_{bundle['key'][:8]}.mp4",
-                                   "video/mp4")
         else:
-            st.error("Video file could not be found or generation failed.")

 ###############################################################################
 #  Sozo Business Studio · AI transforms business data into compelling narratives
+#  (video branch now supports animated charts – PDF branch untouched)
 ###############################################################################
 import os, re, json, hashlib, uuid, base64, io, tempfile, wave, requests, subprocess
 from pathlib import Path
 import streamlit as st
 import pandas as pd
 import numpy as np
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 from matplotlib.animation import FuncAnimation, FFMpegWriter
 from fpdf import FPDF, HTMLMixin
 from markdown_it import MarkdownIt
 from PIL import Image
+import cv2  # video processing
+try:                                         # optional helper for bar-race
+    import bar_chart_race as bcr
     HAS_BCR = True
 except ImportError:
     HAS_BCR = False
 from langchain_experimental.agents import create_pandas_dataframe_agent
 from langchain_google_genai import ChatGoogleGenerativeAI
+from google import genai                              # ← original import path
 # ─────────────────────────────────────────────────────────────────────────────
 # CONFIG & CONSTANTS
 st.title("📊 Sozo Business Studio")
 st.caption("AI transforms business data into compelling narratives.")
+FPS, WIDTH, HEIGHT = 24, 1280, 720        # video parameters
+MAX_CHARTS, VIDEO_SCENES = 5, 5
 API_KEY = os.getenv("GEMINI_API_KEY")
 if not API_KEY:
     st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
+GEM = genai.Client(api_key=API_KEY)       # ← still using Client pattern
+DG_KEY = os.getenv("DEEPGRAM_API_KEY")    # optional (narration)
 st.session_state.setdefault("bundle", None)
+sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
 # ─────────────────────────────────────────────────────────────────────────────
+# BASIC HELPERS
 # ─────────────────────────────────────────────────────────────────────────────
 def load_dataframe_safely(buf: bytes, name: str):
     """Parse an uploaded CSV/Excel payload into a cleaned DataFrame.

     Args:
         buf: Raw bytes of the uploaded file.
         name: Original file name; its suffix selects the parser
             (``.xlsx``/``.xls`` use ``read_excel``, anything else ``read_csv``).

     Returns:
         ``(df, None)`` on success, with column names stripped and all-empty
         rows dropped, or ``(None, message)`` on any failure. ``message`` is
         never empty, so callers can test it for truthiness.
     """
     try:
         ext = Path(name).suffix.lower()
         if ext in (".xlsx", ".xls"):
             df = pd.read_excel(io.BytesIO(buf))
         else:
             try:
                 df = pd.read_csv(io.BytesIO(buf))
             except UnicodeDecodeError:
                 # Not UTF-8 (e.g. a legacy spreadsheet export): latin-1 maps
                 # every byte, so the retry cannot fail on decoding.
                 df = pd.read_csv(io.BytesIO(buf), encoding="latin-1")
         df.columns = df.columns.astype(str).str.strip()
         df = df.dropna(how="all")
         if df.empty or len(df.columns) == 0:
             raise ValueError("No usable data found")
         return df, None
     except Exception as e:
         # Some exceptions stringify to "", which callers would read as
         # "no error" while df is None; fall back to the exception type name.
         return None, str(e) or type(e).__name__
def arrow_df(df: pd.DataFrame):
    """Return a copy of *df* with pandas nullable columns cast to NumPy dtypes.

    Args:
        df: Frame to convert; it is copied, never modified.

    Returns:
        A new DataFrame in which ``Int64``, ``Float64`` and ``boolean``
        extension columns are replaced by plain NumPy-typed columns.
    """
    safe = df.copy()
    for c in safe.columns:
        col = safe[c]
        kind = col.dtype.name
        if kind == "Int64":
            # int64 cannot hold <NA>; use float64 (NaN) when values are
            # missing so the cast does not raise.
            safe[c] = col.astype("float64" if col.isna().any() else "int64")
        elif kind == "Float64":
            safe[c] = col.astype("float64")
        elif kind == "boolean" and not col.isna().any():
            # pandas names the nullable dtype "boolean" (lower-case). Columns
            # with <NA> are left alone because bool has no missing value.
            safe[c] = col.astype("bool")
    return safe
 @st.cache_data(show_spinner=False)
 def deepgram_tts(text: str):
+    if not DG_KEY or not text:
+        return None, None
     text = re.sub(r"[^\w\s.,!?;:-]", "", text)[:1000]
     try:
         r = requests.post(
     except Exception:
         return None, None
def get_audio_duration(mp3_path: str) -> float:
    """Return the duration of an audio file in seconds, as reported by ffprobe.

    Args:
        mp3_path: Path of the audio file to probe.

    Returns:
        The probed duration, or 5.0 when ffprobe is missing, fails, times
        out, or reports a value that is not a finite positive number.
    """
    fallback = 5.0
    try:
        out = subprocess.run(
            ["ffprobe", "-v", "error", "-show_entries", "format=duration",
             "-of", "default=noprint_wrappers=1:nokey=1", mp3_path],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            check=True, timeout=30,  # never let a stuck probe hang the app
        ).stdout.strip()
        seconds = float(out)
    except Exception:
        return fallback
    # float() accepts "nan"/"inf"; callers turn the duration into a frame
    # count with int(), which raises on those values. NaN fails both
    # comparisons, so it also lands on the fallback.
    return seconds if 0 < seconds < float("inf") else fallback
 TAG_RE = re.compile(r'[<\[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>\]\'"”’]+?)["\']?\s*[>\]]', re.I)
 extract_chart_tags = lambda t: list(dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")))
def repl_tags(txt: str, mp: dict, fn):
    """Substitute chart tags in *txt* whose description is a key of *mp*.

    Each tag matched by ``TAG_RE`` has its captured description stripped and
    looked up in *mp*. Known descriptions are replaced by ``fn(mp[desc])``;
    unknown ones are left exactly as they appear in the text.
    """
    def _swap(match):
        desc = match.group("d").strip()
        if desc not in mp:
            return match.group(0)
        return fn(mp[desc])

    return TAG_RE.sub(_swap, txt)
 # ─────────────────────────────────────────────────────────────────────────────
+# PDF GENERATION (UNCHANGED)
 # ─────────────────────────────────────────────────────────────────────────────
 class PDF(FPDF, HTMLMixin): pass
 def build_pdf(md, charts):
+    html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(
+        repl_tags(md.replace("•", "*"), charts, lambda p: f'<img src="{p}">')
+    )
+    pdf = PDF(); pdf.set_auto_page_break(True, margin=15)
+    pdf.add_page(); pdf.set_font("Arial", "B", 18)
+    pdf.cell(0, 12, "AI-Generated Business Report", ln=True); pdf.ln(3)
+    pdf.set_font("Arial", "", 11); pdf.write_html(html)
     return bytes(pdf.output(dest="S"))
 # ─────────────────────────────────────────────────────────────────────────────
+# GENERIC ANIMATION HELPERS (VIDEO PATH ONLY)
 # ─────────────────────────────────────────────────────────────────────────────
def animate_image_fade(img_cv2: np.ndarray, duration: float, out_path: Path, fps: int = FPS) -> str:
    """Write an MP4 clip that fades from a white frame into *img_cv2*.

    Args:
        img_cv2: Image array as used by OpenCV; resized to WIDTH x HEIGHT
            when its size differs.
        duration: Clip length in seconds (the clip is at least one second).
        out_path: Destination .mp4 path.
        fps: Frames per second.

    Returns:
        ``str(out_path)``.
    """
    frames = max(int(duration * fps), fps)       # at least 1 second
    # The writer is opened for WIDTH x HEIGHT, so make sure every frame
    # handed to it has exactly that size.
    if img_cv2.shape[1] != WIDTH or img_cv2.shape[0] != HEIGHT:
        img_cv2 = cv2.resize(img_cv2, (WIDTH, HEIGHT))
    blank = np.full_like(img_cv2, 255)
    video = cv2.VideoWriter(str(out_path), cv2.VideoWriter_fourcc(*"mp4v"), fps, (WIDTH, HEIGHT))
    try:
        for i in range(frames):
            # Divide by frames - 1 so the last frame is the fully opaque image.
            alpha = i / (frames - 1) if frames > 1 else 1.0
            video.write(cv2.addWeighted(blank, 1 - alpha, img_cv2, alpha, 0))
    finally:
        # Release the writer even if a frame fails, so the file is closed.
        video.release()
    return str(out_path)
def animate_chart(desc: str, df: pd.DataFrame, duration: float, out_path: Path, fps: int = FPS) -> str:
    """
    Build an animated chart matching *desc*; returns mp4 path.
    Falls back to simple fade animation if something fails.

    Args:
        desc: Free-text chart description. "race" selects a bar-chart race
            (only when bar_chart_race imported), "bar" a growing bar chart,
            anything else a growing line chart.
        df: Source data; only its numeric columns are animated.
        duration: Target clip length in seconds.
        out_path: Destination .mp4 path.
        fps: Frames per second for the growth and fade animations.

    Returns:
        ``str(out_path)``.
    """
    try:
        desc_low = desc.lower()
        numeric = df.select_dtypes(include=["number"])
        if numeric.empty:
            raise ValueError("No numeric data")

        # --- bar chart race --------------------------------------------------
        if "race" in desc_low and HAS_BCR:
            # bar_chart_race takes the DataFrame itself plus `filename`,
            # measures period_length in milliseconds, and sizes the figure
            # through `figsize`. dpi=100 makes the frame WIDTH x HEIGHT.
            bcr.bar_chart_race(
                df=numeric, filename=str(out_path),
                n_bars=min(10, numeric.shape[1]),
                period_length=max(int(duration * 1000 / max(len(numeric), 1)), 1),
                figsize=(WIDTH / 100, HEIGHT / 100), dpi=100,
            )
            return str(out_path)

        # --- generic line / bar growth --------------------------------------
        num_cols = numeric.columns.tolist()
        col_y = num_cols[0]
        col_x = num_cols[1] if len(num_cols) > 1 else None
        total = len(df)
        fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
        frames = max(int(duration * fps), fps)
        is_bar = "bar" in desc_low

        def rows_shown(i):
            # (i + 1) / frames reaches 1.0 on the final frame, so the last
            # data row is drawn before the clip ends.
            return max(int(total * (i + 1) / frames), 1)

        if is_bar:
            def update(i):
                upto = rows_shown(i)
                ax.clear()
                # Compare against None: a falsy column label is still a column.
                xs = df[col_x].iloc[:upto] if col_x is not None else np.arange(upto)
                ax.bar(xs, df[col_y].iloc[:upto], color="#1f77b4")
                ax.set_title(desc); ax.grid(alpha=0.3)
        else:
            line, = ax.plot([], [], lw=2)
            ax.set_xlim(0, total - 1); ax.set_ylim(df[col_y].min(), df[col_y].max())
            ax.set_title(desc); ax.grid(alpha=0.3)

            def update(i):
                upto = rows_shown(i)
                line.set_data(np.arange(upto), df[col_y].iloc[:upto])
                return line,

        anim = FuncAnimation(fig, update, frames=frames, blit=not is_bar)
        writer = FFMpegWriter(fps=fps, metadata=dict(artist="Sozo Studio"))
        # Save at the figure's own dpi so the frame stays WIDTH x HEIGHT,
        # the size animate_image_fade writes; clips are stream-copied
        # together by concat_media.
        anim.save(str(out_path), writer=writer, dpi=100); plt.close('all')
        return str(out_path)
    except Exception:
        # fallback → static image fade
        plt.close('all')  # drop any half-built figure from the failed attempt
        with plt.ioff():
            fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
            if not df.select_dtypes(include=["number"]).empty:
                df.plot(ax=ax)
            else:
                # DataFrame.plot raises without numeric data; show the
                # description as a text slide instead of crashing here.
                ax.text(0.5, 0.5, desc or "", ha="center", va="center",
                        transform=ax.transAxes, wrap=True)
            ax.set_title(desc); ax.grid(alpha=0.3)
            png_tmp = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
            fig.savefig(png_tmp, bbox_inches="tight", facecolor="white"); plt.close('all')
        img = cv2.imread(str(png_tmp))
        png_tmp.unlink(missing_ok=True)  # only the mp4 is returned to the caller
        if img is None:
            raise RuntimeError(f"Could not read fallback chart image {png_tmp}")
        return animate_image_fade(cv2.resize(img, (WIDTH, HEIGHT)), duration, out_path, fps)
def concat_media(inputs, output, kind="video"):
    """Join media files back-to-back with FFmpeg's concat demuxer (stream copy).

    Args:
        inputs: Iterable of paths to the clips to join, in playback order.
        output: Destination path; an existing file is overwritten (``-y``).
        kind: ``"video"`` copies the video stream (``-c:v copy``); any other
            value copies the audio stream (``-c:a copy``).

    Raises:
        subprocess.CalledProcessError: FFmpeg exited with a non-zero status.
        FileNotFoundError: The ``ffmpeg`` executable could not be launched.
    """
    codec_flag = "-c:v" if kind == "video" else "-c:a"
    lst = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
    try:
        # Write paths as UTF-8 regardless of the platform's locale encoding.
        with lst.open("w", encoding="utf-8") as f:
            for p in inputs:
                # Inside a single-quoted entry a literal quote must be written
                # as '\'' or the concat list becomes unparsable.
                escaped = str(Path(p).resolve()).replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")
        subprocess.run(
            ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(lst),
             codec_flag, "copy", str(output)],
            check=True, capture_output=True
        )
    finally:
        # Remove the list file even when FFmpeg fails or is not installed.
        lst.unlink(missing_ok=True)
 # ─────────────────────────────────────────────────────────────────────────────
+# IMAGE GENERATION (keeps original Client.generate_content call)
+# ─────────────────────────────────────────────────────────────────────────────
def generate_image_from_prompt(prompt, style):
    """Request an illustrative PNG from the Gemini client, with a grey fallback.

    Args:
        prompt: Text describing what the image should show.
        style: Visual style phrase appended to the request.

    Returns:
        An RGB ``PIL.Image``. On any failure a Streamlit warning is shown and
        a plain light-grey image of size ``(WIDTH, HEIGHT)`` is returned.
    """
    try:
        request_text = (
            "A professional, clean, illustrative image for a business presentation: "
            f"{prompt}, in the style of {style}."
        )
        reply = GEM.generate_content(
            model="models/gemini-1.5-flash-latest",
            contents=request_text,
            generation_config={"response_mime_type": "image/png"},
        )
        # The image bytes are read from the blob of the first response part.
        png_stream = io.BytesIO(reply.parts[0].blob.data)
        picture = Image.open(png_stream)
        return picture.convert("RGB")
    except Exception as exc:
        # Best effort: a missing illustration must not abort the whole video.
        st.warning(f"Illustrative image generation failed: {exc}. Using placeholder.")
        placeholder = Image.new("RGB", (WIDTH, HEIGHT), color=(230, 230, 230))
        return placeholder
+# ─────────────────────────────────────────────────────────────────────────────
+# REPORT GENERATION (UNCHANGED)
 # ─────────────────────────────────────────────────────────────────────────────
 def generate_report_assets(key, buf, name, ctx):
     # Builds the report bundle: asks the LLM for a Markdown report, renders up to
     # MAX_CHARTS of the <generate_chart: "..."> tags it contains as PNG files, and
     # returns a dict with the HTML preview, the PDF, the raw Markdown and `key`.
     #   key  - identifier stored in the returned bundle under "key"
     #   buf  - uploaded file content, passed to load_dataframe_safely
     #   name - uploaded file name, passed to load_dataframe_safely
     #   ctx  - optional user instructions; "General business analysis" when empty
     # NOTE(review): in this listing `err` is never checked, and the statement that
     # opens the call ending in `temperature=0.1)` is not visible (presumably it
     # constructs `llm`) - confirm both against the full file.
     df, err = load_dataframe_safely(buf, name)
                                  google_api_key=API_KEY, temperature=0.1)
     ctx_dict = {"shape": df.shape, "columns": list(df.columns),
                 "user_ctx": ctx or "General business analysis"}
+    md = llm.invoke(
+        "You are a senior business analyst. Write an executive-level Markdown report "
+        "with insights & recommendations. Use chart tags like <generate_chart: \"description\"> where helpful.\n"
+        f"Data Context: {json.dumps(ctx_dict, indent=2)}"
     ).content
+    # Replace tags with static charts
+    chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
+    charts = {}
     if chart_descs:
         # NOTE(review): allow_dangerous_code=True lets the agent execute model-written
         # Python in this process - confirm that is acceptable for uploaded data.
+        ag = create_pandas_dataframe_agent(llm=llm, df=df, verbose=False, allow_dangerous_code=True)
         for d in chart_descs:
             with st.spinner(f"Generating chart: {d}"):
                 with plt.ioff():
                     try:
                         # The agent is expected to draw on the current Matplotlib figure,
                         # which is then picked up with plt.gcf().
+                        ag.run(f"Create a {d} with Matplotlib and save."); fig = plt.gcf()
                         # Only figures that actually received axes are saved.
                         if fig.axes:
                             p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
                             fig.savefig(p, dpi=300, bbox_inches="tight", facecolor="white")
+                            charts[d] = str(p)
                         plt.close("all")
                     # NOTE(review): bare except - any failure (including KeyboardInterrupt)
                     # silently drops this chart; consider `except Exception` plus a warning.
+                    except: plt.close("all")
     # The preview embeds each chart PNG inline as a base64 data URI.
+    preview = repl_tags(
+        md, charts,
+        lambda p: f'<img src="data:image/png;base64,{base64.b64encode(Path(p).read_bytes()).decode()}" '
+                  f'style="max-width:100%;">'
+    )
+    pdf = build_pdf(md, charts)
+    return {"type": "report", "preview": preview, "pdf": pdf, "report_md": md, "key": key}
 # ─────────────────────────────────────────────────────────────────────────────
+# VIDEO GENERATION (ANIMATED CHARTS)
 # ─────────────────────────────────────────────────────────────────────────────
 def generate_video_assets(key, buf, name, ctx, style, animate_charts=True):
     # Builds the video bundle: asks the LLM for a scene script, produces one audio
     # clip and one video clip per scene, concatenates them and muxes the result
     # into <tempdir>/<key>.mp4. Returns a dict with "type", "video_path" and "key",
     # or None after showing an error when FFmpeg is unavailable or loading fails.
     #   style          - visual style phrase forwarded to generate_image_from_prompt
     #   animate_charts - when False, chart tags are ignored and every scene uses
     #                    an illustrative image with a fade
+    # FFmpeg presence
     try:
+        subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
+    except Exception:
+        st.error("🔴 FFmpeg not available — cannot render video."); return None
     df, err = load_dataframe_safely(buf, name)
     if err: st.error(err); return None
     # NOTE(review): the statement that opens the call ending in `temperature=0.2)`
     # is not visible in this listing (presumably it constructs `llm`) - confirm.
                                  google_api_key=API_KEY, temperature=0.2)
     ctx_dict = {"shape": df.shape, "columns": list(df.columns),
                 "user_ctx": ctx or "General business analysis"}
+    script = llm.invoke(
+        f"Create a script for a short business video with exactly {VIDEO_SCENES} scenes.\n"
+        "For each scene:\n"
+        "1. Write a concise narration (1–2 sentences).\n"
+        "2. If the data can be visualised, add a chart tag like <generate_chart: \"bar chart of sales by region\">.\n"
+        "3. Separate each scene with the marker [SCENE_BREAK].\n"
+        f"Data Context: {json.dumps(ctx_dict, indent=2)}"
+    ).content
     # Empty fragments between markers are discarded.
+    scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
+    video_parts, audio_parts, temps = [], [], []
     # NOTE(review): st.progress is called once per scene, which looks like it adds a
     # new progress element each time rather than updating one - confirm intent.
+    for idx, scene in enumerate(scenes[:VIDEO_SCENES]):
+        st.progress((idx + 1) / VIDEO_SCENES, text=f"Processing Scene {idx+1}/{VIDEO_SCENES}…")
+        chart_tags = extract_chart_tags(scene)
         # Narration text is the scene with every chart tag replaced by "".
+        narrative  = repl_tags(scene, {}, lambda _: "").strip()
+        # Audio
+        audio_bytes, _ = deepgram_tts(narrative)
+        audio_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
         # The file is only written when TTS returned data; otherwise the scene
         # falls back to a 5 second duration.
+        (audio_path.write_bytes(audio_bytes) if audio_bytes else None)
+        duration = get_audio_duration(str(audio_path)) if audio_bytes else 5.0
         # NOTE(review): the path is appended even when nothing was written, so the
         # later audio concat will likely fail on a missing file - verify.
+        audio_parts.append(str(audio_path)); temps.append(audio_path)
+        # Video
+        if chart_tags and animate_charts:
+            clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
             # Only the first chart tag of a scene is used; the return value of
             # animate_chart is ignored and the clip is expected at clip_path.
+            animate_chart(chart_tags[0], df, duration, clip_path, FPS)
+            video_parts.append(str(clip_path)); temps.append(clip_path)
+        else:
+            # illustrative image fade
+            img = generate_image_from_prompt(narrative, style)
+            png_tmp = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
+            img.save(png_tmp); temps.append(png_tmp)
+            clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
+            animate_image_fade(
+                cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR),
+                duration, clip_path, FPS
+            )
+            video_parts.append(str(clip_path)); temps.append(clip_path)
     # NOTE(review): `temps` is collected but never removed in the visible code, and
     # silent_vid / audio_mix below are not deleted either - confirm cleanup.
+    # Concatenate media
+    silent_vid = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
+    concat_media(video_parts, silent_vid, "video")
+    audio_mix  = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
+    concat_media(audio_parts, audio_mix, "audio")
+    final_vid  = Path(tempfile.gettempdir()) / f"{key}.mp4"
     # Mux: video stream copied as-is, audio re-encoded to AAC, output cut to the
     # shorter of the two inputs (-shortest).
+    subprocess.run(
+        ["ffmpeg", "-y", "-i", str(silent_vid), "-i", str(audio_mix),
+         "-c:v", "copy", "-c:a", "aac", "-shortest", str(final_vid)],
+        check=True, capture_output=True
+    )
+    return {"type": "video", "video_path": str(final_vid), "key": key}
 # ─────────────────────────────────────────────────────────────────────────────
+# UI
 # ─────────────────────────────────────────────────────────────────────────────
 mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
+video_style, animate_charts_flag = "professional illustration", True
 if mode == "Video Narrative":
     with st.sidebar:
         st.subheader("🎬 Video Options")
+        video_style = st.selectbox(
+            "Visual Style",
             ["professional illustration", "minimalist infographic",
+             "photorealistic", "cinematic", "data visualization aesthetic"]
+        )
+        animate_charts_flag = st.toggle("Animate Charts", value=True)
+        st.caption("Disable to use static slides with a simple fade-in.")
 upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
 if upl:
+    df_sample, _ = load_dataframe_safely(upl.getvalue(), upl.name)
     with st.expander("📊 Data Preview"):
+        st.dataframe(arrow_df(df_sample.head()))
 ctx = st.text_area("Business context or specific instructions (optional)")
 if st.button("🚀 Generate", type="primary"):
     if not upl:
         st.warning("Please upload a file first."); st.stop()
+    bkey = sha1_bytes(b"".join([
+        upl.getvalue(), mode.encode(), ctx.encode(),
+        video_style.encode(), str(animate_charts_flag).encode()
+    ]))
     if mode == "Report (PDF)":
+        with st.spinner("Generating report…"):
+            st.session_state.bundle = generate_report_assets(bkey, upl.getvalue(), upl.name, ctx)
+    else:
+        st.session_state.bundle = generate_video_assets(
+            bkey, upl.getvalue(), upl.name, ctx,
+            video_style, animate_charts_flag
+        )
     st.rerun()
 # ─────────────────────────────────────────────────────────────────────────────
+# OUTPUT
 # ─────────────────────────────────────────────────────────────────────────────
 if st.session_state.get("bundle"):
     bundle = st.session_state.bundle
     if bundle.get("type") == "report":
         st.subheader("📄 Generated Report")
         with st.expander("View Report", expanded=True):
+            st.markdown(bundle["preview"], unsafe_allow_html=True)
+            c1, c2 = st.columns(2)
+            with c1:
+                st.download_button("Download PDF", bundle["pdf"],
+                                   "business_report.pdf", "application/pdf",
+                                   use_container_width=True)
+            with c2:
+                if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
+                    txt = re.sub(r"<[^>]+>", "", bundle["report_md"])
+                    audio, mime = deepgram_tts(txt)
+                    st.audio(audio, format=mime) if audio else st.error("Narration failed.")
     elif bundle.get("type") == "video":
         st.subheader("🎬 Generated Video Narrative")
+        vp = bundle["video_path"]
+        if Path(vp).exists():
+            with open(vp, "rb") as f:
                 st.video(f.read())
+            with open(vp, "rb") as f:
                 st.download_button("Download Video", f,
+                                   f"sozo_narrative_{bundle['key'][:8]}.mp4", "video/mp4")
         else:
+            st.error("Video file missing – generation failed.")