rairo committed on
Commit
37dc133
·
verified ·
1 Parent(s): b2f699b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +231 -348
app.py CHANGED
@@ -1,6 +1,6 @@
1
  ###############################################################################
2
  # Sozo Business Studio · AI transforms business data into compelling narratives
3
- # (video branch now supports animated charts)
4
  ###############################################################################
5
  import os, re, json, hashlib, uuid, base64, io, tempfile, wave, requests, subprocess
6
  from pathlib import Path
@@ -9,25 +9,24 @@ from pathlib import Path
9
  import streamlit as st
10
  import pandas as pd
11
  import numpy as np
12
-
13
  import matplotlib
14
  matplotlib.use("Agg")
15
  import matplotlib.pyplot as plt
16
  from matplotlib.animation import FuncAnimation, FFMpegWriter
17
-
18
  from fpdf import FPDF, HTMLMixin
19
  from markdown_it import MarkdownIt
20
  from PIL import Image
21
- import cv2 # video processing
22
- try:
23
- import bar_chart_race as bcr # optional helper
 
24
  HAS_BCR = True
25
  except ImportError:
26
  HAS_BCR = False
27
 
28
  from langchain_experimental.agents import create_pandas_dataframe_agent
29
  from langchain_google_genai import ChatGoogleGenerativeAI
30
- from google import genai
31
 
32
  # ─────────────────────────────────────────────────────────────────────────────
33
  # CONFIG & CONSTANTS
@@ -36,62 +35,45 @@ st.set_page_config(page_title="Sozo Business Studio", layout="wide")
36
  st.title("📊 Sozo Business Studio")
37
  st.caption("AI transforms business data into compelling narratives.")
38
 
39
- FPS = 24 # video frames per second
40
- MAX_CHARTS = 5 # per report
41
- VIDEO_SCENES = 5 # per video
42
- WIDTH, HEIGHT = 1280, 720 # video resolution
43
 
44
- # --- API Keys ---
45
  API_KEY = os.getenv("GEMINI_API_KEY")
46
  if not API_KEY:
47
  st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
48
- GEM = genai.Client(api_key=API_KEY)
49
 
50
- DG_KEY = os.getenv("DEEPGRAM_API_KEY") # optional (narration)
51
 
52
- # --- Session State shortcut ---
53
  st.session_state.setdefault("bundle", None)
 
54
 
55
  # ─────────────────────────────────────────────────────────────────────────────
56
- # HELPERS
57
  # ─────────────────────────────────────────────────────────────────────────────
58
- sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
59
-
60
- def validate_file_upload(f):
61
- errs=[]
62
- if f is None: errs.append("No file uploaded")
63
- elif f.size==0: errs.append("File is empty")
64
- elif f.size>50*1024*1024: errs.append("File >50 MB")
65
- if f and Path(f.name).suffix.lower() not in (".csv",".xlsx",".xls"):
66
- errs.append("Unsupported file type")
67
- return errs
68
-
69
  def load_dataframe_safely(buf: bytes, name: str):
70
  try:
71
  ext = Path(name).suffix.lower()
72
  df = pd.read_excel(io.BytesIO(buf)) if ext in (".xlsx", ".xls") else pd.read_csv(io.BytesIO(buf))
73
- if df.empty or len(df.columns) == 0: raise ValueError("File contains no data")
74
  df.columns = df.columns.astype(str).str.strip()
75
  df = df.dropna(how="all")
76
- if df.empty: raise ValueError("Rows all empty")
 
77
  return df, None
78
  except Exception as e:
79
  return None, str(e)
80
 
81
- def fix_bullet(t: str) -> str:
82
- return re.sub(r"[\x80-\x9f]", "", t) if isinstance(t, str) else t
83
-
84
- def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
85
  safe = df.copy()
86
  for c in safe.columns:
87
  if safe[c].dtype.name in ("Int64", "Float64", "Boolean"):
88
  safe[c] = safe[c].astype(safe[c].dtype.name.lower())
89
  return safe
90
 
91
- # —── DeepGram TTS ────────────────────────────────────────────────────────────
92
  @st.cache_data(show_spinner=False)
93
  def deepgram_tts(text: str):
94
- if not DG_KEY or not text: return None, None
 
95
  text = re.sub(r"[^\w\s.,!?;:-]", "", text)[:1000]
96
  try:
97
  r = requests.post(
@@ -106,148 +88,139 @@ def deepgram_tts(text: str):
106
  except Exception:
107
  return None, None
108
 
109
- def get_audio_duration(audio_file):
110
- """Return duration (seconds) of an audio file via ffprobe (fallback 5 s)."""
111
  try:
112
  out = subprocess.run(
113
- ['ffprobe', '-v', 'error', '-show_entries', 'format=duration',
114
- '-of', 'default=noprint_wrappers=1:nokey=1', audio_file],
115
- stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True
116
  ).stdout.strip()
117
  return float(out)
118
  except Exception:
119
  return 5.0
120
 
121
- # ─────────────────────────────────────────────────────────────────────────────
122
- # MARKDOWN TAG UTILS
123
- # ─────────────────────────────────────────────────────────────────────────────
124
  TAG_RE = re.compile(r'[<\[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>\]\'"”’]+?)["\']?\s*[>\]]', re.I)
125
  extract_chart_tags = lambda t: list(dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")))
126
- def repl_tags(txt: str, mp: dict, str_fn):
127
- """Replace generated-chart tags with something else (pdf/img injection)."""
128
- return TAG_RE.sub(lambda m: str_fn(mp[m.group("d").strip()]) if m.group("d").strip() in mp else m.group(0), txt)
129
 
130
  # ─────────────────────────────────────────────────────────────────────────────
131
- # PDF GENERATION (unchanged)
132
  # ─────────────────────────────────────────────────────────────────────────────
133
  class PDF(FPDF, HTMLMixin): pass
134
-
135
  def build_pdf(md, charts):
136
- md = fix_bullet(md).replace("", "*")
137
- md = repl_tags(md, charts, lambda p: f'<img src="{p}">')
138
- html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(md)
139
- pdf = PDF(); pdf.set_auto_page_break(True, margin=15)
140
- pdf.add_page()
141
- pdf.set_font("Arial", "B", 18); pdf.cell(0, 12, "AI-Generated Business Report", ln=True); pdf.ln(3)
142
- pdf.set_font("Arial", "", 11); pdf.write_html(html)
143
  return bytes(pdf.output(dest="S"))
144
 
145
  # ─────────────────────────────────────────────────────────────────────────────
146
- # VIDEO-ONLY ANIMATION HELPERS
147
  # ─────────────────────────────────────────────────────────────────────────────
148
- def animate_image_fade(img_cv2: np.ndarray, duration: float, out_path: Path, fps: int = FPS):
149
- """Simple fade-in from white background to the provided image."""
150
- frames = max(int(duration * fps), fps) # at least 1 s
151
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
152
- video = cv2.VideoWriter(str(out_path), fourcc, fps, (WIDTH, HEIGHT))
153
- blank = np.full_like(img_cv2, 255)
154
-
155
  for i in range(frames):
156
- alpha = i / frames
157
- frame = cv2.addWeighted(blank, 1 - alpha, img_cv2, alpha, 0)
158
  video.write(frame)
159
  video.release()
160
  return str(out_path)
161
 
162
- def animate_chart(desc: str, df: pd.DataFrame, duration: float, out_path: Path, fps: int = FPS) -> tuple[str, str]:
163
  """
164
- Build an animated chart clip matching *desc*.
165
- Returns (mp4_path, preview_png_path).
166
- Falls back to simple fade-in if animation fails.
167
  """
168
  try:
169
- # VERY rough heuristic parser
170
  desc_low = desc.lower()
171
- if ("bar race" in desc_low or "race" in desc_low) and HAS_BCR:
172
- # --------------- bar chart race ---------------------------------
173
- tmp_csv = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.csv"
174
- df.to_csv(tmp_csv, index=False)
175
  bcr.bar_chart_race(
176
- input_filename=tmp_csv,
177
- output_filename=str(out_path),
178
- n_bars=10,
179
- period_length=duration / df.shape[0] if df.shape[0] else 0.5,
180
- steps_per_period=3,
181
- dpi=144,
182
- fig=(WIDTH / 100, HEIGHT / 100),
183
- bar_label_font=4,
184
- fixed_order=False,
185
- interpolate_period=False,
186
- period_template='{x:.0f}',
187
  )
188
- tmp_csv.unlink(missing_ok=True)
189
- # grab first frame for preview
190
- cap = cv2.VideoCapture(str(out_path))
191
- ok, frame = cap.read(); cap.release()
192
- if ok:
193
- preview = Path(out_path.with_suffix(".png"))
194
- cv2.imwrite(str(preview), frame)
195
- return str(out_path), str(preview)
196
- raise RuntimeError("Could not capture preview")
 
 
 
 
 
 
 
 
 
197
  else:
198
- # --------------- generic line/bar growth using FuncAnimation ----
199
- # Pick numeric columns
200
- num_cols = df.select_dtypes(include=['number']).columns.tolist()
201
- if len(num_cols) < 1:
202
- raise ValueError("No numeric data to plot")
203
- col_y = num_cols[0]
204
- col_x = num_cols[1] if len(num_cols) > 1 else None
205
- fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
206
-
207
- if "bar" in desc_low:
208
- bars = ax.bar([], [])
209
- def update(frame_idx):
210
- frac = frame_idx / frames
211
- upto = int(len(df) * frac) or 1
212
- ydata = df[col_y].iloc[:upto]
213
- xdata = df[col_x].iloc[:upto] if col_x else np.arange(upto)
214
- ax.clear()
215
- ax.bar(xdata, ydata, color="#1f77b4")
216
- ax.set_title(desc); ax.grid(True, alpha=0.3)
217
- frames = max(int(duration * fps), fps)
218
- anim = FuncAnimation(fig, update, frames=frames, blit=False)
219
- else:
220
- line, = ax.plot([], [], lw=2)
221
- ax.set_xlim(df.index.min(), df.index.max() or len(df))
222
- ax.set_ylim(df[col_y].min(), df[col_y].max())
223
- ax.set_title(desc); ax.grid(True, alpha=0.3)
224
- def update(frame_idx):
225
- upto = int(len(df) * frame_idx / frames) or 1
226
- line.set_data(df.index[:upto], df[col_y].iloc[:upto])
227
- return line,
228
- frames = max(int(duration * fps), fps)
229
- anim = FuncAnimation(fig, update, frames=frames, blit=True)
230
-
231
- writer = FFMpegWriter(fps=fps, metadata=dict(artist='Sozo Studio'))
232
- anim.save(str(out_path), writer=writer, dpi=144)
233
- preview = Path(out_path.with_suffix(".png"))
234
- fig.savefig(preview, bbox_inches="tight", facecolor="white")
235
- plt.close('all')
236
- return str(out_path), str(preview)
237
  except Exception as e:
238
- # Fallback: simple fade-in on static chart generated by agent
239
- with st.spinner(f"Animation fallback due to {e}. Generating static image."):
240
  fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
241
  df.plot(ax=ax); ax.set_title(desc); ax.grid(alpha=0.3)
242
- png_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
243
- fig.savefig(png_path, bbox_inches="tight", facecolor="white"); plt.close('all')
244
- img = cv2.imread(str(png_path)); img = cv2.resize(img, (WIDTH, HEIGHT))
245
- mp4_path = Path(out_path)
246
- animate_image_fade(img, duration, mp4_path, fps=fps)
247
- return str(mp4_path), str(png_path)
 
 
 
 
 
 
 
 
 
 
248
 
249
  # ─────────────────────────────────────────────────────────────────────────────
250
- # REPORT GENERATION (unchanged)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  # ─────────────────────────────────────────────────────────────────────────────
252
  def generate_report_assets(key, buf, name, ctx):
253
  df, err = load_dataframe_safely(buf, name)
@@ -257,78 +230,46 @@ def generate_report_assets(key, buf, name, ctx):
257
  google_api_key=API_KEY, temperature=0.1)
258
  ctx_dict = {"shape": df.shape, "columns": list(df.columns),
259
  "user_ctx": ctx or "General business analysis"}
260
-
261
- report_md = llm.invoke(
262
- f"""You are a senior business analyst. Write an executive-level Markdown report
263
- with insights & recommendations. Use chart tags like <generate_chart: "description"> where helpful.
264
- Data Context: {json.dumps(ctx_dict, indent=2)}"""
265
  ).content
266
 
267
- chart_descs = extract_chart_tags(report_md)[:MAX_CHARTS]
268
- chart_paths = {}
 
269
  if chart_descs:
270
- ag = create_pandas_dataframe_agent(llm=llm, df=df, verbose=False,
271
- allow_dangerous_code=True)
272
  for d in chart_descs:
273
  with st.spinner(f"Generating chart: {d}"):
274
  with plt.ioff():
275
  try:
276
- ag.run(f"Create a {d} with Matplotlib and save.")
277
- fig = plt.gcf()
278
  if fig.axes:
279
  p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
280
  fig.savefig(p, dpi=300, bbox_inches="tight", facecolor="white")
281
- chart_paths[d] = str(p)
282
- plt.close("all")
283
- except:
284
  plt.close("all")
 
285
 
286
- md = fix_bullet(report_md)
287
- pdf = build_pdf(md, chart_paths)
288
- preview = repl_tags(md, chart_paths,
289
- lambda p: f'<img src="data:image/png;base64,{base64.b64encode(Path(p).read_bytes()).decode()}" style="max-width:100%;">')
290
-
291
- return {"type": "report", "preview": preview, "pdf": pdf,
292
- "report_md": md, "key": key}
293
 
294
  # ─────────────────────────────────────────────────────────────────────────────
295
- # VIDEO GENERATION (animated charts!)
296
  # ─────────────────────────────────────────────────────────────────────────────
297
- def generate_image_from_prompt(prompt, style):
298
- """Image placeholder using Gemini; falls back to gray canvas on error."""
299
- try:
300
- full_prompt = f"A professional, clean, illustrative image for a business presentation: {prompt}, in the style of {style}."
301
- response = GEM.generate_content(
302
- contents=full_prompt,
303
- model="models/gemini-1.5-flash-latest",
304
- generation_config={"response_mime_type": "image/png"}
305
- )
306
- img_bytes = response.parts[0].blob.data
307
- return Image.open(io.BytesIO(img_bytes)).convert("RGB")
308
- except Exception as e:
309
- st.warning(f"Illustrative image generation failed: {e}. Using placeholder.")
310
- return Image.new('RGB', (WIDTH, HEIGHT), color=(230, 230, 230))
311
-
312
- def concat_media(inputs, output_path, media_type="video"):
313
- """Concat list of mp4 or mp3 files using ffmpeg demuxer (copy, no re-encode)."""
314
- concat_list = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
315
- with open(concat_list, 'w') as f:
316
- for item in inputs:
317
- f.write(f"file '{Path(item).resolve()}'\n")
318
- codec_copy = 'copy'
319
- what = '-c:v' if media_type == "video" else '-c:a'
320
- subprocess.run(['ffmpeg', '-y', '-f', 'concat', '-safe', '0',
321
- '-i', str(concat_list), what, codec_copy, str(output_path)],
322
- check=True, capture_output=True)
323
- concat_list.unlink(missing_ok=True)
324
-
325
  def generate_video_assets(key, buf, name, ctx, style, animate_charts=True):
326
- # --- environment check ---
327
  try:
328
- subprocess.run(['ffmpeg', '-version'], check=True, capture_output=True)
329
- except (FileNotFoundError, subprocess.CalledProcessError):
330
- st.error("🔴 FFmpeg is not installed or not in your system's PATH. Video generation is not possible.")
331
- return None
332
 
333
  df, err = load_dataframe_safely(buf, name)
334
  if err: st.error(err); return None
@@ -337,188 +278,130 @@ def generate_video_assets(key, buf, name, ctx, style, animate_charts=True):
337
  google_api_key=API_KEY, temperature=0.2)
338
  ctx_dict = {"shape": df.shape, "columns": list(df.columns),
339
  "user_ctx": ctx or "General business analysis"}
340
-
341
- story_prompt = f"""Create a script for a short business video with exactly {VIDEO_SCENES} scenes.
342
- For each scene:
343
- 1. Write a concise narration (1–2 sentences).
344
- 2. If the data can be visualized for this scene, add a chart tag like <generate_chart: "bar chart of sales by region">.
345
- 3. Separate each scene with the marker `[SCENE_BREAK]`.
346
- Data Context: {json.dumps(ctx_dict, indent=2)}"""
347
-
348
- with st.spinner("Generating video script"):
349
- full_script = llm.invoke(story_prompt).content
350
- scenes = [s.strip() for s in full_script.split("[SCENE_BREAK]") if s.strip()]
351
-
352
- video_clips, audio_paths, temp_files = [], [], []
353
- ag = create_pandas_dataframe_agent(llm=llm, df=df,
354
- verbose=False, allow_dangerous_code=True)
355
-
356
- try:
357
- for i, scene_text in enumerate(scenes[:VIDEO_SCENES]):
358
- st.progress((i + 1) / VIDEO_SCENES, text=f"Processing Scene {i+1}/{VIDEO_SCENES}…")
359
-
360
- chart_descs = extract_chart_tags(scene_text)
361
- narrative = repl_tags(scene_text, {}, lambda _: "").strip()
362
-
363
- # 1. Generate Audio (always)
364
- audio_content, _ = deepgram_tts(narrative)
365
- if audio_content:
366
- audio_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
367
- audio_path.write_bytes(audio_content)
368
- audio_paths.append(str(audio_path))
369
- temp_files.append(audio_path)
370
- duration = get_audio_duration(str(audio_path))
371
- else:
372
- duration = 5.0 # fallback
373
-
374
- # 2. Generate Visual (clip)
375
- if chart_descs:
376
- d = chart_descs[0]
377
- with plt.ioff():
378
- try:
379
- ag.run(f"Create a {d} with Matplotlib and save.")
380
- fig = plt.gcf()
381
- if not fig.axes: raise ValueError("No axes")
382
- static_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
383
- fig.savefig(static_png, dpi=300, bbox_inches="tight", facecolor="white")
384
- plt.close("all")
385
- except Exception:
386
- plt.close("all")
387
- # fallback to illustrative image
388
- static_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
389
- generate_image_from_prompt(narrative, style).save(static_png)
390
-
391
- # Animate?
392
- if animate_charts:
393
- clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
394
- img = cv2.imread(str(static_png)); img = cv2.resize(img, (WIDTH, HEIGHT))
395
- animate_image_fade(img, duration, clip_path)
396
- video_clips.append(str(clip_path))
397
- temp_files.extend([static_png, clip_path])
398
- else:
399
- # Just still → Ken-Burns fade to duration seconds
400
- clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
401
- img = cv2.imread(str(static_png)); img = cv2.resize(img, (WIDTH, HEIGHT))
402
- animate_image_fade(img, duration, clip_path) # still a clip
403
- video_clips.append(str(clip_path))
404
- temp_files.extend([static_png, clip_path])
405
-
406
- else:
407
- # No chart; illustrative image
408
- static_img = generate_image_from_prompt(narrative, style)
409
- static_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
410
- static_img.save(static_png)
411
- clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
412
- img = cv2.cvtColor(np.array(static_img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
413
- animate_image_fade(img, duration, clip_path)
414
- video_clips.append(str(clip_path))
415
- temp_files.extend([static_png, clip_path])
416
-
417
- # --- Assemble video ---
418
- st.progress(1.0, text="Assembling video…")
419
- silent_video_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
420
- concat_media(video_clips, silent_video_path, media_type="video")
421
-
422
- # --- Concat audio ---
423
- audio_concat_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
424
- concat_media(audio_paths, audio_concat_path, media_type="audio")
425
-
426
- # --- Merge AV streams ---
427
- final_video_path = Path(tempfile.gettempdir()) / f"{key}.mp4"
428
- subprocess.run(['ffmpeg', '-y',
429
- '-i', str(silent_video_path),
430
- '-i', str(audio_concat_path),
431
- '-c:v', 'copy', '-c:a', 'aac',
432
- '-shortest', str(final_video_path)],
433
- check=True, capture_output=True)
434
-
435
- return {"type": "video", "video_path": str(final_video_path), "key": key}
436
-
437
- finally:
438
- # clean-up temps except final video
439
- for f in temp_files:
440
- f.unlink(missing_ok=True)
441
 
442
  # ─────────────────────────────────────────────────────────────────────────────
443
- # UI & MAIN WORKFLOW
444
  # ─────────────────────────────────────────────────────────────────────────────
445
  mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
446
 
447
- # Video options
448
- video_style = "professional illustration"
449
- animate_charts_on = True
450
  if mode == "Video Narrative":
451
  with st.sidebar:
452
  st.subheader("🎬 Video Options")
453
- video_style = st.selectbox("Visual Style",
 
454
  ["professional illustration", "minimalist infographic",
455
- "photorealistic", "cinematic", "data visualization aesthetic"])
456
- animate_charts_on = st.toggle("Animate Charts", value=True)
457
- st.caption("Disabling animation uses static slides with a quick fade-in.")
 
458
 
459
- # Common file uploader
460
  upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
461
  if upl:
462
- df_prev, _ = load_dataframe_safely(upl.getvalue(), upl.name)
463
  with st.expander("📊 Data Preview"):
464
- st.dataframe(arrow_df(df_prev.head()))
465
 
466
  ctx = st.text_area("Business context or specific instructions (optional)")
467
 
468
  if st.button("🚀 Generate", type="primary"):
469
  if not upl:
470
  st.warning("Please upload a file first."); st.stop()
471
-
472
- bkey = sha1_bytes(b"".join([upl.getvalue(), mode.encode(),
473
- ctx.encode(), video_style.encode(),
474
- str(animate_charts_on).encode()]))
475
-
476
  if mode == "Report (PDF)":
477
- with st.spinner("Generating report and charts…"):
478
- bundle = generate_report_assets(bkey, upl.getvalue(), upl.name, ctx)
479
- else: # Video
480
- bundle = generate_video_assets(bkey, upl.getvalue(), upl.name, ctx,
481
- video_style, animate_charts=animate_charts_on)
482
-
483
- st.session_state.bundle = bundle
484
  st.rerun()
485
 
486
  # ─────────────────────────────────────────────────────────────────────────────
487
- # DISPLAY AREA
488
  # ─────────────────────────────────────────────────────────────────────────────
489
  if st.session_state.get("bundle"):
490
  bundle = st.session_state.bundle
491
-
492
  if bundle.get("type") == "report":
493
  st.subheader("📄 Generated Report")
494
  with st.expander("View Report", expanded=True):
495
- if bundle["preview"]:
496
- st.markdown(bundle["preview"], unsafe_allow_html=True)
497
- c1, c2 = st.columns(2)
498
- with c1:
499
- st.download_button("Download PDF", bundle["pdf"],
500
- "business_report.pdf", "application/pdf",
501
- use_container_width=True)
502
- with c2:
503
- if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
504
- report_text = re.sub(r'<[^>]+>', '', bundle["report_md"])
505
- audio, mime = deepgram_tts(report_text)
506
- if audio:
507
- st.audio(audio, format=mime)
508
- else:
509
- st.error("Narration failed.")
510
- else:
511
- st.warning("No report content was generated.")
512
-
513
  elif bundle.get("type") == "video":
514
  st.subheader("🎬 Generated Video Narrative")
515
- video_path = bundle.get("video_path")
516
- if video_path and Path(video_path).exists():
517
- with open(video_path, "rb") as f:
518
  st.video(f.read())
519
- with open(video_path, "rb") as f:
520
  st.download_button("Download Video", f,
521
- f"sozo_narrative_{bundle['key'][:8]}.mp4",
522
- "video/mp4")
523
  else:
524
- st.error("Video file could not be found or generation failed.")
 
1
  ###############################################################################
2
  # Sozo Business Studio · AI transforms business data into compelling narratives
3
+ # (video branch now supports animated charts – PDF branch untouched)
4
  ###############################################################################
5
  import os, re, json, hashlib, uuid, base64, io, tempfile, wave, requests, subprocess
6
  from pathlib import Path
 
9
  import streamlit as st
10
  import pandas as pd
11
  import numpy as np
 
12
  import matplotlib
13
  matplotlib.use("Agg")
14
  import matplotlib.pyplot as plt
15
  from matplotlib.animation import FuncAnimation, FFMpegWriter
 
16
  from fpdf import FPDF, HTMLMixin
17
  from markdown_it import MarkdownIt
18
  from PIL import Image
19
+ import cv2 # video processing
20
+
21
+ try: # optional helper for bar-race
22
+ import bar_chart_race as bcr
23
  HAS_BCR = True
24
  except ImportError:
25
  HAS_BCR = False
26
 
27
  from langchain_experimental.agents import create_pandas_dataframe_agent
28
  from langchain_google_genai import ChatGoogleGenerativeAI
29
+ from google import genai # ← original import path
30
 
31
  # ─────────────────────────────────────────────────────────────────────────────
32
  # CONFIG & CONSTANTS
 
35
  st.title("📊 Sozo Business Studio")
36
  st.caption("AI transforms business data into compelling narratives.")
37
 
38
+ FPS, WIDTH, HEIGHT = 24, 1280, 720 # video parameters
39
+ MAX_CHARTS, VIDEO_SCENES = 5, 5
 
 
40
 
 
41
  API_KEY = os.getenv("GEMINI_API_KEY")
42
  if not API_KEY:
43
  st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
44
+ GEM = genai.Client(api_key=API_KEY) # ← still using Client pattern
45
 
46
+ DG_KEY = os.getenv("DEEPGRAM_API_KEY") # optional (narration)
47
 
 
48
  st.session_state.setdefault("bundle", None)
49
+ sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
50
 
51
  # ─────────────────────────────────────────────────────────────────────────────
52
+ # BASIC HELPERS
53
  # ─────────────────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
54
  def load_dataframe_safely(buf: bytes, name: str):
55
  try:
56
  ext = Path(name).suffix.lower()
57
  df = pd.read_excel(io.BytesIO(buf)) if ext in (".xlsx", ".xls") else pd.read_csv(io.BytesIO(buf))
 
58
  df.columns = df.columns.astype(str).str.strip()
59
  df = df.dropna(how="all")
60
+ if df.empty or len(df.columns) == 0:
61
+ raise ValueError("No usable data found")
62
  return df, None
63
  except Exception as e:
64
  return None, str(e)
65
 
66
def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with pandas nullable dtypes converted to plain ones.

    Columns whose dtype name is "Int64", "Float64" or "Boolean" are cast to the
    lower-cased dtype of the same name. The input frame is never modified.

    A nullable ``Int64`` column that contains missing values cannot be cast to
    ``int64`` (pandas raises because ``<NA>`` has no integer representation),
    so such columns are cast to ``float64`` instead, where missing values
    become ``NaN``.
    """
    safe = df.copy()
    for col in safe.columns:
        dtype_name = safe[col].dtype.name
        if dtype_name not in ("Int64", "Float64", "Boolean"):
            continue
        if dtype_name == "Int64" and safe[col].isna().any():
            target = "float64"
        else:
            target = dtype_name.lower()
        safe[col] = safe[col].astype(target)
    return safe
72
 
 
73
  @st.cache_data(show_spinner=False)
74
  def deepgram_tts(text: str):
75
+ if not DG_KEY or not text:
76
+ return None, None
77
  text = re.sub(r"[^\w\s.,!?;:-]", "", text)[:1000]
78
  try:
79
  r = requests.post(
 
88
  except Exception:
89
  return None, None
90
 
91
def get_audio_duration(mp3_path: str, default: float = 5.0) -> float:
    """Return the duration of an audio file in seconds, measured with ffprobe.

    Args:
        mp3_path: Path of the audio file to probe.
        default: Value returned when the duration cannot be determined.

    Returns:
        The duration reported by ffprobe, or *default* when ffprobe is missing,
        exits with an error, prints something that is not a number, or reports
        a duration that is not a positive finite value.
    """
    try:
        out = subprocess.run(
            ["ffprobe", "-v", "error", "-show_entries", "format=duration",
             "-of", "default=noprint_wrappers=1:nokey=1", mp3_path],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True
        ).stdout.strip()
        seconds = float(out)
    except Exception:
        # Deliberate best-effort: a failed probe must not abort video assembly.
        return default
    # float() accepts "nan" and "inf"; both comparisons are False for NaN, so
    # this single test rejects NaN, infinities and non-positive durations,
    # which would otherwise break the frame-count arithmetic downstream.
    if not (0 < seconds < float("inf")):
        return default
    return seconds
101
 
 
 
 
102
  TAG_RE = re.compile(r'[<\[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>\]\'"”’]+?)["\']?\s*[>\]]', re.I)
103
  extract_chart_tags = lambda t: list(dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")))
104
+ def repl_tags(txt: str, mp: dict, fn): # fn replaces tag text
105
+ return TAG_RE.sub(lambda m: fn(mp[m.group("d").strip()]) if m.group("d").strip() in mp else m.group(0), txt)
 
106
 
107
  # ─────────────────────────────────────────────────────────────────────────────
108
+ # PDF GENERATION (UNCHANGED)
109
  # ─────────────────────────────────────────────────────────────────────────────
110
  class PDF(FPDF, HTMLMixin): pass
 
111
  def build_pdf(md, charts):
112
+ html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(
113
+ repl_tags(md.replace("•", "*"), charts, lambda p: f'<img src="{p}">')
114
+ )
115
+ pdf = PDF(); pdf.set_auto_page_break(True, margin=15)
116
+ pdf.add_page(); pdf.set_font("Arial", "B", 18)
117
+ pdf.cell(0, 12, "AI-Generated Business Report", ln=True); pdf.ln(3)
118
+ pdf.set_font("Arial", "", 11); pdf.write_html(html)
119
  return bytes(pdf.output(dest="S"))
120
 
121
  # ─────────────────────────────────────────────────────────────────────────────
122
+ # GENERIC ANIMATION HELPERS (VIDEO PATH ONLY)
123
  # ─────────────────────────────────────────────────────────────────────────────
124
def animate_image_fade(img_cv2: np.ndarray, duration: float, out_path: Path, fps: int = FPS) -> str:
    """Write an MP4 clip that fades from a white frame into *img_cv2*.

    Args:
        img_cv2: Image array as produced by ``cv2.imread``.
        duration: Requested clip length in seconds; the clip is never shorter
            than one second (``fps`` frames).
        out_path: Destination of the MP4 file.
        fps: Frames per second of the clip.

    Returns:
        ``out_path`` as a string.
    """
    frames = max(int(duration * fps), fps)  # at least 1 second
    # The writer is opened for WIDTH x HEIGHT; make sure every frame matches,
    # since a frame of another size does not end up in a valid clip.
    if img_cv2.shape[1] != WIDTH or img_cv2.shape[0] != HEIGHT:
        img_cv2 = cv2.resize(img_cv2, (WIDTH, HEIGHT))
    blank = np.full_like(img_cv2, 255)
    video = cv2.VideoWriter(str(out_path), cv2.VideoWriter_fourcc(*"mp4v"), fps, (WIDTH, HEIGHT))
    try:
        for i in range(frames):
            # Divide by frames - 1 so the final frame is the unblended image;
            # dividing by frames left the last frame partially white.
            alpha = i / (frames - 1) if frames > 1 else 1.0
            video.write(cv2.addWeighted(blank, 1 - alpha, img_cv2, alpha, 0))
    finally:
        # Release even when a frame fails so the file handle is not leaked.
        video.release()
    return str(out_path)
134
 
135
def _bar_race_clip(df: pd.DataFrame, duration: float, out_path: Path) -> str:
    """Render a bar-chart-race clip from the numeric columns of *df*."""
    numeric = df.select_dtypes(include=["number"])
    if numeric.shape[1] == 0:
        raise ValueError("No numeric data")
    bcr.bar_chart_race(
        df=numeric,
        filename=str(out_path),
        n_bars=min(10, numeric.shape[1]),
        # bar_chart_race expects the length of one period in milliseconds.
        period_length=duration * 1000 / max(len(numeric), 1),
        # figsize * dpi = WIDTH x HEIGHT, matching the other clips.
        figsize=(WIDTH / 100, HEIGHT / 100),
        dpi=100,
    )
    return str(out_path)


def _growth_clip(desc: str, df: pd.DataFrame, duration: float, out_path: Path, fps: int) -> str:
    """Render a bar or line chart that reveals the rows of *df* progressively."""
    num_cols = df.select_dtypes(include=["number"]).columns.tolist()
    if not num_cols:
        raise ValueError("No numeric data")
    col_y = num_cols[0]
    col_x = num_cols[1] if len(num_cols) > 1 else None
    n_rows = len(df)
    frames = max(int(duration * fps), fps)
    is_bar = "bar" in desc.lower()

    def rows_shown(i):
        # (i + 1) / frames reaches 1.0 on the last frame, so the final row is
        # drawn; i / frames stopped one step short of the full data set.
        return max(int(n_rows * (i + 1) / frames), 1)

    fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
    try:
        if is_bar:
            def update(i):
                upto = rows_shown(i)
                ax.clear()
                ax.bar(df[col_x].iloc[:upto] if col_x else np.arange(upto),
                       df[col_y].iloc[:upto], color="#1f77b4")
                ax.set_title(desc)
                ax.grid(alpha=0.3)
        else:
            line, = ax.plot([], [], lw=2)
            # Keep a non-zero span so single-row data still gets valid limits.
            ax.set_xlim(0, max(n_rows - 1, 1))
            ax.set_ylim(df[col_y].min(), df[col_y].max())
            ax.set_title(desc)
            ax.grid(alpha=0.3)

            def update(i):
                upto = rows_shown(i)
                line.set_data(np.arange(upto), df[col_y].iloc[:upto])
                return line,

        anim = FuncAnimation(fig, update, frames=frames, blit=not is_bar)
        writer = FFMpegWriter(fps=fps, metadata=dict(artist="Sozo Studio"))
        # Save at the figure's own dpi so the clip is WIDTH x HEIGHT like every
        # other clip; a different dpi scales the frame size.
        anim.save(str(out_path), writer=writer, dpi=100)
    finally:
        plt.close("all")
    return str(out_path)


def _static_fade_clip(desc: str, df: pd.DataFrame, duration: float, out_path: Path, fps: int) -> str:
    """Render a static chart (or a bare title card) and fade it in."""
    png_tmp = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
    try:
        with plt.ioff():
            fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
            if df.select_dtypes(include=["number"]).shape[1] > 0:
                df.plot(ax=ax)
                ax.grid(alpha=0.3)
            else:
                # DataFrame.plot raises when there is nothing numeric to draw;
                # show only the title so the fallback cannot fail that way.
                ax.axis("off")
            ax.set_title(desc)
            fig.savefig(png_tmp, bbox_inches="tight", facecolor="white")
            plt.close("all")
        img = cv2.resize(cv2.imread(str(png_tmp)), (WIDTH, HEIGHT))
        return animate_image_fade(img, duration, out_path, fps)
    finally:
        # The PNG is only an intermediate; remove it once the clip is written.
        png_tmp.unlink(missing_ok=True)


def animate_chart(desc: str, df: pd.DataFrame, duration: float, out_path: Path, fps: int = FPS) -> str:
    """
    Build an animated chart matching *desc*; returns mp4 path.

    A description containing "race" produces a bar chart race when the optional
    bar_chart_race package is available; otherwise a growing bar chart (when
    the description contains "bar") or a growing line chart is rendered.
    Falls back to a simple fade animation of a static chart if anything fails.

    Args:
        desc: Free-text chart description, also used as the chart title.
        df: Data to plot.
        duration: Requested clip length in seconds.
        out_path: Destination of the MP4 file.
        fps: Frames per second of the clip.
    """
    try:
        if "race" in desc.lower() and HAS_BCR:
            return _bar_race_clip(df, duration, out_path)
        return _growth_clip(desc, df, duration, out_path, fps)
    except Exception:
        # Deliberate best-effort: any animation failure degrades to a static
        # chart with a fade-in rather than aborting the video.
        plt.close("all")
        return _static_fade_clip(desc, df, duration, out_path, fps)
191
+
192
def concat_media(inputs, output, kind="video"):
    """Concatenate media files with ffmpeg's concat demuxer (stream copy).

    Args:
        inputs: Iterable of paths of the files to join, in order.
        output: Path of the file to create (overwritten if it exists).
        kind: "video" copies the video stream (``-c:v copy``); any other value
            copies the audio stream (``-c:a copy``).

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ffmpeg executable is not available.
    """
    lst = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
    try:
        with lst.open("w", encoding="utf-8") as f:
            for p in inputs:
                # Inside a single-quoted entry of the concat list a quote must
                # be written as '\'' or the path is cut at that character.
                escaped = str(Path(p).resolve()).replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")
        subprocess.run(
            ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(lst),
             "-c:v" if kind == "video" else "-c:a", "copy", str(output)],
            check=True, capture_output=True
        )
    finally:
        # Remove the list file even when ffmpeg fails.
        lst.unlink(missing_ok=True)
203
 
204
  # ─────────────────────────────────────────────────────────────────────────────
205
+ # IMAGE GENERATION (keeps original Client.generate_content call)
206
+ # ─────────────────────────────────────────────────────────────────────────────
207
def generate_image_from_prompt(prompt, style, model="gemini-2.0-flash-preview-image-generation"):
    """Generate an illustrative image for a scene, or a gray placeholder.

    Args:
        prompt: Scene text describing what the image should illustrate.
        style: Visual style appended to the prompt.
        model: Identifier of an image-capable Gemini model.
            NOTE(review): model identifiers change over time - confirm this
            default against the models available to the configured API key.

    Returns:
        A PIL image in RGB mode. When generation fails for any reason a
        warning is shown and a WIDTH x HEIGHT light-gray image is returned.
    """
    try:
        full_prompt = (f"A professional, clean, illustrative image for a business presentation: "
                       f"{prompt}, in the style of {style}.")
        # The google-genai Client exposes generation under ``client.models``;
        # calling ``generate_content`` on the client itself fails, which made
        # every call end in the placeholder branch.
        response = GEM.models.generate_content(
            model=model,
            contents=full_prompt,
            config={"response_modalities": ["TEXT", "IMAGE"]},
        )
        # The reply may mix text and image parts; use the first image part.
        for part in response.candidates[0].content.parts:
            inline = getattr(part, "inline_data", None)
            if inline is not None and inline.data:
                return Image.open(io.BytesIO(inline.data)).convert("RGB")
        raise ValueError("response contained no image data")
    except Exception as e:
        # Deliberate best-effort: a missing illustration must not stop the video.
        st.warning(f"Illustrative image generation failed: {e}. Using placeholder.")
        return Image.new("RGB", (WIDTH, HEIGHT), color=(230, 230, 230))
221
+
222
+ # ─────────────────────────────────────────────────────────────────────────────
223
+ # REPORT GENERATION (UNCHANGED)
224
  # ─────────────────────────────────────────────────────────────────────────────
225
  def generate_report_assets(key, buf, name, ctx):
226
  df, err = load_dataframe_safely(buf, name)
 
230
  google_api_key=API_KEY, temperature=0.1)
231
  ctx_dict = {"shape": df.shape, "columns": list(df.columns),
232
  "user_ctx": ctx or "General business analysis"}
233
+ md = llm.invoke(
234
+ "You are a senior business analyst. Write an executive-level Markdown report "
235
+ "with insights & recommendations. Use chart tags like <generate_chart: \"description\"> where helpful.\n"
236
+ f"Data Context: {json.dumps(ctx_dict, indent=2)}"
 
237
  ).content
238
 
239
+ # Replace tags with static charts
240
+ chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
241
+ charts = {}
242
  if chart_descs:
243
+ ag = create_pandas_dataframe_agent(llm=llm, df=df, verbose=False, allow_dangerous_code=True)
 
244
  for d in chart_descs:
245
  with st.spinner(f"Generating chart: {d}"):
246
  with plt.ioff():
247
  try:
248
+ ag.run(f"Create a {d} with Matplotlib and save."); fig = plt.gcf()
 
249
  if fig.axes:
250
  p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
251
  fig.savefig(p, dpi=300, bbox_inches="tight", facecolor="white")
252
+ charts[d] = str(p)
 
 
253
  plt.close("all")
254
+ except: plt.close("all")
255
 
256
+ preview = repl_tags(
257
+ md, charts,
258
+ lambda p: f'<img src="data:image/png;base64,{base64.b64encode(Path(p).read_bytes()).decode()}" '
259
+ f'style="max-width:100%;">'
260
+ )
261
+ pdf = build_pdf(md, charts)
262
+ return {"type": "report", "preview": preview, "pdf": pdf, "report_md": md, "key": key}
263
 
264
  # ─────────────────────────────────────────────────────────────────────────────
265
+ # VIDEO GENERATION (ANIMATED CHARTS)
266
  # ─────────────────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  def generate_video_assets(key, buf, name, ctx, style, animate_charts=True):
268
+ # FFmpeg presence
269
  try:
270
+ subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
271
+ except Exception:
272
+ st.error("🔴 FFmpeg not available cannot render video."); return None
 
273
 
274
  df, err = load_dataframe_safely(buf, name)
275
  if err: st.error(err); return None
 
278
  google_api_key=API_KEY, temperature=0.2)
279
  ctx_dict = {"shape": df.shape, "columns": list(df.columns),
280
  "user_ctx": ctx or "General business analysis"}
281
+ script = llm.invoke(
282
+ f"Create a script for a short business video with exactly {VIDEO_SCENES} scenes.\n"
283
+ "For each scene:\n"
284
+ "1. Write a concise narration (1–2 sentences).\n"
285
+ "2. If the data can be visualised, add a chart tag like <generate_chart: \"bar chart of sales by region\">.\n"
286
+ "3. Separate each scene with the marker [SCENE_BREAK].\n"
287
+ f"Data Context: {json.dumps(ctx_dict, indent=2)}"
288
+ ).content
289
+ scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
290
+
291
+ video_parts, audio_parts, temps = [], [], []
292
+
293
+ for idx, scene in enumerate(scenes[:VIDEO_SCENES]):
294
+ st.progress((idx + 1) / VIDEO_SCENES, text=f"Processing Scene {idx+1}/{VIDEO_SCENES}…")
295
+ chart_tags = extract_chart_tags(scene)
296
+ narrative = repl_tags(scene, {}, lambda _: "").strip()
297
+
298
+ # Audio
299
+ audio_bytes, _ = deepgram_tts(narrative)
300
+ audio_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
301
+ (audio_path.write_bytes(audio_bytes) if audio_bytes else None)
302
+ duration = get_audio_duration(str(audio_path)) if audio_bytes else 5.0
303
+ audio_parts.append(str(audio_path)); temps.append(audio_path)
304
+
305
+ # Video
306
+ if chart_tags and animate_charts:
307
+ clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
308
+ animate_chart(chart_tags[0], df, duration, clip_path, FPS)
309
+ video_parts.append(str(clip_path)); temps.append(clip_path)
310
+ else:
311
+ # illustrative image fade
312
+ img = generate_image_from_prompt(narrative, style)
313
+ png_tmp = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
314
+ img.save(png_tmp); temps.append(png_tmp)
315
+ clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
316
+ animate_image_fade(
317
+ cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR),
318
+ duration, clip_path, FPS
319
+ )
320
+ video_parts.append(str(clip_path)); temps.append(clip_path)
321
+
322
+ # Concatenate media
323
+ silent_vid = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
324
+ concat_media(video_parts, silent_vid, "video")
325
+ audio_mix = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
326
+ concat_media(audio_parts, audio_mix, "audio")
327
+
328
+ final_vid = Path(tempfile.gettempdir()) / f"{key}.mp4"
329
+ subprocess.run(
330
+ ["ffmpeg", "-y", "-i", str(silent_vid), "-i", str(audio_mix),
331
+ "-c:v", "copy", "-c:a", "aac", "-shortest", str(final_vid)],
332
+ check=True, capture_output=True
333
+ )
334
+ return {"type": "video", "video_path": str(final_vid), "key": key}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
 
336
  # ─────────────────────────────────────────────────────────────────────────────
337
+ # UI
338
  # ─────────────────────────────────────────────────────────────────────────────
339
# Output format selector; every later branch keys off this value.
mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)

# Defaults keep the bundle key below well-defined in report mode, where the
# sidebar controls are never rendered.
video_style, animate_charts_flag = "professional illustration", True
if mode == "Video Narrative":
    with st.sidebar:
        st.subheader("🎬 Video Options")
        video_style = st.selectbox(
            "Visual Style",
            ["professional illustration", "minimalist infographic",
             "photorealistic", "cinematic", "data visualization aesthetic"]
        )
        animate_charts_flag = st.toggle("Animate Charts", value=True)
        st.caption("Disable to use static slides with a simple fade-in.")

upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
if upl:
    # load_dataframe_safely returns (dataframe, error); surface the error
    # instead of calling .head() on a dataframe that failed to load.
    df_sample, load_err = load_dataframe_safely(upl.getvalue(), upl.name)
    if load_err or df_sample is None:
        st.error(load_err or "Could not read the uploaded file.")
    else:
        with st.expander("📊 Data Preview"):
            st.dataframe(arrow_df(df_sample.head()))

ctx = st.text_area("Business context or specific instructions (optional)")

if st.button("🚀 Generate", type="primary"):
    if not upl:
        st.warning("Please upload a file first.")
        st.stop()
    # The key is a digest over the file bytes and every option that is passed
    # to the generators, so changing any of them yields a different key.
    bkey = sha1_bytes(b"".join([
        upl.getvalue(), mode.encode(), ctx.encode(),
        video_style.encode(), str(animate_charts_flag).encode()
    ]))
    if mode == "Report (PDF)":
        with st.spinner("Generating report…"):
            st.session_state.bundle = generate_report_assets(bkey, upl.getvalue(), upl.name, ctx)
    else:
        st.session_state.bundle = generate_video_assets(
            bkey, upl.getvalue(), upl.name, ctx,
            video_style, animate_charts_flag
        )
    # Re-run the script so the output section renders the stored bundle.
    st.rerun()
377
 
378
  # ─────────────────────────────────────────────────────────────────────────────
379
+ # OUTPUT
380
  # ─────────────────────────────────────────────────────────────────────────────
381
# Render whatever the last generation run stored in the session, if anything.
if st.session_state.get("bundle"):
    bundle = st.session_state.bundle

    if bundle.get("type") == "report":
        st.subheader("📄 Generated Report")
        with st.expander("View Report", expanded=True):
            # The preview embeds charts as inline <img> tags, hence raw HTML.
            st.markdown(bundle["preview"], unsafe_allow_html=True)
        c1, c2 = st.columns(2)
        with c1:
            st.download_button("Download PDF", bundle["pdf"],
                               "business_report.pdf", "application/pdf",
                               use_container_width=True)
        with c2:
            if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
                # Strip tag-like spans (e.g. chart tags) before text-to-speech.
                txt = re.sub(r"<[^>]+>", "", bundle["report_md"])
                audio, mime = deepgram_tts(txt)
                # Plain if/else rather than a conditional expression used as
                # a statement: a bare non-call expression may be echoed to the
                # page by Streamlit's "magic" feature.
                if audio:
                    st.audio(audio, format=mime)
                else:
                    st.error("Narration failed.")

    elif bundle.get("type") == "video":
        st.subheader("🎬 Generated Video Narrative")
        vp = bundle["video_path"]
        if Path(vp).exists():
            # Read the file once and reuse the bytes for player and download.
            video_bytes = Path(vp).read_bytes()
            st.video(video_bytes)
            st.download_button("Download Video", video_bytes,
                               f"sozo_narrative_{bundle['key'][:8]}.mp4", "video/mp4")
        else:
            st.error("Video file missing generation failed.")