rairo committed on
Commit
9823ff4
·
verified ·
1 Parent(s): 0c4e8d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -327
app.py CHANGED
@@ -1,18 +1,14 @@
1
- ###############################################################################
2
  # Sozo Business Studio · 10-Jul-2025
3
- # • Restores PDF branch alongside fixed Video branch
4
- # • Shared chart-tag grammar across both paths
5
- # • Narrator text cleans scene labels + chart talk
6
- # • Matplotlib animation starts from blank; artists returned (blit=True)
7
- # • Gemini Flash-preview image gen with placeholder fallback
8
- # • Silent-audio fallback keeps mux lengths equal
9
- # • NEW (2025-07-06): Lazy-loading of PDF charts + st.rerun()
10
  ##############################################################################
11
 
12
  import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
13
  from pathlib import Path
14
  from typing import Tuple, Dict, List
15
- from concurrent.futures import ThreadPoolExecutor
16
 
17
  import streamlit as st
18
  import pandas as pd
@@ -49,11 +45,8 @@ DG_KEY = os.getenv("DEEPGRAM_API_KEY") # optional narration
49
 
50
  sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
51
 
52
- # ─── LAZY-LOADING SCAFFOLDING ──────────────────────────────────────────────
53
- EXEC = ThreadPoolExecutor(max_workers=4) # parallel chart threads
54
- if "lazy_reports" not in st.session_state: # key → report dict
55
- st.session_state.lazy_reports = {}
56
- st.session_state.setdefault("bundle", None) # video branch
57
 
58
  # ─── HELPERS ───────────────────────────────────────────────────────────────
59
  def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
@@ -101,52 +94,22 @@ def deepgram_tts(txt: str) -> Tuple[bytes, str]:
101
 
102
  def generate_silence_mp3(duration: float, out: Path):
103
  subprocess.run(
104
- [
105
- "ffmpeg",
106
- "-y",
107
- "-f",
108
- "lavfi",
109
- "-i",
110
- "anullsrc=r=44100:cl=mono",
111
- "-t",
112
- f"{duration:.3f}",
113
- "-q:a",
114
- "9",
115
- str(out),
116
- ],
117
- check=True,
118
- capture_output=True,
119
  )
120
 
121
  def audio_duration(path: str) -> float:
122
  try:
123
  res = subprocess.run(
124
- [
125
- "ffprobe",
126
- "-v",
127
- "error",
128
- "-show_entries",
129
- "format=duration",
130
- "-of",
131
- "default=nw=1:nk=1",
132
- path,
133
- ],
134
- text=True,
135
- stdout=subprocess.PIPE,
136
- stderr=subprocess.PIPE,
137
- check=True,
138
  )
139
  return float(res.stdout.strip())
140
  except Exception:
141
  return 5.0
142
 
143
- TAG_RE = re.compile(
144
- r'[<[]\s*generate_?chart\s*[:=]?\s*[\"\'“”]?(?P<d>[^>\"\'”\]]+?)[\"\'“”]?\s*[>\]]',
145
- re.I,
146
- )
147
- extract_chart_tags = lambda t: list(
148
- dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or ""))
149
- )
150
 
151
  re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I)
152
  def clean_narration(txt: str) -> str:
@@ -166,8 +129,7 @@ def generate_image_from_prompt(prompt: str) -> Image.Image:
166
 
167
  def fetch(model_name):
168
  res = GEM.models.generate_content(
169
- model=model_name,
170
- contents=full_prompt,
171
  config=types.GenerateContentConfig(response_modalities=["IMAGE"]),
172
  )
173
  for part in res.candidates[0].content.parts:
@@ -182,11 +144,9 @@ def generate_image_from_prompt(prompt: str) -> Image.Image:
182
  return placeholder_img()
183
 
184
  # ─── PDF GENERATION ────────────────────────────────────────────────────────
185
- class PDF(FPDF, HTMLMixin):
186
- pass
187
 
188
  def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
189
- # For robust PDF creation, embed images as base64 data URIs
190
  def embed_chart_for_pdf(match):
191
  desc = match.group("d").strip()
192
  path = charts.get(desc)
@@ -235,33 +195,25 @@ def quick_chart(desc: str, df: pd.DataFrame, out: Path):
235
  fig.savefig(out, bbox_inches="tight", facecolor="white")
236
  plt.close(fig)
237
 
238
- # ─── REPORT (STEP 1) prepare markdown instantly ────────────────────────
239
- def prepare_report(buf: bytes, name: str, ctx: str):
 
 
 
 
240
  df, err = load_dataframe_safely(buf, name)
241
  if err:
242
  st.error(err)
243
- return None, None, None
244
 
245
- llm = ChatGoogleGenerativeAI(
246
- model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1
247
- )
248
 
249
- # ─── original enhanced context & prompt (UNTOUCHED) ───────────────────
250
  ctx_dict = {
251
- "shape": df.shape,
252
- "columns": list(df.columns),
253
- "user_ctx": ctx or "General business analysis",
254
  "full_dataframe": df.to_dict("records"),
255
- "data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
256
- "missing_values": {
257
- col: int(count) for col, count in df.isnull().sum().to_dict().items()
258
- },
259
- "numeric_summary": {
260
- col: {stat: float(val) for stat, val in stats.items()}
261
- for col, stats in df.describe().to_dict().items()
262
- }
263
- if len(df.select_dtypes(include=["number"]).columns) > 0
264
- else {},
265
  }
266
  cols = ", ".join(ctx_dict["columns"][:6])
267
 
@@ -307,76 +259,56 @@ def prepare_report(buf: bytes, name: str, ctx: str):
307
 
308
  Generate insights that would be valuable to C-level executives and department heads.
309
  """
310
- # ─── end original prompt ───────────────────────────────────────────────
311
-
312
  md = llm.invoke(report_prompt).content
313
  chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
314
- return df, md, chart_descs
315
 
316
- # ─── REPORT (STEP 2) — background worker per chart ───────────────────────
317
- def render_chart_worker(rep_key: str, desc: str):
318
- """Generate one chart (LLM + fallback)."""
319
- rep = st.session_state.lazy_reports[rep_key]
320
- df = rep["df"]
321
-
322
- img_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
323
- try:
324
- agent = create_pandas_dataframe_agent(
325
- llm=ChatGoogleGenerativeAI(
326
- model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1
327
- ),
328
- df=df,
329
- verbose=False,
330
- allow_dangerous_code=True,
331
- )
332
- chart_prompt = f"""
333
- Create a professional {desc} chart using matplotlib with these requirements:
334
- 1. Use a clean, business-appropriate style
335
- 2. Include proper title, axis labels, and legends
336
- 3. Apply appropriate color schemes (avoid rainbow colors)
337
- 4. Ensure text is readable (font size 10+)
338
- 5. Format numbers appropriately (e.g., currency, percentages)
339
- 6. Save the figure with high quality
340
- 7. Handle any missing or null values appropriately
341
- """
342
- agent.run(chart_prompt)
343
- if not img_path.exists():
344
- raise RuntimeError("LLM did not save figure")
345
- except Exception:
346
- try:
347
- quick_chart(desc, df, img_path)
348
- except Exception:
349
- img_path = None
 
350
 
351
- rep["charts"][desc] = str(img_path) if img_path and img_path.exists() else ""
352
- rep["pending"].discard(desc)
353
 
354
- if not rep["pending"]:
355
- rep["pdf"] = build_pdf(rep["md"], rep["charts"])
356
- rep["finished"] = True
357
- st.rerun()
358
 
359
- # ─── FIXED Helper: inline image or text placeholder for preview ───────────
360
- def _substitute_chart_tags_for_preview(rep, desc):
361
- """
362
- Returns an HTML <img> tag for a completed chart or a markdown placeholder.
363
- This function is used by re.sub to render the live report preview.
364
- The img tag styling is based on the working reference script for robustness.
365
- """
366
- path = rep["charts"].get(desc)
367
- if path and Path(path).exists():
368
- b64 = base64.b64encode(Path(path).read_bytes()).decode()
369
- # The style attribute is crucial for responsive rendering on all platforms.
370
- return f'<img src="data:image/png;base64,{b64}" style="max-width:100%;">'
371
- # A textual placeholder is safer and more informative than a broken/styled <img> tag.
372
- return f'\n\n> *⏳ Rendering chart: "{desc}"...*\n\n'
373
-
374
- # ─── ANIMATION HELPERS (unchanged) ────────────────────────────────────────
375
- def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path,
376
- fps: int = FPS) -> str:
377
  frames = max(int(dur * fps), fps)
378
- vid = cv2.VideoWriter(str(out), cv2.VideoWriter_fourcc(*"mp4v"),
379
- fps, (WIDTH, HEIGHT))
380
  blank = np.full_like(img_cv2, 255)
381
  for i in range(frames):
382
  a = i / frames
@@ -384,8 +316,7 @@ def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path,
384
  vid.release()
385
  return str(out)
386
 
387
- def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path,
388
- fps: int = FPS) -> str:
389
  """Render an animated chart whose clip length equals `dur`."""
390
  ctype, *rest = [s.strip().lower() for s in desc.split("|", 1)]
391
  ctype = ctype or "bar"
@@ -410,7 +341,6 @@ def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path,
410
  if ctype == "pie":
411
  wedges, _ = ax.pie(plot_df, labels=plot_df.index, startangle=90)
412
  ax.set_title(title)
413
-
414
  def init(): [w.set_alpha(0) for w in wedges]; return wedges
415
  def update(i):
416
  a = i / (frames - 1)
@@ -420,18 +350,15 @@ def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path,
420
  elif ctype == "bar":
421
  bars = ax.bar(plot_df.index, np.zeros_like(plot_df.values), color="#1f77b4")
422
  ax.set_ylim(0, plot_df.max() * 1.1); ax.set_title(title)
423
-
424
  def init(): return bars
425
  def update(i):
426
  a = i / (frames - 1)
427
- for b, h in zip(bars, plot_df.values):
428
- b.set_height(h * a)
429
  return bars
430
 
431
  elif ctype == "hist":
432
  _, _, patches = ax.hist(plot_df, bins=20, color="#1f77b4", alpha=0)
433
  ax.set_title(title)
434
-
435
  def init(): [p.set_alpha(0) for p in patches]; return patches
436
  def update(i):
437
  a = i / (frames - 1)
@@ -439,42 +366,30 @@ def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path,
439
  return patches
440
 
441
  elif ctype == "scatter":
442
- pts = ax.scatter(plot_df.iloc[:, 0], plot_df.iloc[:, 1],
443
- s=10, alpha=0)
444
  ax.set_title(title); ax.grid(alpha=.3)
445
-
446
  def init(): pts.set_alpha(0); return [pts]
447
- def update(i):
448
- pts.set_alpha(i / (frames - 1)); return [pts]
449
 
450
  else: # line
451
  line, = ax.plot([], [], lw=2)
452
- x_full = (plot_df.iloc[:, 0] if plot_df.shape[1] > 1
453
- else np.arange(len(plot_df)))
454
- y_full = (plot_df.iloc[:, 1] if plot_df.shape[1] > 1
455
- else plot_df.iloc[:, 0])
456
- ax.set_xlim(x_full.min(), x_full.max())
457
- ax.set_ylim(y_full.min(), y_full.max())
458
  ax.set_title(title); ax.grid(alpha=.3)
459
-
460
  def init(): line.set_data([], []); return [line]
461
  def update(i):
462
  k = max(2, int(len(x_full) * i / (frames - 1)))
463
  line.set_data(x_full[:k], y_full.iloc[:k])
464
  return [line]
465
 
466
- anim = FuncAnimation(fig, update, init_func=init,
467
- frames=frames, blit=True,
468
- interval=1000 / fps)
469
- anim.save(str(out),
470
- writer=FFMpegWriter(fps=fps, metadata={'artist':'Sozo'}),
471
- dpi=144)
472
  plt.close(fig)
473
  return str(out)
474
 
475
  def safe_chart(desc, df, dur, out):
476
- try:
477
- return animate_chart(desc, df, dur, out)
478
  except Exception:
479
  with plt.ioff():
480
  df.plot(ax=plt.gca())
@@ -484,33 +399,18 @@ def safe_chart(desc, df, dur, out):
484
  return animate_image_fade(img, dur, out)
485
 
486
  def concat_media(paths: List[str], out: Path, kind="video"):
487
- if not paths:
488
- return
489
  lst = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
490
  with lst.open("w") as f:
491
  for p in paths:
492
- if Path(p).exists():
493
- f.write(f"file '{Path(p).resolve()}'\n")
494
  subprocess.run(
495
- [
496
- "ffmpeg",
497
- "-y",
498
- "-f",
499
- "concat",
500
- "-safe",
501
- "0",
502
- "-i",
503
- str(lst),
504
- "-c:v" if kind == "video" else "-c:a",
505
- "copy",
506
- str(out),
507
- ],
508
- check=True,
509
- capture_output=True,
510
  )
511
  lst.unlink(missing_ok=True)
512
 
513
- # ─── VIDEO GENERATION (original prompt & logic) ────────────────────────────
514
  def build_story_prompt(ctx_dict):
515
  cols = ", ".join(ctx_dict["columns"][:6])
516
  return f"""
@@ -568,108 +468,54 @@ def build_story_prompt(ctx_dict):
568
  """
569
 
570
  def generate_video(buf: bytes, name: str, ctx: str, key: str):
571
- try:
572
- subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
573
- except Exception:
574
- st.error("🔴 FFmpeg not available — cannot render video.")
575
- return None
576
 
577
  df, err = load_dataframe_safely(buf, name)
578
- if err:
579
- st.error(err)
580
- return None
581
-
582
- llm = ChatGoogleGenerativeAI(
583
- model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.2
584
- )
585
 
 
586
  ctx_dict = {
587
- "shape": df.shape,
588
- "columns": list(df.columns),
589
- "user_ctx": ctx or "General business analysis",
590
  "full_dataframe": df.to_dict("records"),
591
  "data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
592
- "numeric_summary": {
593
- col: {stat: float(val) for stat, val in stats.items()}
594
- for col, stats in df.describe().to_dict().items()
595
- }
596
- if len(df.select_dtypes(include=["number"]).columns) > 0
597
- else {},
598
  }
599
-
600
  script = llm.invoke(build_story_prompt(ctx_dict)).content
601
  scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
602
 
603
  video_parts, audio_parts, temps = [], [], []
604
  for idx, sc in enumerate(scenes[:VIDEO_SCENES]):
605
- st.progress(
606
- (idx + 1) / VIDEO_SCENES,
607
- text=f"Rendering Scene {idx + 1}/{VIDEO_SCENES}",
608
- )
609
-
610
- descs = extract_chart_tags(sc)
611
- narrative = clean_narration(sc)
612
-
613
- # audio
614
  audio_bytes, _ = deepgram_tts(narrative)
615
  mp3 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
616
- if audio_bytes:
617
- mp3.write_bytes(audio_bytes)
618
- dur = audio_duration(str(mp3))
619
- else:
620
- dur = 5.0
621
- generate_silence_mp3(dur, mp3)
622
- audio_parts.append(str(mp3))
623
- temps.append(mp3)
624
 
625
- # visual
626
  mp4 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
627
- if descs:
628
- safe_chart(descs[0], df, dur, mp4)
629
  else:
630
  img = generate_image_from_prompt(narrative)
631
- img_cv = cv2.cvtColor(
632
- np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR
633
- )
634
  animate_image_fade(img_cv, dur, mp4)
635
- video_parts.append(str(mp4))
636
- temps.append(mp4)
637
 
638
- # concat
639
- silent_vid = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
640
  concat_media(video_parts, silent_vid, "video")
641
- audio_mix = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
642
  concat_media(audio_parts, audio_mix, "audio")
643
-
644
  final_vid = Path(tempfile.gettempdir()) / f"{key}.mp4"
645
  subprocess.run(
646
- [
647
- "ffmpeg",
648
- "-y",
649
- "-i",
650
- str(silent_vid),
651
- "-i",
652
- str(audio_mix),
653
- "-c:v",
654
- "copy",
655
- "-c:a",
656
- "aac",
657
- "-shortest",
658
- str(final_vid),
659
- ],
660
- check=True,
661
- capture_output=True,
662
  )
663
-
664
- for p in temps + [silent_vid, audio_mix]:
665
- p.unlink(missing_ok=True)
666
-
667
  return str(final_vid)
668
 
669
- # ─── UI ────────────────────────────────────────────────────────────────────
670
- mode = st.radio(
671
- "Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True
672
- )
673
 
674
  upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
675
  if upl:
@@ -679,81 +525,48 @@ if upl:
679
 
680
  ctx = st.text_area("Business context or specific instructions (optional)")
681
 
682
- # ─── Generate button ──────────────────────────────────────────────────────
683
  if st.button("🚀 Generate", type="primary", disabled=not upl):
684
  key = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode()]))
 
685
 
686
  if mode == "Report (PDF)":
687
- df, md, chart_descs = prepare_report(upl.getvalue(), upl.name, ctx)
688
- if df is None:
689
- st.stop()
690
-
691
- st.session_state.lazy_reports[key] = {
692
- "df": df,
693
- "md": md,
694
- "charts": {},
695
- "pending": set(chart_descs),
696
- "finished": False,
697
- }
698
- for d in chart_descs:
699
- EXEC.submit(render_chart_worker, key, d)
700
-
701
- st.rerun()
702
-
703
- else: # video branch
704
- st.session_state.bundle = None
705
- path = generate_video(upl.getvalue(), upl.name, ctx, key)
706
- if path:
707
- st.session_state.bundle = {"type": "video", "video_path": path, "key": key}
708
- st.rerun()
709
-
710
- # ─── OUTPUT (with fixed preview rendering) ────────────────────────────────
711
- # 1) live PDF reports (may be multiple)
712
- for rep_key, rep in st.session_state.lazy_reports.items():
713
- st.subheader("📄 Generated Report")
714
- with st.expander("View Report", expanded=True):
715
- # This robust method substitutes tags with base64 <img> tags for completed
716
- # charts or a text placeholder for pending ones. This ensures correct rendering
717
- # of the interleaved text and images, as guided by the working example.
718
- md_with_imgs = TAG_RE.sub(
719
- lambda m: _substitute_chart_tags_for_preview(rep, m.group("d").strip()), rep["md"]
720
- )
721
- st.markdown(md_with_imgs, unsafe_allow_html=True)
722
 
723
- if rep["finished"]:
724
  c1, c2 = st.columns(2)
725
  with c1:
726
  st.download_button(
727
- "Download PDF",
728
- rep["pdf"],
729
- f"business_report_{rep_key[:8]}.pdf",
730
- "application/pdf",
731
- use_container_width=True,
732
  )
733
  with c2:
734
- if DG_KEY and st.button("🔊 Narrate Summary", key=f"aud_{rep_key}"):
735
- txt = re.sub(r"<[^>]+>", "", rep["md"])
736
  audio, mime = deepgram_tts(txt)
737
- if audio:
738
- st.audio(audio, format=mime)
739
- else:
740
- st.error("Narration failed.")
741
- else:
742
- st.info("Charts are still rendering… feel free to keep browsing.")
743
-
744
- # 2) video branch output
745
- if (bundle := st.session_state.get("bundle")) and bundle.get("type") == "video":
746
- st.subheader("🎬 Generated Video Narrative")
747
- vp = bundle["video_path"]
748
- if Path(vp).exists():
749
- with open(vp, "rb") as f:
750
- st.video(f.read())
751
- with open(vp, "rb") as f:
752
- st.download_button(
753
- "Download Video",
754
- f,
755
- f"sozo_narrative_{bundle['key'][:8]}.mp4",
756
- "video/mp4",
757
- )
758
- else:
759
- st.error("Video file missing – generation failed.")
 
1
+ ##############################################################################
2
  # Sozo Business Studio · 10-Jul-2025
3
+ # • REFACTORED: Removed lazy-loading to ensure stability on Streamlit.
4
+ # • Report generation is now a single, synchronous process.
5
+ # • Unified output under a single `st.session_state.bundle` for both modes.
6
+ # • This is the complete, unabridged code with no functions skipped.
 
 
 
7
  ##############################################################################
8
 
9
  import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
10
  from pathlib import Path
11
  from typing import Tuple, Dict, List
 
12
 
13
  import streamlit as st
14
  import pandas as pd
 
45
 
46
  sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
47
 
48
+ # --- Simplified Session State (No Lazy Loading) ---
49
+ st.session_state.setdefault("bundle", None)
 
 
 
50
 
51
  # ─── HELPERS ───────────────────────────────────────────────────────────────
52
  def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
 
94
 
95
  def generate_silence_mp3(duration: float, out: Path):
96
  subprocess.run(
97
+ [ "ffmpeg", "-y", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono", "-t", f"{duration:.3f}", "-q:a", "9", str(out), ],
98
+ check=True, capture_output=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  )
100
 
101
  def audio_duration(path: str) -> float:
102
  try:
103
  res = subprocess.run(
104
+ [ "ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=nw=1:nk=1", path, ],
105
+ text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True,
 
 
 
 
 
 
 
 
 
 
 
 
106
  )
107
  return float(res.stdout.strip())
108
  except Exception:
109
  return 5.0
110
 
111
+ TAG_RE = re.compile( r'[<[]\s*generate_?chart\s*[:=]?\s*[\"\'“”]?(?P<d>[^>\"\'”\]]+?)[\"\'“”]?\s*[>\]]', re.I, )
112
+ extract_chart_tags = lambda t: list( dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")) )
 
 
 
 
 
113
 
114
  re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I)
115
  def clean_narration(txt: str) -> str:
 
129
 
130
  def fetch(model_name):
131
  res = GEM.models.generate_content(
132
+ model=model_name, contents=full_prompt,
 
133
  config=types.GenerateContentConfig(response_modalities=["IMAGE"]),
134
  )
135
  for part in res.candidates[0].content.parts:
 
144
  return placeholder_img()
145
 
146
  # ─── PDF GENERATION ────────────────────────────────────────────────────────
147
+ class PDF(FPDF, HTMLMixin): pass
 
148
 
149
  def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
 
150
  def embed_chart_for_pdf(match):
151
  desc = match.group("d").strip()
152
  path = charts.get(desc)
 
195
  fig.savefig(out, bbox_inches="tight", facecolor="white")
196
  plt.close(fig)
197
 
198
+ # ─── SYNCHRONOUS REPORT GENERATION (NO LAZY LOADING) ────────────────────────
199
+ def generate_report_bundle(buf: bytes, name: str, ctx: str, key: str):
200
+ """
201
+ Generates the full report and all assets in a single, synchronous pass.
202
+ """
203
+ # 1. Load data and generate markdown text
204
  df, err = load_dataframe_safely(buf, name)
205
  if err:
206
  st.error(err)
207
+ return None
208
 
209
+ llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)
 
 
210
 
 
211
  ctx_dict = {
212
+ "shape": df.shape, "columns": list(df.columns), "user_ctx": ctx or "General business analysis",
 
 
213
  "full_dataframe": df.to_dict("records"),
214
+ "data_types": {c: str(d) for c, d in df.dtypes.to_dict().items()},
215
+ "missing_values": {c: int(v) for c, v in df.isnull().sum().to_dict().items()},
216
+ "numeric_summary": {c: {s: float(v) for s, v in stats.items()} for c, stats in df.describe().to_dict().items()} if len(df.select_dtypes(include=["number"]).columns) > 0 else {},
 
 
 
 
 
 
 
217
  }
218
  cols = ", ".join(ctx_dict["columns"][:6])
219
 
 
259
 
260
  Generate insights that would be valuable to C-level executives and department heads.
261
  """
 
 
262
  md = llm.invoke(report_prompt).content
263
  chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
 
264
 
265
+ # 2. Generate all charts sequentially
266
+ chart_paths = {}
267
+ agent = create_pandas_dataframe_agent(llm=llm, df=df, verbose=False, allow_dangerous_code=True)
268
+ for desc in chart_descs:
269
+ with st.spinner(f"Generating chart: {desc}..."):
270
+ img_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
271
+ try:
272
+ chart_prompt = f"""
273
+ Create a professional {desc} chart using matplotlib with these requirements:
274
+ 1. Use a clean, business-appropriate style
275
+ 2. Include proper title, axis labels, and legends
276
+ 3. Apply appropriate color schemes (avoid rainbow colors)
277
+ 4. Ensure text is readable (font size 10+)
278
+ 5. Format numbers appropriately (e.g., currency, percentages)
279
+ 6. Save the figure with high quality
280
+ 7. Handle any missing or null values appropriately
281
+ """
282
+ agent.run(chart_prompt)
283
+ if not img_path.exists(): raise RuntimeError("LLM did not save figure")
284
+ except Exception:
285
+ try: quick_chart(desc, df, img_path)
286
+ except Exception: img_path = None
287
+ if img_path and img_path.exists():
288
+ chart_paths[desc] = str(img_path)
289
+
290
+ # 3. Assemble the final report bundle
291
+ pdf_bytes = build_pdf(md, chart_paths)
292
+
293
+ def _substitute_tags_for_preview(match):
294
+ desc = match.group("d").strip()
295
+ path = chart_paths.get(desc)
296
+ if path:
297
+ b64 = base64.b64encode(Path(path).read_bytes()).decode()
298
+ return f'<img src="data:image/png;base64,{b64}" style="max-width:100%;">'
299
+ return f"*Chart '{desc}' could not be generated.*"
300
 
301
+ preview_md = TAG_RE.sub(_substitute_tags_for_preview, md)
 
302
 
303
+ return {
304
+ "type": "report", "key": key, "preview_md": preview_md,
305
+ "pdf": pdf_bytes, "raw_md": md
306
+ }
307
 
308
+ # ─── ANIMATION HELPERS ────────────────────────────────────────
309
+ def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path, fps: int = FPS) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  frames = max(int(dur * fps), fps)
311
+ vid = cv2.VideoWriter(str(out), cv2.VideoWriter_fourcc(*"mp4v"), fps, (WIDTH, HEIGHT))
 
312
  blank = np.full_like(img_cv2, 255)
313
  for i in range(frames):
314
  a = i / frames
 
316
  vid.release()
317
  return str(out)
318
 
319
+ def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path, fps: int = FPS) -> str:
 
320
  """Render an animated chart whose clip length equals `dur`."""
321
  ctype, *rest = [s.strip().lower() for s in desc.split("|", 1)]
322
  ctype = ctype or "bar"
 
341
  if ctype == "pie":
342
  wedges, _ = ax.pie(plot_df, labels=plot_df.index, startangle=90)
343
  ax.set_title(title)
 
344
  def init(): [w.set_alpha(0) for w in wedges]; return wedges
345
  def update(i):
346
  a = i / (frames - 1)
 
350
  elif ctype == "bar":
351
  bars = ax.bar(plot_df.index, np.zeros_like(plot_df.values), color="#1f77b4")
352
  ax.set_ylim(0, plot_df.max() * 1.1); ax.set_title(title)
 
353
  def init(): return bars
354
  def update(i):
355
  a = i / (frames - 1)
356
+ for b, h in zip(bars, plot_df.values): b.set_height(h * a)
 
357
  return bars
358
 
359
  elif ctype == "hist":
360
  _, _, patches = ax.hist(plot_df, bins=20, color="#1f77b4", alpha=0)
361
  ax.set_title(title)
 
362
  def init(): [p.set_alpha(0) for p in patches]; return patches
363
  def update(i):
364
  a = i / (frames - 1)
 
366
  return patches
367
 
368
  elif ctype == "scatter":
369
+ pts = ax.scatter(plot_df.iloc[:, 0], plot_df.iloc[:, 1], s=10, alpha=0)
 
370
  ax.set_title(title); ax.grid(alpha=.3)
 
371
  def init(): pts.set_alpha(0); return [pts]
372
+ def update(i): pts.set_alpha(i / (frames - 1)); return [pts]
 
373
 
374
  else: # line
375
  line, = ax.plot([], [], lw=2)
376
+ x_full = (plot_df.iloc[:, 0] if plot_df.shape[1] > 1 else np.arange(len(plot_df)))
377
+ y_full = (plot_df.iloc[:, 1] if plot_df.shape[1] > 1 else plot_df.iloc[:, 0])
378
+ ax.set_xlim(x_full.min(), x_full.max()); ax.set_ylim(y_full.min(), y_full.max())
 
 
 
379
  ax.set_title(title); ax.grid(alpha=.3)
 
380
  def init(): line.set_data([], []); return [line]
381
  def update(i):
382
  k = max(2, int(len(x_full) * i / (frames - 1)))
383
  line.set_data(x_full[:k], y_full.iloc[:k])
384
  return [line]
385
 
386
+ anim = FuncAnimation(fig, update, init_func=init, frames=frames, blit=True, interval=1000 / fps)
387
+ anim.save(str(out), writer=FFMpegWriter(fps=fps, metadata={'artist':'Sozo'}), dpi=144)
 
 
 
 
388
  plt.close(fig)
389
  return str(out)
390
 
391
  def safe_chart(desc, df, dur, out):
392
+ try: return animate_chart(desc, df, dur, out)
 
393
  except Exception:
394
  with plt.ioff():
395
  df.plot(ax=plt.gca())
 
399
  return animate_image_fade(img, dur, out)
400
 
401
  def concat_media(paths: List[str], out: Path, kind="video"):
402
+ if not paths: return
 
403
  lst = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
404
  with lst.open("w") as f:
405
  for p in paths:
406
+ if Path(p).exists(): f.write(f"file '{Path(p).resolve()}'\n")
 
407
  subprocess.run(
408
+ [ "ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(lst), "-c:v" if kind == "video" else "-c:a", "copy", str(out), ],
409
+ check=True, capture_output=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
410
  )
411
  lst.unlink(missing_ok=True)
412
 
413
+ # ─── VIDEO GENERATION ────────────────────────────
414
  def build_story_prompt(ctx_dict):
415
  cols = ", ".join(ctx_dict["columns"][:6])
416
  return f"""
 
468
  """
469
 
470
  def generate_video(buf: bytes, name: str, ctx: str, key: str):
471
+ try: subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
472
+ except Exception: st.error("🔴 FFmpeg not available — cannot render video."); return None
 
 
 
473
 
474
  df, err = load_dataframe_safely(buf, name)
475
+ if err: st.error(err); return None
 
 
 
 
 
 
476
 
477
+ llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.2)
478
  ctx_dict = {
479
+ "shape": df.shape, "columns": list(df.columns), "user_ctx": ctx or "General business analysis",
 
 
480
  "full_dataframe": df.to_dict("records"),
481
  "data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
482
+ "numeric_summary": {col: {stat: float(val) for stat, val in stats.items()} for col, stats in df.describe().to_dict().items()} if len(df.select_dtypes(include=["number"]).columns) > 0 else {},
 
 
 
 
 
483
  }
 
484
  script = llm.invoke(build_story_prompt(ctx_dict)).content
485
  scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
486
 
487
  video_parts, audio_parts, temps = [], [], []
488
  for idx, sc in enumerate(scenes[:VIDEO_SCENES]):
489
+ st.progress((idx + 1) / VIDEO_SCENES, text=f"Rendering Scene {idx + 1}/{VIDEO_SCENES}")
490
+ descs, narrative = extract_chart_tags(sc), clean_narration(sc)
 
 
 
 
 
 
 
491
  audio_bytes, _ = deepgram_tts(narrative)
492
  mp3 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
493
+ if audio_bytes: mp3.write_bytes(audio_bytes); dur = audio_duration(str(mp3))
494
+ else: dur = 5.0; generate_silence_mp3(dur, mp3)
495
+ audio_parts.append(str(mp3)); temps.append(mp3)
 
 
 
 
 
496
 
 
497
  mp4 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
498
+ if descs: safe_chart(descs[0], df, dur, mp4)
 
499
  else:
500
  img = generate_image_from_prompt(narrative)
501
+ img_cv = cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
 
 
502
  animate_image_fade(img_cv, dur, mp4)
503
+ video_parts.append(str(mp4)); temps.append(mp4)
 
504
 
505
+ silent_vid, audio_mix = Path(tempfile.gettempdir())/f"{uuid.uuid4()}.mp4", Path(tempfile.gettempdir())/f"{uuid.uuid4()}.mp3"
 
506
  concat_media(video_parts, silent_vid, "video")
 
507
  concat_media(audio_parts, audio_mix, "audio")
 
508
  final_vid = Path(tempfile.gettempdir()) / f"{key}.mp4"
509
  subprocess.run(
510
+ [ "ffmpeg", "-y", "-i", str(silent_vid), "-i", str(audio_mix), "-c:v", "copy", "-c:a", "aac", "-shortest", str(final_vid), ],
511
+ check=True, capture_output=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
  )
513
+ for p in temps + [silent_vid, audio_mix]: p.unlink(missing_ok=True)
 
 
 
514
  return str(final_vid)
515
 
516
+
517
+ # ─── UI & MAIN WORKFLOW ──────────────────────────────────────────────────
518
+ mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
 
519
 
520
  upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
521
  if upl:
 
525
 
526
  ctx = st.text_area("Business context or specific instructions (optional)")
527
 
528
+ # ─── Generate button (with synchronous flow) ──────────────────────────
529
  if st.button("🚀 Generate", type="primary", disabled=not upl):
530
  key = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode()]))
531
+ st.session_state.bundle = None # Clear previous results
532
 
533
  if mode == "Report (PDF)":
534
+ with st.spinner("Generating full report and charts... Please wait."):
535
+ bundle = generate_report_bundle(upl.getvalue(), upl.name, ctx, key)
536
+ st.session_state.bundle = bundle
537
+ else: # Video branch (already synchronous)
538
+ # The video function already shows progress, so a top-level spinner is not needed.
539
+ bundle_path = generate_video(upl.getvalue(), upl.name, ctx, key)
540
+ if bundle_path:
541
+ st.session_state.bundle = {"type": "video", "video_path": bundle_path, "key": key}
542
+ st.rerun() # Rerun once to display the final state
543
+
544
+ # ─── UNIFIED OUTPUT AREA ─────────────────────────────────────────────────
545
+ if (bundle := st.session_state.get("bundle")):
546
+ if bundle.get("type") == "report":
547
+ st.subheader("📄 Generated Report")
548
+ with st.expander("View Report", expanded=True):
549
+ st.markdown(bundle["preview_md"], unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550
 
 
551
  c1, c2 = st.columns(2)
552
  with c1:
553
  st.download_button(
554
+ "Download PDF", bundle["pdf"], f"business_report_{bundle['key'][:8]}.pdf",
555
+ "application/pdf", use_container_width=True,
 
 
 
556
  )
557
  with c2:
558
+ if DG_KEY and st.button("🔊 Narrate Summary", key=f"aud_{bundle['key']}"):
559
+ txt = re.sub(r"<[^>]+>", "", bundle["raw_md"])
560
  audio, mime = deepgram_tts(txt)
561
+ if audio: st.audio(audio, format=mime)
562
+ else: st.error("Narration failed.")
563
+
564
+ elif bundle.get("type") == "video":
565
+ st.subheader("🎬 Generated Video Narrative")
566
+ vp = bundle["video_path"]
567
+ if Path(vp).exists():
568
+ with open(vp, "rb") as f: st.video(f.read())
569
+ with open(vp, "rb") as f:
570
+ st.download_button("Download Video", f, f"sozo_narrative_{bundle['key'][:8]}.mp4", "video/mp4")
571
+ else:
572
+ st.error("Video file missing – generation may have failed.")