rairo committed on
Commit
7ff3101
·
verified ·
1 Parent(s): bfcb421

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +173 -134
app.py CHANGED
@@ -1,10 +1,11 @@
1
  ##############################################################################
2
- # Sozo Business Studio · 09-Jul-2025 #
3
- # • Clean narrator text (no scene labels / chart talk) #
4
- # • Enforce chart-tag-driven visuals (bar, pie, line, scatter, hist) #
5
- # • Fix image generation (Gemini Flash preview) & placeholder fallback #
6
- # • Animation starts blank; artists returned for blit=True #
7
- # • Silent-audio fallback keeps mux lengths equal #
 
8
  ##############################################################################
9
 
10
  import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
@@ -26,16 +27,14 @@ import cv2
26
  from langchain_experimental.agents import create_pandas_dataframe_agent
27
  from langchain_google_genai import ChatGoogleGenerativeAI
28
  from google import genai
29
- from google.genai import types # GenerateContentConfig for image calls
30
 
31
- # ────────────────────────────────────────────────────────────────────────────
32
- # CONFIG
33
- # ────────────────────────────────────────────────────────────────────────────
34
  st.set_page_config(page_title="Sozo Business Studio", layout="wide")
35
  st.title("📊 Sozo Business Studio")
36
  st.caption("AI transforms business data into compelling narratives.")
37
 
38
- FPS, WIDTH, HEIGHT = 24, 1280, 720
39
  MAX_CHARTS, VIDEO_SCENES = 5, 5
40
 
41
  API_KEY = os.getenv("GEMINI_API_KEY")
@@ -43,14 +42,13 @@ if not API_KEY:
43
  st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
44
  GEM = genai.Client(api_key=API_KEY)
45
 
46
- DG_KEY = os.getenv("DEEPGRAM_API_KEY") # optional narration
47
  st.session_state.setdefault("bundle", None)
48
  sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
49
 
50
- # ────────────────────────────────────────────────────────────────────────────
51
- # HELPERS
52
- # ────────────────────────────────────────────────────────────────────────────
53
  def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
 
54
  try:
55
  ext = Path(name).suffix.lower()
56
  df = (pd.read_excel if ext in (".xlsx", ".xls") else pd.read_csv)(io.BytesIO(buf))
@@ -64,6 +62,7 @@ def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
64
 
65
 
66
  def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
 
67
  safe = df.copy()
68
  for c in safe.columns:
69
  if safe[c].dtype.name in ("Int64", "Float64", "Boolean"):
@@ -73,9 +72,10 @@ def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
73
 
74
  @st.cache_data(show_spinner=False)
75
  def deepgram_tts(txt: str) -> Tuple[bytes, str]:
 
76
  if not DG_KEY or not txt:
77
  return None, None
78
- txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)[:1000] # Deepgram text hygiene
79
  try:
80
  r = requests.post(
81
  "https://api.deepgram.com/v1/speak",
@@ -118,31 +118,11 @@ re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I)
118
  def clean_narration(txt: str) -> str:
119
  txt = re_scene.sub("", txt)
120
  txt = TAG_RE.sub("", txt)
121
- txt = re.sub(r"\s*\([^)]*\)", "", txt) # remove parentheticals
122
  txt = re.sub(r"\s{2,}", " ", txt).strip()
123
  return txt
124
 
125
 
126
- # ─── PDF GENERATION (unchanged logic) ───────────────────────────────────────
127
- class PDF(FPDF, HTMLMixin):
128
- pass
129
-
130
-
131
- def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
132
- html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(
133
- TAG_RE.sub(lambda m: f'<img src="{charts.get(m.group("d").strip(), "")}">', md)
134
- )
135
- pdf = PDF()
136
- pdf.set_auto_page_break(True, margin=15)
137
- pdf.add_page()
138
- pdf.set_font("Arial", "B", 18)
139
- pdf.cell(0, 12, "AI-Generated Business Report", ln=True)
140
- pdf.ln(3)
141
- pdf.set_font("Arial", "", 11)
142
- pdf.write_html(html)
143
- return bytes(pdf.output(dest="S"))
144
-
145
-
146
  # ─── IMAGE GENERATION & PLACEHOLDER ────────────────────────────────────────
147
  def placeholder_img() -> Image.Image:
148
  return Image.new("RGB", (WIDTH, HEIGHT), (230, 230, 230))
@@ -151,7 +131,7 @@ def placeholder_img() -> Image.Image:
151
  def generate_image_from_prompt(prompt: str) -> Image.Image:
152
  model_main = "gemini-2.0-flash-exp-image-generation"
153
  model_fallback = "gemini-2.0-flash-preview-image-generation"
154
- full_prompt = ("A clean business-presentation illustration: " + prompt)
155
 
156
  def fetch(model_name):
157
  res = GEM.models.generate_content(
@@ -171,6 +151,86 @@ def generate_image_from_prompt(prompt: str) -> Image.Image:
171
  return placeholder_img()
172
 
173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  # ─── ANIMATION HELPERS ─────────────────────────────────────────────────────
175
  def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path, fps: int = FPS) -> str:
176
  frames = max(int(dur * fps), fps)
@@ -188,7 +248,6 @@ def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path, fps: int =
188
  ctype = ctype or "bar"
189
  title = rest[0] if rest else desc
190
 
191
- # aggregate or prepare data
192
  if ctype == "pie":
193
  cat = df.select_dtypes(exclude="number").columns[0]
194
  num = df.select_dtypes(include="number").columns[0]
@@ -196,7 +255,7 @@ def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path, fps: int =
196
  elif ctype in ("bar", "hist"):
197
  num = df.select_dtypes(include="number").columns[0]
198
  pdf = df[num]
199
- else: # line/scatter
200
  cols = df.select_dtypes(include="number").columns[:2]
201
  pdf = df[list(cols)].sort_index()
202
 
@@ -207,54 +266,29 @@ def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path, fps: int =
207
  wedges, _ = ax.pie(pdf, labels=pdf.index, startangle=90)
208
  ax.set_title(title)
209
 
210
- def init():
211
- for w in wedges: w.set_alpha(0)
212
- return wedges
213
-
214
- def update(i):
215
- a = i / frames
216
- for w in wedges: w.set_alpha(a)
217
- return wedges
218
 
219
  elif ctype == "bar":
220
  bars = ax.bar(pdf.index, np.zeros_like(pdf.values), color="#1f77b4")
221
- ax.set_ylim(0, pdf.max() * 1.1)
222
- ax.set_title(title)
223
-
224
- def init():
225
- return bars
226
 
227
- def update(i):
228
- f = i / frames
229
- for b, h in zip(bars, pdf.values):
230
- b.set_height(h * f)
231
- return bars
232
 
233
  elif ctype == "hist":
234
  _, _, patches = ax.hist(pdf, bins=20, color="#1f77b4", alpha=0)
235
  ax.set_title(title)
236
 
237
- def init():
238
- for p in patches: p.set_alpha(0)
239
- return patches
240
-
241
- def update(i):
242
- a = i / frames
243
- for p in patches: p.set_alpha(a)
244
- return patches
245
 
246
  elif ctype == "scatter":
247
  pts = ax.scatter(pdf.iloc[:, 0], pdf.iloc[:, 1], s=10, alpha=0)
248
- ax.set_title(title)
249
- ax.grid(alpha=0.3)
250
 
251
- def init():
252
- pts.set_alpha(0)
253
- return [pts]
254
-
255
- def update(i):
256
- pts.set_alpha(i / frames)
257
- return [pts]
258
 
259
  else: # line
260
  line, = ax.plot([], [], lw=2)
@@ -262,21 +296,13 @@ def animate_chart(desc: str, df: pd.DataFrame, dur: float, out: Path, fps: int =
262
  y_full = pdf.iloc[:, 1] if pdf.shape[1] > 1 else pdf.iloc[:, 0]
263
  ax.set_xlim(x_full.min(), x_full.max())
264
  ax.set_ylim(y_full.min(), y_full.max())
265
- ax.set_title(title)
266
- ax.grid(alpha=0.3)
267
 
268
- def init():
269
- line.set_data([], [])
270
- return [line]
271
 
272
- def update(i):
273
- k = max(2, int(len(x_full) * i / frames))
274
- line.set_data(x_full[:k], y_full.iloc[:k])
275
- return [line]
276
-
277
- anim = FuncAnimation(
278
- fig, update, init_func=init, frames=frames,
279
- blit=True, interval=1000 / fps)
280
  anim.save(str(out), writer=FFMpegWriter(fps=fps, metadata={'artist': 'Sozo'}), dpi=144)
281
  plt.close(fig)
282
  return str(out)
@@ -288,10 +314,9 @@ def safe_chart(desc, df, dur, out):
288
  except Exception:
289
  with plt.ioff():
290
  df.plot(ax=plt.gca())
291
- tmp_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
292
- plt.savefig(tmp_png, bbox_inches="tight")
293
- plt.close()
294
- img = cv2.resize(cv2.imread(str(tmp_png)), (WIDTH, HEIGHT))
295
  return animate_image_fade(img, dur, out)
296
 
297
 
@@ -310,9 +335,7 @@ def concat_media(paths: List[str], out: Path, kind="video"):
310
  lst.unlink(missing_ok=True)
311
 
312
 
313
- # ────────────────────────────────────────────────────────────────────────────
314
- # PROMPT HELPERS
315
- # ────────────────────────────────────────────────────────────────────────────
316
  def build_story_prompt(ctx_dict):
317
  cols = ", ".join(ctx_dict["columns"][:6])
318
  return (
@@ -326,9 +349,6 @@ def build_story_prompt(ctx_dict):
326
  )
327
 
328
 
329
- # ────────────────────────────────────────────────────────────────────────────
330
- # VIDEO GENERATION
331
- # ────────────────────────────────────────────────────────────────────────────
332
  def generate_video(buf: bytes, name: str, ctx: str, key: str):
333
  try:
334
  subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
@@ -358,28 +378,28 @@ def generate_video(buf: bytes, name: str, ctx: str, key: str):
358
  descs = extract_chart_tags(sc)
359
  narrative = clean_narration(sc)
360
 
361
- # ----- audio ---------------------------------------------------------
362
  audio_bytes, _ = deepgram_tts(narrative)
363
- mp3_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
364
  if audio_bytes:
365
- mp3_path.write_bytes(audio_bytes)
366
- dur = audio_duration(str(mp3_path))
367
  else:
368
  dur = 5.0
369
- generate_silence_mp3(dur, mp3_path)
370
- audio_parts.append(str(mp3_path)); temps.append(mp3_path)
371
 
372
- # ----- visual --------------------------------------------------------
373
- mp4_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
374
  if descs:
375
- safe_chart(descs[0], df, dur, mp4_path)
376
  else:
377
  img = generate_image_from_prompt(narrative)
378
  img_cv = cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
379
- animate_image_fade(img_cv, dur, mp4_path)
380
- video_parts.append(str(mp4_path)); temps.append(mp4_path)
381
 
382
- # ----- concatenate -------------------------------------------------------
383
  silent_vid = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
384
  concat_media(video_parts, silent_vid, "video")
385
  audio_mix = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
@@ -397,37 +417,56 @@ def generate_video(buf: bytes, name: str, ctx: str, key: str):
397
  return str(final_vid)
398
 
399
 
400
- # ────────────────────────────────────────────────────────────────────────────
401
- # UI
402
- # ────────────────────────────────────────────────────────────────────────────
403
  upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
404
  if upl:
405
- df_preview, _ = load_dataframe_safely(upl.getvalue(), upl.name)
406
  with st.expander("📊 Data Preview"):
407
- st.dataframe(arrow_df(df_preview.head()))
408
 
409
  ctx = st.text_area("Business context or specific instructions (optional)")
410
 
411
- if st.button("🚀 Generate Video", type="primary", disabled=not upl):
412
- key = sha1_bytes(b"".join([upl.getvalue(), ctx.encode()]))
413
- st.session_state.bundle = None
414
  with st.spinner("Generating…"):
415
- path = generate_video(upl.getvalue(), upl.name, ctx, key)
416
- if path:
417
- st.session_state.bundle = {"video_path": path, "key": key}
 
 
 
 
418
  st.rerun()
419
 
420
- # ────────────────────────────────────────────────────────────────────────────
421
- # OUTPUT
422
- # ────────────────────────────────────────────────────────────────────────────
423
  if bundle := st.session_state.get("bundle"):
424
- vp = bundle["video_path"]
425
- if Path(vp).exists():
426
- with open(vp, "rb") as f:
427
- st.video(f.read())
428
- with open(vp, "rb") as f:
429
- st.download_button("Download Video", f,
430
- f"sozo_narrative_{bundle['key'][:8]}.mp4",
431
- "video/mp4")
432
- else:
433
- st.error("Video file missing – generation failed.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ##############################################################################
2
+ # Sozo Business Studio · 10-Jul-2025 (full drop-in) #
3
+ # • Restores PDF branch alongside fixed Video branch #
4
+ # • Shared chart-tag grammar across both paths #
5
+ # • Narrator text cleans scene labels + chart talk #
6
+ # • Matplotlib animation starts from blank; artists returned (blit=True) #
7
+ # • Gemini Flash-preview image gen with placeholder fallback #
8
+ # • Silent-audio fallback keeps mux lengths equal #
9
  ##############################################################################
10
 
11
  import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
 
27
  from langchain_experimental.agents import create_pandas_dataframe_agent
28
  from langchain_google_genai import ChatGoogleGenerativeAI
29
  from google import genai
30
+ from google.genai import types # for GenerateContentConfig
31
 
32
+ # ─── CONFIG ────────────────────────────────────────────────────────────────
 
 
33
  st.set_page_config(page_title="Sozo Business Studio", layout="wide")
34
  st.title("📊 Sozo Business Studio")
35
  st.caption("AI transforms business data into compelling narratives.")
36
 
37
+ FPS, WIDTH, HEIGHT = 24, 1280, 720
38
  MAX_CHARTS, VIDEO_SCENES = 5, 5
39
 
40
  API_KEY = os.getenv("GEMINI_API_KEY")
 
42
  st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
43
  GEM = genai.Client(api_key=API_KEY)
44
 
45
+ DG_KEY = os.getenv("DEEPGRAM_API_KEY") # optional for narration
46
  st.session_state.setdefault("bundle", None)
47
  sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
48
 
49
+ # ─── HELPERS ───────────────────────────────────────────────────────────────
 
 
50
  def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
51
+ """Load CSV/Excel, return (df, err)."""
52
  try:
53
  ext = Path(name).suffix.lower()
54
  df = (pd.read_excel if ext in (".xlsx", ".xls") else pd.read_csv)(io.BytesIO(buf))
 
62
 
63
 
64
  def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
65
+ """Convert for Streamlit Arrow renderer."""
66
  safe = df.copy()
67
  for c in safe.columns:
68
  if safe[c].dtype.name in ("Int64", "Float64", "Boolean"):
 
72
 
73
  @st.cache_data(show_spinner=False)
74
  def deepgram_tts(txt: str) -> Tuple[bytes, str]:
75
+ """Optional audio narration."""
76
  if not DG_KEY or not txt:
77
  return None, None
78
+ txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)[:1000]
79
  try:
80
  r = requests.post(
81
  "https://api.deepgram.com/v1/speak",
 
118
  def clean_narration(txt: str) -> str:
119
  txt = re_scene.sub("", txt)
120
  txt = TAG_RE.sub("", txt)
121
+ txt = re.sub(r"\s*\([^)]*\)", "", txt)
122
  txt = re.sub(r"\s{2,}", " ", txt).strip()
123
  return txt
124
 
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  # ─── IMAGE GENERATION & PLACEHOLDER ────────────────────────────────────────
127
  def placeholder_img() -> Image.Image:
128
  return Image.new("RGB", (WIDTH, HEIGHT), (230, 230, 230))
 
131
  def generate_image_from_prompt(prompt: str) -> Image.Image:
132
  model_main = "gemini-2.0-flash-exp-image-generation"
133
  model_fallback = "gemini-2.0-flash-preview-image-generation"
134
+ full_prompt = "A clean business-presentation illustration: " + prompt
135
 
136
  def fetch(model_name):
137
  res = GEM.models.generate_content(
 
151
  return placeholder_img()
152
 
153
 
154
+ # ─── PDF GENERATION ────────────────────────────────────────────────────────
155
+ class PDF(FPDF, HTMLMixin):
156
+ pass
157
+
158
+
159
+ def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
160
+ html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(
161
+ TAG_RE.sub(lambda m: f'<img src="{charts.get(m.group("d").strip(), "")}">', md)
162
+ )
163
+ pdf = PDF()
164
+ pdf.set_auto_page_break(True, margin=15)
165
+ pdf.add_page()
166
+ pdf.set_font("Arial", "B", 18)
167
+ pdf.cell(0, 12, "AI-Generated Business Report", ln=True)
168
+ pdf.ln(3)
169
+ pdf.set_font("Arial", "", 11)
170
+ pdf.write_html(html)
171
+ return bytes(pdf.output(dest="S"))
172
+
173
+
174
+ def generate_report(buf: bytes, name: str, ctx: str, key: str):
175
+ df, err = load_dataframe_safely(buf, name)
176
+ if err:
177
+ st.error(err); return None
178
+
179
+ llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
180
+ google_api_key=API_KEY, temperature=0.1)
181
+
182
+ ctx_dict = {
183
+ "shape": df.shape,
184
+ "columns": list(df.columns),
185
+ "user_ctx": ctx or "General business analysis",
186
+ }
187
+ cols = ", ".join(ctx_dict["columns"][:6])
188
+ report_prompt = (
189
+ "You are a senior business analyst. Write an executive-level Markdown report "
190
+ "with insights & recommendations.\n"
191
+ 'When a visual is helpful, insert a tag like <generate_chart: "pie | sales by region"> '
192
+ "(chart_type first, then a description). Valid chart types: bar, pie, line, scatter, hist.\n"
193
+ f"Base every chart on columns ({cols}) from the dataset.\n"
194
+ f"Data context:\n{json.dumps(ctx_dict, indent=2)}"
195
+ )
196
+ md = llm.invoke(report_prompt).content
197
+
198
+ chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
199
+ charts: Dict[str, str] = {}
200
+ if chart_descs:
201
+ agent = create_pandas_dataframe_agent(
202
+ llm=llm, df=df, verbose=False, allow_dangerous_code=True
203
+ )
204
+ for d in chart_descs:
205
+ with st.spinner(f"Generating chart: {d}"):
206
+ with plt.ioff():
207
+ try:
208
+ agent.run(f"Create a {d} with Matplotlib and save.")
209
+ fig = plt.gcf()
210
+ if fig.axes:
211
+ p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
212
+ fig.savefig(p, dpi=300, bbox_inches="tight", facecolor="white")
213
+ charts[d] = str(p)
214
+ plt.close("all")
215
+ except Exception:
216
+ plt.close("all")
217
+
218
+ preview = TAG_RE.sub(
219
+ lambda m: f'<img src="data:image/png;base64,{base64.b64encode(Path(charts[m.group("d").strip()]).read_bytes()).decode()}">'
220
+ if m.group("d").strip() in charts else m.group(0),
221
+ md
222
+ )
223
+ pdf_bytes = build_pdf(md, charts)
224
+
225
+ return {
226
+ "type": "report",
227
+ "preview": preview,
228
+ "pdf": pdf_bytes,
229
+ "report_md": md,
230
+ "key": key,
231
+ }
232
+
233
+
234
  # ─── ANIMATION HELPERS ─────────────────────────────────────────────────────
235
  def animate_image_fade(img_cv2: np.ndarray, dur: float, out: Path, fps: int = FPS) -> str:
236
  frames = max(int(dur * fps), fps)
 
248
  ctype = ctype or "bar"
249
  title = rest[0] if rest else desc
250
 
 
251
  if ctype == "pie":
252
  cat = df.select_dtypes(exclude="number").columns[0]
253
  num = df.select_dtypes(include="number").columns[0]
 
255
  elif ctype in ("bar", "hist"):
256
  num = df.select_dtypes(include="number").columns[0]
257
  pdf = df[num]
258
+ else:
259
  cols = df.select_dtypes(include="number").columns[:2]
260
  pdf = df[list(cols)].sort_index()
261
 
 
266
  wedges, _ = ax.pie(pdf, labels=pdf.index, startangle=90)
267
  ax.set_title(title)
268
 
269
+ def init(): [w.set_alpha(0) for w in wedges]; return wedges
270
+ def update(i): a=i/frames; [w.set_alpha(a) for w in wedges]; return wedges
 
 
 
 
 
 
271
 
272
  elif ctype == "bar":
273
  bars = ax.bar(pdf.index, np.zeros_like(pdf.values), color="#1f77b4")
274
+ ax.set_ylim(0, pdf.max() * 1.1); ax.set_title(title)
 
 
 
 
275
 
276
+ def init(): return bars
277
+ def update(i): f=i/frames; [b.set_height(h*f) for b,h in zip(bars,pdf.values)]; return bars
 
 
 
278
 
279
  elif ctype == "hist":
280
  _, _, patches = ax.hist(pdf, bins=20, color="#1f77b4", alpha=0)
281
  ax.set_title(title)
282
 
283
+ def init(): [p.set_alpha(0) for p in patches]; return patches
284
+ def update(i): a=i/frames; [p.set_alpha(a) for p in patches]; return patches
 
 
 
 
 
 
285
 
286
  elif ctype == "scatter":
287
  pts = ax.scatter(pdf.iloc[:, 0], pdf.iloc[:, 1], s=10, alpha=0)
288
+ ax.set_title(title); ax.grid(alpha=.3)
 
289
 
290
+ def init(): pts.set_alpha(0); return [pts]
291
+ def update(i): pts.set_alpha(i/frames); return [pts]
 
 
 
 
 
292
 
293
  else: # line
294
  line, = ax.plot([], [], lw=2)
 
296
  y_full = pdf.iloc[:, 1] if pdf.shape[1] > 1 else pdf.iloc[:, 0]
297
  ax.set_xlim(x_full.min(), x_full.max())
298
  ax.set_ylim(y_full.min(), y_full.max())
299
+ ax.set_title(title); ax.grid(alpha=.3)
 
300
 
301
+ def init(): line.set_data([], []); return [line]
302
+ def update(i): k=max(2,int(len(x_full)*i/frames)); line.set_data(x_full[:k],y_full.iloc[:k]); return [line]
 
303
 
304
+ anim = FuncAnimation(fig, update, init_func=init, frames=frames,
305
+ blit=True, interval=1000 / fps)
 
 
 
 
 
 
306
  anim.save(str(out), writer=FFMpegWriter(fps=fps, metadata={'artist': 'Sozo'}), dpi=144)
307
  plt.close(fig)
308
  return str(out)
 
314
  except Exception:
315
  with plt.ioff():
316
  df.plot(ax=plt.gca())
317
+ p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
318
+ plt.savefig(p, bbox_inches="tight"); plt.close()
319
+ img = cv2.resize(cv2.imread(str(p)), (WIDTH, HEIGHT))
 
320
  return animate_image_fade(img, dur, out)
321
 
322
 
 
335
  lst.unlink(missing_ok=True)
336
 
337
 
338
+ # ─── VIDEO GENERATION ──────────────────────────────────────────────────────
 
 
339
  def build_story_prompt(ctx_dict):
340
  cols = ", ".join(ctx_dict["columns"][:6])
341
  return (
 
349
  )
350
 
351
 
 
 
 
352
  def generate_video(buf: bytes, name: str, ctx: str, key: str):
353
  try:
354
  subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
 
378
  descs = extract_chart_tags(sc)
379
  narrative = clean_narration(sc)
380
 
381
+ # --- audio ---
382
  audio_bytes, _ = deepgram_tts(narrative)
383
+ mp3 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
384
  if audio_bytes:
385
+ mp3.write_bytes(audio_bytes)
386
+ dur = audio_duration(str(mp3))
387
  else:
388
  dur = 5.0
389
+ generate_silence_mp3(dur, mp3)
390
+ audio_parts.append(str(mp3)); temps.append(mp3)
391
 
392
+ # --- visual ---
393
+ mp4 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
394
  if descs:
395
+ safe_chart(descs[0], df, dur, mp4)
396
  else:
397
  img = generate_image_from_prompt(narrative)
398
  img_cv = cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
399
+ animate_image_fade(img_cv, dur, mp4)
400
+ video_parts.append(str(mp4)); temps.append(mp4)
401
 
402
+ # concat
403
  silent_vid = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
404
  concat_media(video_parts, silent_vid, "video")
405
  audio_mix = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
 
417
  return str(final_vid)
418
 
419
 
420
+ # ─── UI ─────────────────────────────────────────────────────────────────────
421
+ mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
422
+
423
  upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
424
  if upl:
425
+ df_prev, _ = load_dataframe_safely(upl.getvalue(), upl.name)
426
  with st.expander("📊 Data Preview"):
427
+ st.dataframe(arrow_df(df_prev.head()))
428
 
429
  ctx = st.text_area("Business context or specific instructions (optional)")
430
 
431
+ if st.button("🚀 Generate", type="primary", disabled=not upl):
432
+ key = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode()]))
433
+
434
  with st.spinner("Generating…"):
435
+ if mode == "Report (PDF)":
436
+ st.session_state.bundle = generate_report(upl.getvalue(), upl.name, ctx, key)
437
+ else:
438
+ st.session_state.bundle = None
439
+ path = generate_video(upl.getvalue(), upl.name, ctx, key)
440
+ if path:
441
+ st.session_state.bundle = {"type": "video", "video_path": path, "key": key}
442
  st.rerun()
443
 
444
+ # ─── OUTPUT ────────────────────────────────────────────────────────────────
 
 
445
  if bundle := st.session_state.get("bundle"):
446
+ if bundle["type"] == "report":
447
+ st.subheader("📄 Generated Report")
448
+ with st.expander("View Report", expanded=True):
449
+ st.markdown(bundle["preview"], unsafe_allow_html=True)
450
+
451
+ c1, c2 = st.columns(2)
452
+ with c1:
453
+ st.download_button("Download PDF", bundle["pdf"],
454
+ "business_report.pdf", "application/pdf",
455
+ use_container_width=True)
456
+ with c2:
457
+ if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
458
+ txt = re.sub(r"<[^>]+>", "", bundle["report_md"])
459
+ audio, mime = deepgram_tts(txt)
460
+ st.audio(audio, format=mime) if audio else st.error("Narration failed.")
461
+
462
+ else: # video
463
+ st.subheader("🎬 Generated Video Narrative")
464
+ vp = bundle["video_path"]
465
+ if Path(vp).exists():
466
+ with open(vp, "rb") as f:
467
+ st.video(f.read())
468
+ with open(vp, "rb") as f:
469
+ st.download_button("Download Video", f,
470
+ f"sozo_narrative_{bundle['key'][:8]}.mp4", "video/mp4")
471
+ else:
472
+ st.error("Video file missing – generation failed.")