rairo committed on
Commit
a9e09a8
·
verified ·
1 Parent(s): 0d73b2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +340 -180
app.py CHANGED
@@ -1,23 +1,33 @@
1
  ###############################################################################
2
  # Sozo Business Studio · AI transforms business data into compelling narratives
 
3
  ###############################################################################
4
  import os, re, json, hashlib, uuid, base64, io, tempfile, wave, requests, subprocess
5
  from pathlib import Path
6
 
 
7
  import streamlit as st
8
  import pandas as pd
9
  import numpy as np
 
10
  import matplotlib
11
  matplotlib.use("Agg")
12
  import matplotlib.pyplot as plt
 
 
13
  from fpdf import FPDF, HTMLMixin
14
  from markdown_it import MarkdownIt
15
  from PIL import Image
 
 
 
 
 
 
16
 
17
  from langchain_experimental.agents import create_pandas_dataframe_agent
18
  from langchain_google_genai import ChatGoogleGenerativeAI
19
  from google import genai
20
- import cv2 # Added for video processing
21
 
22
  # ─────────────────────────────────────────────────────────────────────────────
23
  # CONFIG & CONSTANTS
@@ -26,21 +36,20 @@ st.set_page_config(page_title="Sozo Business Studio", layout="wide")
26
  st.title("📊 Sozo Business Studio")
27
  st.caption("AI transforms business data into compelling narratives.")
28
 
29
- # --- Feature Caps ---
30
- MAX_CHARTS = 5
31
- VIDEO_SCENES = 5 # Number of scenes for the video
 
32
 
33
- # --- API Keys & Clients (Correct Initialization) ---
34
  API_KEY = os.getenv("GEMINI_API_KEY")
35
  if not API_KEY:
36
  st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
37
- # Use the Client pattern from the original script
38
- GEM = genai.Client(api_key=API_KEY)
39
 
40
- DG_KEY = os.getenv("DEEPGRAM_API_KEY") # Optional but needed for narration
41
 
42
- # --- Session State ---
43
- # Simplified state to hold the most recent generated output
44
  st.session_state.setdefault("bundle", None)
45
 
46
  # ─────────────────────────────────────────────────────────────────────────────
@@ -57,114 +66,239 @@ def validate_file_upload(f):
57
  errs.append("Unsupported file type")
58
  return errs
59
 
60
- def load_dataframe_safely(buf:bytes, name:str):
61
  try:
62
  ext = Path(name).suffix.lower()
63
- df = pd.read_excel(io.BytesIO(buf)) if ext in (".xlsx", ".xls") else pd.read_csv(io.BytesIO(buf))
64
- if df.empty or len(df.columns)==0: raise ValueError("File contains no data")
65
- df.columns=df.columns.astype(str).str.strip()
66
- df=df.dropna(how="all")
67
  if df.empty: raise ValueError("Rows all empty")
68
- return df,None
69
- except Exception as e: return None,str(e)
 
70
 
71
- def fix_bullet(t:str)->str:
72
- return re.sub(r"[\x80-\x9f]", "", t) if isinstance(t, str) else ""
73
 
74
- # ——— Arrow helpers ————————————————————————————————————————————————
75
- def arrow_df(df:pd.DataFrame)->pd.DataFrame:
76
- safe=df.copy()
77
  for c in safe.columns:
78
- if safe[c].dtype.name in ("Int64","Float64","Boolean"):
79
- safe[c]=safe[c].astype(safe[c].dtype.name.lower())
80
  return safe
81
 
82
- # ——— Text-to-Speech (Used by Both Features) ————————————————————————
83
  @st.cache_data(show_spinner=False)
84
- def deepgram_tts(text:str):
85
  if not DG_KEY or not text: return None, None
86
  text = re.sub(r"[^\w\s.,!?;:-]", "", text)[:1000]
87
  try:
88
- r = requests.post("https://api.deepgram.com/v1/speak",
89
- params={"model":"aura-asteria-en"},
90
- headers={"Authorization":f"Token {DG_KEY}", "Content-Type":"application/json"},
91
- json={"text":text}, timeout=30)
 
 
 
92
  r.raise_for_status()
93
  return r.content, r.headers.get("Content-Type", "audio/mpeg")
94
  except Exception:
95
  return None, None
96
 
97
- def pcm_to_wav(pcm,sr=24000,ch=1,w=2):
98
- buf=io.BytesIO()
99
- with wave.open(buf,'wb') as wf:
100
- wf.setnchannels(ch); wf.setsampwidth(w); wf.setframerate(sr); wf.writeframes(pcm)
101
- buf.seek(0); return buf.getvalue()
 
 
 
 
 
 
102
 
103
- # ——— Chart & Tag Helpers ———————————————————————————————————————————
 
 
104
  TAG_RE = re.compile(r'[<\[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>\]\'"”’]+?)["\']?\s*[>\]]', re.I)
105
  extract_chart_tags = lambda t: list(dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")))
106
- def repl_tags(txt:str,mp:dict,str_fn):
 
107
  return TAG_RE.sub(lambda m: str_fn(mp[m.group("d").strip()]) if m.group("d").strip() in mp else m.group(0), txt)
108
 
109
  # ─────────────────────────────────────────────────────────────────────────────
110
- # FEATURE 1: REPORT GENERATION
111
  # ─────────────────────────────────────────────────────────────────────────────
112
- class PDF(FPDF,HTMLMixin): pass
113
 
114
  def build_pdf(md, charts):
115
- md = fix_bullet(md).replace("•", "*")
116
- md = repl_tags(md, charts, lambda p: f'<img src="{p}">')
117
- html = MarkdownIt("commonmark", {"breaks":True}).enable("table").render(md)
118
- pdf = PDF(); pdf.set_auto_page_break(True, margin=15)
119
  pdf.add_page()
120
- pdf.set_font("Arial", "B", 18)
121
- pdf.cell(0, 12, "AI-Generated Business Report", ln=True); pdf.ln(3)
122
- pdf.set_font("Arial", "", 11)
123
- pdf.write_html(html)
124
  return bytes(pdf.output(dest="S"))
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  def generate_report_assets(key, buf, name, ctx):
127
  df, err = load_dataframe_safely(buf, name)
128
  if err: st.error(err); return None
129
- llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=API_KEY, temperature=0.1)
130
- ctx_dict = {"shape": df.shape, "columns": list(df.columns), "user_ctx": ctx or "General business analysis"}
131
 
132
- report_md = llm.invoke(f"""You are a senior business analyst. Write an executive-level Markdown report
 
 
 
 
 
 
133
  with insights & recommendations. Use chart tags like <generate_chart: "description"> where helpful.
134
- Data Context: {json.dumps(ctx_dict, indent=2)}""").content
 
135
 
136
- chart_descs = extract_chart_tags(report_md)[:MAX_CHARTS]
137
- chart_paths = {}
138
  if chart_descs:
139
- ag = create_pandas_dataframe_agent(llm=llm, df=df, verbose=False, allow_dangerous_code=True)
 
140
  for d in chart_descs:
141
  with st.spinner(f"Generating chart: {d}"):
142
  with plt.ioff():
143
  try:
144
  ag.run(f"Create a {d} with Matplotlib and save.")
145
- fig = plt.gcf()
146
  if fig.axes:
147
  p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
148
  fig.savefig(p, dpi=300, bbox_inches="tight", facecolor="white")
149
  chart_paths[d] = str(p)
150
  plt.close("all")
151
- except: plt.close("all")
 
 
 
 
 
 
152
 
153
- md = fix_bullet(report_md)
154
- pdf = build_pdf(md, chart_paths)
155
- preview = repl_tags(md, chart_paths, lambda p: f'<img src="data:image/png;base64,{base64.b64encode(Path(p).read_bytes()).decode()}" style="max-width:100%;">')
156
-
157
- return {"type": "report", "preview": preview, "pdf": pdf, "report_md": md, "key": key}
158
 
159
  # ─────────────────────────────────────────────────────────────────────────────
160
- # FEATURE 2: VIDEO GENERATION
161
  # ─────────────────────────────────────────────────────────────────────────────
162
  def generate_image_from_prompt(prompt, style):
163
- """Generates an illustrative image using the Gemini Client."""
164
  try:
165
  full_prompt = f"A professional, clean, illustrative image for a business presentation: {prompt}, in the style of {style}."
166
- # Use the globally defined GEM client, as per the original script's pattern
167
- response = GEM.generate_content(
168
  contents=full_prompt,
169
  model="models/gemini-1.5-flash-latest",
170
  generation_config={"response_mime_type": "image/png"}
@@ -173,46 +307,23 @@ def generate_image_from_prompt(prompt, style):
173
  return Image.open(io.BytesIO(img_bytes)).convert("RGB")
174
  except Exception as e:
175
  st.warning(f"Illustrative image generation failed: {e}. Using placeholder.")
176
- return Image.new('RGB', (1024, 768), color = (230, 230, 230))
177
-
178
- def create_silent_video(images, durations, output_path):
179
- width, height = 1280, 720
180
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
181
- video = cv2.VideoWriter(output_path, fourcc, 24, (width, height))
182
-
183
- for img, duration in zip(images, durations):
184
- # Resize image and convert to BGR for OpenCV
185
- frame = np.array(img.resize((width, height)))
186
- frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
187
- for _ in range(int(duration * 24)): # 24 fps
188
- video.write(frame_bgr)
189
- video.release()
190
- return output_path
191
-
192
- def combine_video_audio(video_path, audio_paths, output_path):
193
- concat_list_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
194
- with open(concat_list_path, 'w') as f:
195
- for af in audio_paths:
196
- f.write(f"file '{Path(af).resolve()}'\n")
197
-
198
- concat_audio_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
199
- subprocess.run(['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_list_path), '-c', 'copy', str(concat_audio_path)], check=True, capture_output=True)
200
-
201
- subprocess.run(['ffmpeg', '-y', '-i', video_path, '-i', str(concat_audio_path), '-c:v', 'copy', '-c:a', 'aac', '-shortest', output_path], check=True, capture_output=True)
202
-
203
- concat_list_path.unlink(missing_ok=True)
204
- concat_audio_path.unlink(missing_ok=True)
205
- return output_path
206
-
207
- def get_audio_duration(audio_file):
208
- try:
209
- result = subprocess.run(['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', audio_file],
210
- stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True)
211
- return float(result.stdout.strip())
212
- except Exception:
213
- return 5.0 # Default duration
214
-
215
- def generate_video_assets(key, buf, name, ctx, style):
216
  try:
217
  subprocess.run(['ffmpeg', '-version'], check=True, capture_output=True)
218
  except (FileNotFoundError, subprocess.CalledProcessError):
@@ -221,89 +332,131 @@ def generate_video_assets(key, buf, name, ctx, style):
221
 
222
  df, err = load_dataframe_safely(buf, name)
223
  if err: st.error(err); return None
224
- llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=API_KEY, temperature=0.2)
225
- ctx_dict = {"shape": df.shape, "columns": list(df.columns), "user_ctx": ctx or "General business analysis"}
 
 
 
226
 
227
  story_prompt = f"""Create a script for a short business video with exactly {VIDEO_SCENES} scenes.
228
  For each scene:
229
- 1. Write a concise narration (1-2 sentences).
230
  2. If the data can be visualized for this scene, add a chart tag like <generate_chart: "bar chart of sales by region">.
231
  3. Separate each scene with the marker `[SCENE_BREAK]`.
232
  Data Context: {json.dumps(ctx_dict, indent=2)}"""
233
-
234
- with st.spinner("Generating video script..."):
235
  full_script = llm.invoke(story_prompt).content
236
- scenes = [s.strip() for s in full_script.split("[SCENE_BREAK]")]
 
 
 
 
237
 
238
- visuals, audio_paths, temp_files = [], [], []
239
  try:
240
- ag = create_pandas_dataframe_agent(llm=llm, df=df, verbose=False, allow_dangerous_code=True)
241
  for i, scene_text in enumerate(scenes[:VIDEO_SCENES]):
242
- progress = (i + 1) / VIDEO_SCENES
243
- st.progress(progress, text=f"Processing Scene {i+1}/{VIDEO_SCENES}...")
244
-
245
  chart_descs = extract_chart_tags(scene_text)
246
- narrative = repl_tags(scene_text, {}, lambda _: "").strip()
247
-
248
- if narrative: # Only process scenes with text
249
- # 1. Generate Visual
250
- if chart_descs:
251
- with plt.ioff():
252
- try:
253
- ag.run(f"Create a {chart_descs[0]} with Matplotlib and save.")
254
- fig = plt.gcf()
255
- if fig.axes:
256
- p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
257
- fig.savefig(p, dpi=200, bbox_inches="tight", facecolor="white")
258
- visuals.append(Image.open(p).convert("RGB"))
259
- temp_files.append(p)
260
- else: raise ValueError("No chart produced")
261
- except Exception:
262
- visuals.append(generate_image_from_prompt(narrative, style))
263
- finally: plt.close("all")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  else:
265
- visuals.append(generate_image_from_prompt(narrative, style))
266
-
267
- # 2. Generate Audio
268
- audio_content, _ = deepgram_tts(narrative)
269
- if audio_content:
270
- audio_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
271
- audio_path.write_bytes(audio_content)
272
- audio_paths.append(str(audio_path))
273
- temp_files.append(audio_path)
274
-
275
- if not visuals or not audio_paths:
276
- st.error("Could not generate any scenes for the video. Please try a different context or file.")
277
- return None
278
-
279
- st.progress(1.0, text="Assembling video...")
280
- durations = [get_audio_duration(ap) for ap in audio_paths]
281
- silent_video_path = str(Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4")
282
- final_video_path = str(Path(tempfile.gettempdir()) / f"{key}.mp4")
283
-
284
- create_silent_video(visuals, durations, silent_video_path)
285
- temp_files.append(Path(silent_video_path))
286
- combine_video_audio(silent_video_path, audio_paths, final_video_path)
287
-
288
- return {"type": "video", "video_path": final_video_path, "key": key}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  finally:
290
- for f in temp_files: f.unlink(missing_ok=True) # Cleanup all temp files
 
 
291
 
292
  # ─────────────────────────────────────────────────────────────────────────────
293
  # UI & MAIN WORKFLOW
294
  # ─────────────────────────────────────────────────────────────────────────────
295
  mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
296
 
297
- # --- Conditional UI ---
298
- video_style = "professional illustration"
 
299
  if mode == "Video Narrative":
300
  with st.sidebar:
301
  st.subheader("🎬 Video Options")
302
- video_style = st.selectbox("Visual Style",
303
- ["professional illustration", "minimalist infographic", "photorealistic", "cinematic", "data visualization aesthetic"])
304
- st.info("The AI will generate charts from your data where possible, and illustrative images for other scenes.")
 
 
305
 
306
- # --- Common UI ---
307
  upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
308
  if upl:
309
  df_prev, _ = load_dataframe_safely(upl.getvalue(), upl.name)
@@ -314,22 +467,26 @@ ctx = st.text_area("Business context or specific instructions (optional)")
314
 
315
  if st.button("🚀 Generate", type="primary"):
316
  if not upl:
317
- st.warning("Please upload a file first.")
318
- st.stop()
319
-
320
- bkey = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode(), video_style.encode()]))
321
-
 
322
  if mode == "Report (PDF)":
323
- with st.spinner("Generating report and charts..."):
324
  bundle = generate_report_assets(bkey, upl.getvalue(), upl.name, ctx)
325
- else: # Video Narrative
326
- bundle = generate_video_assets(bkey, upl.getvalue(), upl.name, ctx, video_style)
327
-
 
328
  st.session_state.bundle = bundle
329
  st.rerun()
330
 
331
- # --- Display Area (handles state correctly after rerun) ---
332
- if "bundle" in st.session_state and st.session_state.bundle:
 
 
333
  bundle = st.session_state.bundle
334
 
335
  if bundle.get("type") == "report":
@@ -337,13 +494,14 @@ if "bundle" in st.session_state and st.session_state.bundle:
337
  with st.expander("View Report", expanded=True):
338
  if bundle["preview"]:
339
  st.markdown(bundle["preview"], unsafe_allow_html=True)
340
-
341
  c1, c2 = st.columns(2)
342
  with c1:
343
- st.download_button("Download PDF", bundle["pdf"], "business_report.pdf", "application/pdf", use_container_width=True)
 
 
344
  with c2:
345
  if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
346
- report_text = re.sub(r'<[^>]+>', '', bundle["report_md"]) # Basic HTML strip
347
  audio, mime = deepgram_tts(report_text)
348
  if audio:
349
  st.audio(audio, format=mime)
@@ -359,6 +517,8 @@ if "bundle" in st.session_state and st.session_state.bundle:
359
  with open(video_path, "rb") as f:
360
  st.video(f.read())
361
  with open(video_path, "rb") as f:
362
- st.download_button("Download Video", f, f"sozo_narrative_{bundle['key'][:8]}.mp4", "video/mp4")
 
 
363
  else:
364
  st.error("Video file could not be found or generation failed.")
 
1
  ###############################################################################
2
  # Sozo Business Studio · AI transforms business data into compelling narratives
3
+ # (video branch now supports animated charts)
4
  ###############################################################################
5
  import os, re, json, hashlib, uuid, base64, io, tempfile, wave, requests, subprocess
6
  from pathlib import Path
7
 
8
+ # ─── Third-party ──────────────────────────────────────────────────────────────
9
  import streamlit as st
10
  import pandas as pd
11
  import numpy as np
12
+
13
  import matplotlib
14
  matplotlib.use("Agg")
15
  import matplotlib.pyplot as plt
16
+ from matplotlib.animation import FuncAnimation, FFMpegWriter
17
+
18
  from fpdf import FPDF, HTMLMixin
19
  from markdown_it import MarkdownIt
20
  from PIL import Image
21
+ import cv2 # video processing
22
+ try:
23
+ import bar_chart_race as bcr # optional helper
24
+ HAS_BCR = True
25
+ except ImportError:
26
+ HAS_BCR = False
27
 
28
  from langchain_experimental.agents import create_pandas_dataframe_agent
29
  from langchain_google_genai import ChatGoogleGenerativeAI
30
  from google import genai
 
31
 
32
  # ─────────────────────────────────────────────────────────────────────────────
33
  # CONFIG & CONSTANTS
 
36
  st.title("📊 Sozo Business Studio")
37
  st.caption("AI transforms business data into compelling narratives.")
38
 
39
+ FPS = 24 # video frames per second
40
+ MAX_CHARTS = 5 # per report
41
+ VIDEO_SCENES = 5 # per video
42
+ WIDTH, HEIGHT = 1280, 720 # video resolution
43
 
44
+ # --- API Keys ---
45
  API_KEY = os.getenv("GEMINI_API_KEY")
46
  if not API_KEY:
47
  st.error("⚠️ GEMINI_API_KEY is not set."); st.stop()
48
+ GEM = genai.Client(api_key=API_KEY)
 
49
 
50
+ DG_KEY = os.getenv("DEEPGRAM_API_KEY") # optional (narration)
51
 
52
+ # --- Session State shortcut ---
 
53
  st.session_state.setdefault("bundle", None)
54
 
55
  # ─────────────────────────────────────────────────────────────────────────────
 
66
  errs.append("Unsupported file type")
67
  return errs
68
 
69
+ def load_dataframe_safely(buf: bytes, name: str):
70
  try:
71
  ext = Path(name).suffix.lower()
72
+ df = pd.read_excel(io.BytesIO(buf)) if ext in (".xlsx", ".xls") else pd.read_csv(io.BytesIO(buf))
73
+ if df.empty or len(df.columns) == 0: raise ValueError("File contains no data")
74
+ df.columns = df.columns.astype(str).str.strip()
75
+ df = df.dropna(how="all")
76
  if df.empty: raise ValueError("Rows all empty")
77
+ return df, None
78
+ except Exception as e:
79
+ return None, str(e)
80
 
81
+ def fix_bullet(t: str) -> str:
82
+ return re.sub(r"[\x80-\x9f]", "", t) if isinstance(t, str) else t
83
 
84
+ def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
85
+ safe = df.copy()
 
86
  for c in safe.columns:
87
+ if safe[c].dtype.name in ("Int64", "Float64", "Boolean"):
88
+ safe[c] = safe[c].astype(safe[c].dtype.name.lower())
89
  return safe
90
 
91
+ # ─── DeepGram TTS ────────────────────────────────────────────────────────────
92
  @st.cache_data(show_spinner=False)
93
+ def deepgram_tts(text: str):
94
  if not DG_KEY or not text: return None, None
95
  text = re.sub(r"[^\w\s.,!?;:-]", "", text)[:1000]
96
  try:
97
+ r = requests.post(
98
+ "https://api.deepgram.com/v1/speak",
99
+ params={"model": "aura-asteria-en"},
100
+ headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"},
101
+ json={"text": text},
102
+ timeout=30,
103
+ )
104
  r.raise_for_status()
105
  return r.content, r.headers.get("Content-Type", "audio/mpeg")
106
  except Exception:
107
  return None, None
108
 
109
+ def get_audio_duration(audio_file):
110
+ """Return duration (seconds) of an audio file via ffprobe (fallback 5 s)."""
111
+ try:
112
+ out = subprocess.run(
113
+ ['ffprobe', '-v', 'error', '-show_entries', 'format=duration',
114
+ '-of', 'default=noprint_wrappers=1:nokey=1', audio_file],
115
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True
116
+ ).stdout.strip()
117
+ return float(out)
118
+ except Exception:
119
+ return 5.0
120
 
121
+ # ─────────────────────────────────────────────────────────────────────────────
122
+ # MARKDOWN TAG UTILS
123
+ # ─────────────────────────────────────────────────────────────────────────────
124
  TAG_RE = re.compile(r'[<\[]\s*generate_?chart\s*[:=]?\s*["\']?(?P<d>[^>\]\'"”’]+?)["\']?\s*[>\]]', re.I)
125
  extract_chart_tags = lambda t: list(dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")))
126
+ def repl_tags(txt: str, mp: dict, str_fn):
127
+ """Replace generated-chart tags with something else (pdf/img injection)."""
128
  return TAG_RE.sub(lambda m: str_fn(mp[m.group("d").strip()]) if m.group("d").strip() in mp else m.group(0), txt)
129
 
130
  # ─────────────────────────────────────────────────────────────────────────────
131
+ # PDF GENERATION (unchanged)
132
  # ─────────────────────────────────────────────────────────────────────────────
133
+ class PDF(FPDF, HTMLMixin): pass
134
 
135
  def build_pdf(md, charts):
136
+ md = fix_bullet(md).replace("•", "*")
137
+ md = repl_tags(md, charts, lambda p: f'<img src="{p}">')
138
+ html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(md)
139
+ pdf = PDF(); pdf.set_auto_page_break(True, margin=15)
140
  pdf.add_page()
141
+ pdf.set_font("Arial", "B", 18); pdf.cell(0, 12, "AI-Generated Business Report", ln=True); pdf.ln(3)
142
+ pdf.set_font("Arial", "", 11); pdf.write_html(html)
 
 
143
  return bytes(pdf.output(dest="S"))
144
 
145
+ # ─────────────────────────────────────────────────────────────────────────────
146
+ # VIDEO-ONLY ANIMATION HELPERS
147
+ # ─────────────────────────────────────────────────────────────────────────────
148
+ def animate_image_fade(img_cv2: np.ndarray, duration: float, out_path: Path, fps: int = FPS):
149
+ """Simple fade-in from white background to the provided image."""
150
+ frames = max(int(duration * fps), fps) # at least 1 s
151
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
152
+ video = cv2.VideoWriter(str(out_path), fourcc, fps, (WIDTH, HEIGHT))
153
+ blank = np.full_like(img_cv2, 255)
154
+
155
+ for i in range(frames):
156
+ alpha = i / frames
157
+ frame = cv2.addWeighted(blank, 1 - alpha, img_cv2, alpha, 0)
158
+ video.write(frame)
159
+ video.release()
160
+ return str(out_path)
161
+
162
+ def animate_chart(desc: str, df: pd.DataFrame, duration: float, out_path: Path, fps: int = FPS) -> tuple[str, str]:
163
+ """
164
+ Build an animated chart clip matching *desc*.
165
+ Returns (mp4_path, preview_png_path).
166
+ Falls back to simple fade-in if animation fails.
167
+ """
168
+ try:
169
+ # VERY rough heuristic parser
170
+ desc_low = desc.lower()
171
+ if ("bar race" in desc_low or "race" in desc_low) and HAS_BCR:
172
+ # --------------- bar chart race ---------------------------------
173
+ tmp_csv = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.csv"
174
+ df.to_csv(tmp_csv, index=False)
175
+ bcr.bar_chart_race(
176
+ input_filename=tmp_csv,
177
+ output_filename=str(out_path),
178
+ n_bars=10,
179
+ period_length=duration / df.shape[0] if df.shape[0] else 0.5,
180
+ steps_per_period=3,
181
+ dpi=144,
182
+ fig=(WIDTH / 100, HEIGHT / 100),
183
+ bar_label_font=4,
184
+ fixed_order=False,
185
+ interpolate_period=False,
186
+ period_template='{x:.0f}',
187
+ )
188
+ tmp_csv.unlink(missing_ok=True)
189
+ # grab first frame for preview
190
+ cap = cv2.VideoCapture(str(out_path))
191
+ ok, frame = cap.read(); cap.release()
192
+ if ok:
193
+ preview = Path(out_path.with_suffix(".png"))
194
+ cv2.imwrite(str(preview), frame)
195
+ return str(out_path), str(preview)
196
+ raise RuntimeError("Could not capture preview")
197
+ else:
198
+ # --------------- generic line/bar growth using FuncAnimation ----
199
+ # Pick numeric columns
200
+ num_cols = df.select_dtypes(include=['number']).columns.tolist()
201
+ if len(num_cols) < 1:
202
+ raise ValueError("No numeric data to plot")
203
+ col_y = num_cols[0]
204
+ col_x = num_cols[1] if len(num_cols) > 1 else None
205
+ fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
206
+
207
+ if "bar" in desc_low:
208
+ bars = ax.bar([], [])
209
+ def update(frame_idx):
210
+ frac = frame_idx / frames
211
+ upto = int(len(df) * frac) or 1
212
+ ydata = df[col_y].iloc[:upto]
213
+ xdata = df[col_x].iloc[:upto] if col_x else np.arange(upto)
214
+ ax.clear()
215
+ ax.bar(xdata, ydata, color="#1f77b4")
216
+ ax.set_title(desc); ax.grid(True, alpha=0.3)
217
+ frames = max(int(duration * fps), fps)
218
+ anim = FuncAnimation(fig, update, frames=frames, blit=False)
219
+ else:
220
+ line, = ax.plot([], [], lw=2)
221
+ ax.set_xlim(df.index.min(), df.index.max() or len(df))
222
+ ax.set_ylim(df[col_y].min(), df[col_y].max())
223
+ ax.set_title(desc); ax.grid(True, alpha=0.3)
224
+ def update(frame_idx):
225
+ upto = int(len(df) * frame_idx / frames) or 1
226
+ line.set_data(df.index[:upto], df[col_y].iloc[:upto])
227
+ return line,
228
+ frames = max(int(duration * fps), fps)
229
+ anim = FuncAnimation(fig, update, frames=frames, blit=True)
230
+
231
+ writer = FFMpegWriter(fps=fps, metadata=dict(artist='Sozo Studio'))
232
+ anim.save(str(out_path), writer=writer, dpi=144)
233
+ preview = Path(out_path.with_suffix(".png"))
234
+ fig.savefig(preview, bbox_inches="tight", facecolor="white")
235
+ plt.close('all')
236
+ return str(out_path), str(preview)
237
+ except Exception as e:
238
+ # Fallback: simple fade-in on static chart generated by agent
239
+ with st.spinner(f"Animation fallback due to {e}. Generating static image."):
240
+ fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
241
+ df.plot(ax=ax); ax.set_title(desc); ax.grid(alpha=0.3)
242
+ png_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
243
+ fig.savefig(png_path, bbox_inches="tight", facecolor="white"); plt.close('all')
244
+ img = cv2.imread(str(png_path)); img = cv2.resize(img, (WIDTH, HEIGHT))
245
+ mp4_path = Path(out_path)
246
+ animate_image_fade(img, duration, mp4_path, fps=fps)
247
+ return str(mp4_path), str(png_path)
248
+
249
+ # ─────────────────────────────────────────────────────────────────────────────
250
+ # REPORT GENERATION (unchanged)
251
+ # ─────────────────────────────────────────────────────────────────────────────
252
  def generate_report_assets(key, buf, name, ctx):
253
  df, err = load_dataframe_safely(buf, name)
254
  if err: st.error(err); return None
 
 
255
 
256
+ llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",
257
+ google_api_key=API_KEY, temperature=0.1)
258
+ ctx_dict = {"shape": df.shape, "columns": list(df.columns),
259
+ "user_ctx": ctx or "General business analysis"}
260
+
261
+ report_md = llm.invoke(
262
+ f"""You are a senior business analyst. Write an executive-level Markdown report
263
  with insights & recommendations. Use chart tags like <generate_chart: "description"> where helpful.
264
+ Data Context: {json.dumps(ctx_dict, indent=2)}"""
265
+ ).content
266
 
267
+ chart_descs = extract_chart_tags(report_md)[:MAX_CHARTS]
268
+ chart_paths = {}
269
  if chart_descs:
270
+ ag = create_pandas_dataframe_agent(llm=llm, df=df, verbose=False,
271
+ allow_dangerous_code=True)
272
  for d in chart_descs:
273
  with st.spinner(f"Generating chart: {d}"):
274
  with plt.ioff():
275
  try:
276
  ag.run(f"Create a {d} with Matplotlib and save.")
277
+ fig = plt.gcf()
278
  if fig.axes:
279
  p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
280
  fig.savefig(p, dpi=300, bbox_inches="tight", facecolor="white")
281
  chart_paths[d] = str(p)
282
  plt.close("all")
283
+ except:
284
+ plt.close("all")
285
+
286
+ md = fix_bullet(report_md)
287
+ pdf = build_pdf(md, chart_paths)
288
+ preview = repl_tags(md, chart_paths,
289
+ lambda p: f'<img src="data:image/png;base64,{base64.b64encode(Path(p).read_bytes()).decode()}" style="max-width:100%;">')
290
 
291
+ return {"type": "report", "preview": preview, "pdf": pdf,
292
+ "report_md": md, "key": key}
 
 
 
293
 
294
  # ─────────────────────────────────────────────────────────────────────────────
295
+ # VIDEO GENERATION (animated charts!)
296
  # ─────────────────────────────────────────────────────────────────────────────
297
  def generate_image_from_prompt(prompt, style):
298
+ """Image placeholder using Gemini; falls back to gray canvas on error."""
299
  try:
300
  full_prompt = f"A professional, clean, illustrative image for a business presentation: {prompt}, in the style of {style}."
301
+ response = GEM.generate_content(
 
302
  contents=full_prompt,
303
  model="models/gemini-1.5-flash-latest",
304
  generation_config={"response_mime_type": "image/png"}
 
307
  return Image.open(io.BytesIO(img_bytes)).convert("RGB")
308
  except Exception as e:
309
  st.warning(f"Illustrative image generation failed: {e}. Using placeholder.")
310
+ return Image.new('RGB', (WIDTH, HEIGHT), color=(230, 230, 230))
311
+
312
+ def concat_media(inputs, output_path, media_type="video"):
313
+ """Concat list of mp4 or mp3 files using ffmpeg demuxer (copy, no re-encode)."""
314
+ concat_list = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
315
+ with open(concat_list, 'w') as f:
316
+ for item in inputs:
317
+ f.write(f"file '{Path(item).resolve()}'\n")
318
+ codec_copy = 'copy'
319
+ what = '-c:v' if media_type == "video" else '-c:a'
320
+ subprocess.run(['ffmpeg', '-y', '-f', 'concat', '-safe', '0',
321
+ '-i', str(concat_list), what, codec_copy, str(output_path)],
322
+ check=True, capture_output=True)
323
+ concat_list.unlink(missing_ok=True)
324
+
325
+ def generate_video_assets(key, buf, name, ctx, style, animate_charts=True):
326
+ # --- environment check ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  try:
328
  subprocess.run(['ffmpeg', '-version'], check=True, capture_output=True)
329
  except (FileNotFoundError, subprocess.CalledProcessError):
 
332
 
333
  df, err = load_dataframe_safely(buf, name)
334
  if err: st.error(err); return None
335
+
336
+ llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",
337
+ google_api_key=API_KEY, temperature=0.2)
338
+ ctx_dict = {"shape": df.shape, "columns": list(df.columns),
339
+ "user_ctx": ctx or "General business analysis"}
340
 
341
  story_prompt = f"""Create a script for a short business video with exactly {VIDEO_SCENES} scenes.
342
  For each scene:
343
+ 1. Write a concise narration (1-2 sentences).
344
  2. If the data can be visualized for this scene, add a chart tag like <generate_chart: "bar chart of sales by region">.
345
  3. Separate each scene with the marker `[SCENE_BREAK]`.
346
  Data Context: {json.dumps(ctx_dict, indent=2)}"""
347
+
348
+ with st.spinner("Generating video script"):
349
  full_script = llm.invoke(story_prompt).content
350
+ scenes = [s.strip() for s in full_script.split("[SCENE_BREAK]") if s.strip()]
351
+
352
+ video_clips, audio_paths, temp_files = [], [], []
353
+ ag = create_pandas_dataframe_agent(llm=llm, df=df,
354
+ verbose=False, allow_dangerous_code=True)
355
 
 
356
  try:
 
357
  for i, scene_text in enumerate(scenes[:VIDEO_SCENES]):
358
+ st.progress((i + 1) / VIDEO_SCENES, text=f"Processing Scene {i+1}/{VIDEO_SCENES}…")
359
+
 
360
  chart_descs = extract_chart_tags(scene_text)
361
+ narrative = repl_tags(scene_text, {}, lambda _: "").strip()
362
+
363
+ # 1. Generate Audio (always)
364
+ audio_content, _ = deepgram_tts(narrative)
365
+ if audio_content:
366
+ audio_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
367
+ audio_path.write_bytes(audio_content)
368
+ audio_paths.append(str(audio_path))
369
+ temp_files.append(audio_path)
370
+ duration = get_audio_duration(str(audio_path))
371
+ else:
372
+ duration = 5.0 # fallback
373
+
374
+ # 2. Generate Visual (clip)
375
+ if chart_descs:
376
+ d = chart_descs[0]
377
+ with plt.ioff():
378
+ try:
379
+ ag.run(f"Create a {d} with Matplotlib and save.")
380
+ fig = plt.gcf()
381
+ if not fig.axes: raise ValueError("No axes")
382
+ static_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
383
+ fig.savefig(static_png, dpi=300, bbox_inches="tight", facecolor="white")
384
+ plt.close("all")
385
+ except Exception:
386
+ plt.close("all")
387
+ # fallback to illustrative image
388
+ static_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
389
+ generate_image_from_prompt(narrative, style).save(static_png)
390
+
391
+ # Animate?
392
+ if animate_charts:
393
+ clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
394
+ img = cv2.imread(str(static_png)); img = cv2.resize(img, (WIDTH, HEIGHT))
395
+ animate_image_fade(img, duration, clip_path)
396
+ video_clips.append(str(clip_path))
397
+ temp_files.extend([static_png, clip_path])
398
  else:
399
+ # Just still → Ken-Burns fade to duration seconds
400
+ clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
401
+ img = cv2.imread(str(static_png)); img = cv2.resize(img, (WIDTH, HEIGHT))
402
+ animate_image_fade(img, duration, clip_path) # still a clip
403
+ video_clips.append(str(clip_path))
404
+ temp_files.extend([static_png, clip_path])
405
+
406
+ else:
407
+ # No chart; illustrative image
408
+ static_img = generate_image_from_prompt(narrative, style)
409
+ static_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
410
+ static_img.save(static_png)
411
+ clip_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
412
+ img = cv2.cvtColor(np.array(static_img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
413
+ animate_image_fade(img, duration, clip_path)
414
+ video_clips.append(str(clip_path))
415
+ temp_files.extend([static_png, clip_path])
416
+
417
+ # --- Assemble video ---
418
+ st.progress(1.0, text="Assembling video…")
419
+ silent_video_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
420
+ concat_media(video_clips, silent_video_path, media_type="video")
421
+
422
+ # --- Concat audio ---
423
+ audio_concat_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
424
+ concat_media(audio_paths, audio_concat_path, media_type="audio")
425
+
426
+ # --- Merge AV streams ---
427
+ final_video_path = Path(tempfile.gettempdir()) / f"{key}.mp4"
428
+ subprocess.run(['ffmpeg', '-y',
429
+ '-i', str(silent_video_path),
430
+ '-i', str(audio_concat_path),
431
+ '-c:v', 'copy', '-c:a', 'aac',
432
+ '-shortest', str(final_video_path)],
433
+ check=True, capture_output=True)
434
+
435
+ return {"type": "video", "video_path": str(final_video_path), "key": key}
436
+
437
  finally:
438
+ # clean-up temps except final video
439
+ for f in temp_files:
440
+ f.unlink(missing_ok=True)
441
 
442
# NOTE(review): this span is unified-diff residue — '+' markers and bare
# old-file line numbers (444, 445, …) are interleaved with the code lines.
# Comments below annotate the Python lines only; no code byte was altered.
  # ─────────────────────────────────────────────────────────────────────────────
444
  # UI & MAIN WORKFLOW
445
  # ─────────────────────────────────────────────────────────────────────────────
446
  mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
447

448
+ # Video options
449
# Defaults ensure video_style / animate_charts_on are bound even when the
# "Video Narrative" sidebar below is never rendered (Report mode).
+ video_style = "professional illustration"
450
+ animate_charts_on = True
451
  if mode == "Video Narrative":
452
  with st.sidebar:
453
  st.subheader("🎬 Video Options")
454
+ video_style = st.selectbox("Visual Style",
455
+ ["professional illustration", "minimalist infographic",
456
+ "photorealistic", "cinematic", "data visualization aesthetic"])
457
+ animate_charts_on = st.toggle("Animate Charts", value=True)
458
+ st.caption("Disabling animation uses static slides with a quick fade-in.")
459

460
+ # Common file uploader
461
  upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
462
  if upl:
463
  df_prev, _ = load_dataframe_safely(upl.getvalue(), upl.name)
# NOTE(review): original lines 464-466 are not visible in this chunk —
# presumably a data preview plus the `ctx` text input; `ctx` is read below,
# so confirm it is defined there.

467

468
  if st.button("🚀 Generate", type="primary"):
469
  if not upl:
470
+ st.warning("Please upload a file first."); st.stop()
471
+
472
# Cache key: hash of the file bytes plus every option that affects output,
# so changing any setting produces a fresh bundle key.
+ bkey = sha1_bytes(b"".join([upl.getvalue(), mode.encode(),
473
+ ctx.encode(), video_style.encode(),
474
+ str(animate_charts_on).encode()]))
475
+
476
  if mode == "Report (PDF)":
477
+ with st.spinner("Generating report and charts"):
478
  bundle = generate_report_assets(bkey, upl.getvalue(), upl.name, ctx)
479
+ else: # Video
480
+ bundle = generate_video_assets(bkey, upl.getvalue(), upl.name, ctx,
481
+ video_style, animate_charts=animate_charts_on)
482
+
483
  st.session_state.bundle = bundle
484
# Rerun so the display area below picks up the freshly stored bundle.
  st.rerun()
485
485
 
486
# NOTE(review): unified-diff residue continues here; the bare numbers are
# old-file line indices, not code. Comments only — code bytes untouched.
+ # ─────────────────────────────────────────────────────────────────────────────
487
+ # DISPLAY AREA
488
+ # ─────────────────────────────────────────────────────────────────────────────
489
+ if st.session_state.get("bundle"):
490
  bundle = st.session_state.bundle
491

492
# Report bundles expose "preview" (rendered markup), "pdf" (bytes for the
# download button) and the raw markdown under "report_md".
  if bundle.get("type") == "report":

494
  with st.expander("View Report", expanded=True):
495
  if bundle["preview"]:
496
  st.markdown(bundle["preview"], unsafe_allow_html=True)

497
  c1, c2 = st.columns(2)
498
  with c1:
499
+ st.download_button("Download PDF", bundle["pdf"],
500
+ "business_report.pdf", "application/pdf",
501
+ use_container_width=True)
502
  with c2:
503
# Narration is only offered when a Deepgram API key is configured.
  if DG_KEY and st.button("🔊 Narrate Summary", use_container_width=True):
504
# Strip HTML tags so the TTS engine reads plain text, not markup.
+ report_text = re.sub(r'<[^>]+>', '', bundle["report_md"])
505
  audio, mime = deepgram_tts(report_text)
506
  if audio:
507
  st.audio(audio, format=mime)
# NOTE(review): original lines 508-516 are not visible in this chunk —
# presumably the `type == "video"` branch header plus the existence check
# that defines `video_path`; verify `video_path` is bound there.

517
  with open(video_path, "rb") as f:
518
  st.video(f.read())
519
# Reopened for the download widget; the player above consumed the first read.
  with open(video_path, "rb") as f:
520
+ st.download_button("Download Video", f,
521
+ f"sozo_narrative_{bundle['key'][:8]}.mp4",
522
+ "video/mp4")
523
  else:
524
  st.error("Video file could not be found or generation failed.")