rairo committed on
Commit
c315412
·
verified ·
1 Parent(s): 99216e4

Create sozo_gen.py

Browse files
Files changed (1) hide show
  1. sozo_gen.py +373 -0
sozo_gen.py ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # sozo_gen.py
2
+
3
+ import os
4
+ import re
5
+ import json
6
+ import logging
7
+ import uuid
8
+ import io
9
+ from pathlib import Path
10
+ import pandas as pd
11
+ import numpy as np
12
+ import matplotlib
13
+ matplotlib.use("Agg")
14
+ import matplotlib.pyplot as plt
15
+ from matplotlib.animation import FuncAnimation, FFMpegWriter
16
+ from PIL import Image
17
+ import cv2
18
+ import inspect
19
+ import tempfile
20
+ import subprocess
21
+
22
+ from langchain_google_genai import ChatGoogleGenerativeAI
23
+ from google import genai
24
+ import requests
25
+
26
# --- Configuration ---
# Root logger: timestamp, level, and originating function on every record.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - [%(funcName)s] - %(message)s')
# Output video geometry: 24 fps at 1280x720 (720p).
FPS, WIDTH, HEIGHT = 24, 1280, 720
# Caps: at most 5 charts per report draft and 5 scenes per generated video.
MAX_CHARTS, VIDEO_SCENES = 5, 5

# --- Gemini API Initialization ---
# Fail fast at import time if the API key is missing.
API_KEY = os.getenv("GEMINI_API_KEY")
if not API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable not set.")
# NOTE(review): `from google import genai` (the google-genai SDK) exposes a
# Client class, not a module-level `configure()`; that call belongs to the
# legacy `google.generativeai` package. Verify which SDK is installed — TODO confirm.
genai.configure(api_key=API_KEY)
36
+
37
+ # --- Helper Functions ---
38
def load_dataframe_safely(buf, name: str) -> pd.DataFrame:
    """Read a CSV/Excel upload into a cleaned DataFrame.

    Args:
        buf: File-like object (or path) holding the raw upload.
        name: Original filename; its extension selects the parser.

    Returns:
        DataFrame with stripped string column names and all-NaN rows dropped.

    Raises:
        ValueError: If the parsed frame has no usable rows or columns.
    """
    ext = Path(name).suffix.lower()
    reader = pd.read_excel if ext in (".xlsx", ".xls") else pd.read_csv
    # Original line had a stray trailing ')' (SyntaxError) — fixed here.
    df = reader(buf)
    df.columns = df.columns.astype(str).str.strip()
    df = df.dropna(how="all")
    if df.empty or len(df.columns) == 0:
        raise ValueError("No usable data found")
    return df
45
+
46
def deepgram_tts(txt: str, voice_model: str):
    """Synthesize `txt` with Deepgram's speech API; return MP3 bytes or None.

    Returns None when the API key is absent, the text is empty, or the
    HTTP request fails for any reason (best-effort narration).
    """
    api_key = os.getenv("DEEPGRAM_API_KEY")
    if not api_key or not txt:
        return None
    # Strip characters the TTS engine chokes on and cap the request size.
    cleaned = re.sub(r"[^\w\s.,!?;:-]", "", txt)[:1000]
    try:
        response = requests.post(
            "https://api.deepgram.com/v1/speak",
            params={"model": voice_model},
            headers={"Authorization": f"Token {api_key}", "Content-Type": "application/json"},
            json={"text": cleaned},
            timeout=30,
        )
        response.raise_for_status()
    except Exception as e:
        logging.error(f"Deepgram TTS failed: {e}")
        return None
    return response.content
57
+
58
def generate_silence_mp3(duration: float, out: Path):
    """Write `duration` seconds of silent mono 44.1 kHz MP3 audio to `out`."""
    cmd = [
        "ffmpeg", "-y",
        "-f", "lavfi",
        "-i", "anullsrc=r=44100:cl=mono",
        "-t", f"{duration:.3f}",
        "-q:a", "9",
        str(out),
    ]
    subprocess.run(cmd, check=True, capture_output=True)
60
+
61
def audio_duration(path: str) -> float:
    """Probe a media file's duration in seconds via ffprobe.

    Falls back to 5.0 seconds whenever probing fails (missing file,
    missing ffprobe binary, or unparsable output).
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=nw=1:nk=1",
        path,
    ]
    try:
        probe = subprocess.run(cmd, text=True, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, check=True)
        return float(probe.stdout.strip())
    except Exception:
        return 5.0
66
+
67
# Matches chart tags like <generate_chart: "desc"> or [generate_chart "desc"],
# tolerating optional separators and straight/curly quotes around the description.
TAG_RE = re.compile( r'[<[]\s*generate_?chart\s*[:=]?\s*[\"\'“”]?(?P<d>[^>\"\'”\]]+?)[\"\'“”]?\s*[>\]]', re.I, )

def extract_chart_tags(t):
    """Return the unique chart descriptions found in `t`, in first-seen order.

    Accepts None/empty input. Was a lambda assignment in the original;
    converted to a named def (PEP 8) with identical behavior.
    """
    return list(dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")))

# Leading "Scene 3:"-style headings, matched per line.
re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I | re.M)

def clean_narration(txt: str) -> str:
    """Strip chart tags, scene headings, chart-referencing phrases,
    parentheticals and Markdown markup from narration text."""
    txt = TAG_RE.sub("", txt)
    txt = re_scene.sub("", txt)
    # Spoken narration should not reference visuals the listener may not see.
    phrases_to_remove = [r"as you can see in the chart", r"this chart shows", r"the chart illustrates", r"in this visual", r"this graph displays"]
    for phrase in phrases_to_remove:
        txt = re.sub(phrase, "", txt, flags=re.IGNORECASE)
    txt = re.sub(r"\s*\([^)]*\)", "", txt)
    txt = re.sub(r"[\*#_]", "", txt)
    return re.sub(r"\s{2,}", " ", txt).strip()
77
+
78
def placeholder_img() -> Image.Image:
    """Return a plain light-grey frame at the target video resolution."""
    light_grey = (230, 230, 230)
    return Image.new("RGB", (WIDTH, HEIGHT), light_grey)
79
+
80
def generate_image_from_prompt(prompt: str) -> Image.Image:
    """Generate a presentation illustration for `prompt` via Gemini.

    Tries the experimental image model first, then a preview fallback;
    returns a plain grey placeholder frame if both attempts fail.
    """
    # NOTE(review): `genai.GenerativeModel` is the legacy `google.generativeai`
    # API; this file imports `from google import genai` (google-genai SDK),
    # which exposes a Client instead — confirm the intended SDK.
    model_main = "gemini-2.0-flash-exp-image-generation"; model_fallback = "gemini-2.0-flash-preview-image-generation"
    full_prompt = "A clean business-presentation illustration: " + prompt
    def fetch(model_name):
        # Best-effort: any API or parse failure yields None so the caller
        # can fall through to the next model or the placeholder.
        try:
            model = genai.GenerativeModel(model_name)
            res = model.generate_content(full_prompt)
            # Scan the response parts for the first inline image payload.
            for part in res.candidates[0].content.parts:
                if getattr(part, "inline_data", None):
                    return Image.open(io.BytesIO(part.inline_data.data)).convert("RGB")
            return None
        except Exception:
            return None
    try:
        img = fetch(model_main) or fetch(model_fallback)
        return img if img else placeholder_img()
    except Exception: return placeholder_img()
97
+
98
+ # --- Chart Generation System ---
99
class ChartSpecification:
    """Plain value object describing one chart to render."""

    def __init__(self, chart_type: str, title: str, x_col: str, y_col: str, agg_method: str = None, filter_condition: str = None, top_n: int = None, color_scheme: str = "professional"):
        self.chart_type = chart_type
        self.title = title
        self.x_col = x_col
        self.y_col = y_col
        # Aggregation defaults to summing when the caller passes None.
        self.agg_method = agg_method or "sum"
        self.filter_condition = filter_condition
        self.top_n = top_n
        self.color_scheme = color_scheme
103
+
104
def enhance_data_context(df: pd.DataFrame, ctx_dict: dict) -> dict:
    """Return a copy of `ctx_dict` extended with the frame's column typing.

    Adds `numeric_columns` and `categorical_columns` (every non-numeric
    column) so downstream prompts can reason about chartable fields.
    """
    # The original annotated with `typing.Dict`, which was never imported and
    # raised NameError at module load; builtin `dict` generics fix that.
    enhanced_ctx = ctx_dict.copy()
    numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
    categorical_cols = df.select_dtypes(exclude=['number']).columns.tolist()
    enhanced_ctx.update({"numeric_columns": numeric_cols, "categorical_columns": categorical_cols})
    return enhanced_ctx
108
+
109
class ChartGenerator:
    """Turns a free-text chart description into a ChartSpecification via an LLM."""

    def __init__(self, llm, df: pd.DataFrame):
        # llm: LangChain chat model exposing .invoke(); df: the dataset to chart.
        self.llm = llm; self.df = df
        # Pre-compute dataset metadata once so every spec prompt can include it.
        self.enhanced_ctx = enhance_data_context(df, {"columns": list(df.columns), "shape": df.shape, "dtypes": {col: str(dtype) for col, dtype in df.dtypes.items()}})

    def generate_chart_spec(self, description: str) -> ChartSpecification:
        """Ask the LLM for a JSON chart spec; fall back to a heuristic on failure."""
        spec_prompt = f"""
You are a data visualization expert. Based on the dataset and chart description, generate a precise chart specification.
**Dataset Info:** {json.dumps(self.enhanced_ctx, indent=2)}
**Chart Request:** {description}
**Return a JSON specification with these exact fields:**
{{
"chart_type": "bar|pie|line|scatter|hist", "title": "Professional chart title", "x_col": "column_name_for_x_axis",
"y_col": "column_name_for_y_axis_or_null", "agg_method": "sum|mean|count|max|min|null", "top_n": "number_for_top_n_filtering_or_null"
}}
Return only the JSON specification, no additional text.
"""
        try:
            response = self.llm.invoke(spec_prompt).content.strip()
            # Strip Markdown code fences the model often wraps JSON in.
            if response.startswith("```json"): response = response[7:-3]
            elif response.startswith("```"): response = response[3:-3]
            spec_dict = json.loads(response)
            # Keep only keys that map onto ChartSpecification's constructor
            # (drops fields the model invents, plus a few we never want set).
            valid_keys = [p.name for p in inspect.signature(ChartSpecification).parameters.values() if p.name not in ['reasoning', 'filter_condition', 'color_scheme']]
            filtered_dict = {k: v for k, v in spec_dict.items() if k in valid_keys}
            return ChartSpecification(**filtered_dict)
        except Exception as e:
            logging.error(f"Spec generation failed: {e}. Using fallback.")
            return self._create_fallback_spec(description)

    def _create_fallback_spec(self, description: str) -> ChartSpecification:
        """Heuristic spec: infer chart type from keywords, pick first sensible columns."""
        numeric_cols = self.enhanced_ctx['numeric_columns']; categorical_cols = self.enhanced_ctx['categorical_columns']
        ctype = "bar"
        # Last matching keyword wins; defaults to a bar chart.
        for t in ["pie", "line", "scatter", "hist"]:
            if t in description.lower(): ctype = t
        x = categorical_cols[0] if categorical_cols else self.df.columns[0]
        y = numeric_cols[0] if numeric_cols and len(self.df.columns) > 1 else (self.df.columns[1] if len(self.df.columns) > 1 else None)
        return ChartSpecification(ctype, description, x, y)
146
+
147
def execute_chart_spec(spec: ChartSpecification, df: pd.DataFrame, output_path: Path) -> bool:
    """Render `spec` against `df` as a static PNG at `output_path`.

    Returns True on success, False (after logging) on any failure.
    The figure is now closed in a `finally` block — the original only
    closed it on success, leaking a matplotlib figure per failed render.
    """
    fig = None
    try:
        plot_data = prepare_plot_data(spec, df)
        fig, ax = plt.subplots(figsize=(12, 8))
        plt.style.use('default')
        if spec.chart_type == "bar":
            ax.bar(plot_data.index.astype(str), plot_data.values, color='#2E86AB', alpha=0.8)
            ax.set_xlabel(spec.x_col); ax.set_ylabel(spec.y_col)
            ax.tick_params(axis='x', rotation=45)
        elif spec.chart_type == "pie":
            ax.pie(plot_data.values, labels=plot_data.index, autopct='%1.1f%%', startangle=90)
            ax.axis('equal')
        elif spec.chart_type == "line":
            ax.plot(plot_data.index, plot_data.values, marker='o', linewidth=2, color='#A23B72')
            ax.set_xlabel(spec.x_col); ax.set_ylabel(spec.y_col); ax.grid(True, alpha=0.3)
        elif spec.chart_type == "scatter":
            ax.scatter(plot_data.iloc[:, 0], plot_data.iloc[:, 1], alpha=0.6, color='#F18F01')
            ax.set_xlabel(spec.x_col); ax.set_ylabel(spec.y_col); ax.grid(True, alpha=0.3)
        elif spec.chart_type == "hist":
            ax.hist(plot_data.values, bins=20, color='#C73E1D', alpha=0.7, edgecolor='black')
            ax.set_xlabel(spec.x_col); ax.set_ylabel('Frequency'); ax.grid(True, alpha=0.3)
        ax.set_title(spec.title, fontsize=14, fontweight='bold', pad=20)
        plt.tight_layout()
        plt.savefig(output_path, dpi=300, bbox_inches='tight', facecolor='white')
        return True
    except Exception as e:
        logging.error(f"Static chart generation failed for '{spec.title}': {e}")
        return False
    finally:
        if fig is not None:
            plt.close(fig)
160
+
161
+ def prepare_plot_data(spec: ChartSpecification, df: pd.DataFrame) -> pd.Series:
162
+ if spec.x_col not in df.columns or (spec.y_col and spec.y_col not in df.columns): raise ValueError(f"Invalid columns in chart spec: {spec.x_col}, {spec.y_col}")
163
+ if spec.chart_type in ["bar", "pie"]:
164
+ if not spec.y_col: return df[spec.x_col].value_counts().nlargest(spec.top_n or 10)
165
+ grouped = df.groupby(spec.x_col)[spec.y_col].agg(spec.agg_method or 'sum')
166
+ return grouped.nlargest(spec.top_n or 10)
167
+ elif spec.chart_type == "line": return df.set_index(spec.x_col)[spec.y_col].sort_index()
168
+ elif spec.chart_type == "scatter": return df[[spec.x_col, spec.y_col]].dropna()
169
+ elif spec.chart_type == "hist": return df[spec.x_col].dropna()
170
+ return df[spec.x_col]
171
+
172
+ # --- Animation & Video Generation ---
173
def animate_chart(spec: ChartSpecification, df: pd.DataFrame, dur: float, out: Path, fps: int = FPS) -> str:
    """Render an animated MP4 of `spec` at `out`, roughly `dur` seconds long.

    Animation styles per chart type: pie and hist fade in, bars grow to
    their final height, scatter and line reveal a growing prefix of the
    data. Requires ffmpeg (via FFMpegWriter). Returns `out` as a str.
    """
    plot_data = prepare_plot_data(spec, df)
    # At least 10 frames so very short narrations still animate.
    frames = max(10, int(dur * fps))
    fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
    plt.tight_layout(pad=3.0)
    ctype = spec.chart_type
    if ctype == "pie":
        # Draw the final pie once, then fade wedge opacity in over time.
        wedges, _, _ = ax.pie(plot_data, labels=plot_data.index, startangle=90, autopct='%1.1f%%')
        ax.set_title(spec.title); ax.axis('equal')
        def init(): [w.set_alpha(0) for w in wedges]; return wedges
        def update(i): [w.set_alpha(i / (frames - 1)) for w in wedges]; return wedges
    elif ctype == "bar":
        # Bars start at zero height and grow linearly to their final value.
        bars = ax.bar(plot_data.index.astype(str), np.zeros_like(plot_data.values, dtype=float), color="#1f77b4")
        # Guard the y-limit against NaN or non-positive maxima.
        ax.set_ylim(0, plot_data.max() * 1.1 if not pd.isna(plot_data.max()) and plot_data.max() > 0 else 1)
        ax.set_title(spec.title); plt.xticks(rotation=45, ha="right")
        def init(): return bars
        def update(i):
            for b, h in zip(bars, plot_data.values): b.set_height(h * (i / (frames - 1)))
            return bars
    elif ctype == "scatter":
        # Points appear progressively: each frame shows a longer data prefix.
        scat = ax.scatter([], [], alpha=0.7)
        x_full, y_full = plot_data.iloc[:, 0], plot_data.iloc[:, 1]
        ax.set_xlim(x_full.min(), x_full.max()); ax.set_ylim(y_full.min(), y_full.max())
        ax.set_title(spec.title); ax.grid(alpha=.3); ax.set_xlabel(spec.x_col); ax.set_ylabel(spec.y_col)
        def init(): scat.set_offsets(np.empty((0, 2))); return [scat]
        def update(i):
            k = max(1, int(len(x_full) * (i / (frames - 1))))
            scat.set_offsets(plot_data.iloc[:k].values); return [scat]
    elif ctype == "hist":
        # Bars are pre-drawn invisible, then fade up to 0.7 alpha.
        _, _, patches = ax.hist(plot_data, bins=20, alpha=0)
        ax.set_title(spec.title); ax.set_xlabel(spec.x_col); ax.set_ylabel("Frequency")
        def init(): [p.set_alpha(0) for p in patches]; return patches
        def update(i): [p.set_alpha((i / (frames - 1)) * 0.7) for p in patches]; return patches
    else: # line
        # Line draws in left-to-right; sort the index first if needed.
        line, = ax.plot([], [], lw=2)
        plot_data = plot_data.sort_index() if not plot_data.index.is_monotonic_increasing else plot_data
        x_full, y_full = plot_data.index, plot_data.values
        ax.set_xlim(x_full.min(), x_full.max()); ax.set_ylim(y_full.min() * 0.9, y_full.max() * 1.1)
        ax.set_title(spec.title); ax.grid(alpha=.3); ax.set_xlabel(spec.x_col); ax.set_ylabel(spec.y_col)
        def init(): line.set_data([], []); return [line]
        def update(i):
            k = max(2, int(len(x_full) * (i / (frames - 1))))
            line.set_data(x_full[:k], y_full[:k]); return [line]
    anim = FuncAnimation(fig, update, init_func=init, frames=frames, blit=True, interval=1000 / fps)
    anim.save(str(out), writer=FFMpegWriter(fps=fps), dpi=144)
    plt.close(fig)
    return str(out)
220
+
221
def animate_image_fade(img: np.ndarray, dur: float, out: Path, fps: int = FPS) -> str:
    """Write an MP4 at `out` that fades `img` in from black over `dur` seconds.

    `img` must be a BGR uint8 frame already sized to (WIDTH, HEIGHT).
    Returns the output path as a str.

    Changes vs. original: the fps default now references the module-wide
    FPS constant (same value, 24) for consistency with animate_chart, and
    the writer is released in a `finally` so the container is finalized
    even if a frame write raises.
    """
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(str(out), fourcc, fps, (WIDTH, HEIGHT))
    try:
        total_frames = max(1, int(dur * fps))
        for i in range(total_frames):
            # Linear fade: alpha goes 0 -> 1 across the clip.
            alpha = i / (total_frames - 1) if total_frames > 1 else 1.0
            frame = cv2.addWeighted(img, alpha, np.zeros_like(img), 1 - alpha, 0)
            video_writer.write(frame)
    finally:
        video_writer.release()
    return str(out)
230
+
231
def safe_chart(desc: str, df: pd.DataFrame, dur: float, out: Path) -> str:
    """Produce an animated chart video for `desc`, degrading gracefully.

    Order of attempts:
      1. LLM chart spec -> animated chart (animate_chart).
      2. LLM chart spec -> static PNG (execute_chart_spec), faded in as video.
      3. Gemini-generated illustration, faded in as video.

    Returns the path of the written MP4.

    Fix vs. original: the fallback's temp PNG was only deleted on the
    success branch, leaking a file whenever the static render failed or
    raised; cleanup now happens in a `finally`.
    """
    try:
        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)
        chart_generator = ChartGenerator(llm, df)
        chart_spec = chart_generator.generate_chart_spec(desc)
        return animate_chart(chart_spec, df, dur, out)
    except Exception as e:
        logging.error(f"Chart animation failed for '{desc}': {e}. Falling back to static image.")
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_png_file:
            temp_png = Path(temp_png_file.name)
        try:
            llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)
            chart_generator = ChartGenerator(llm, df)
            chart_spec = chart_generator.generate_chart_spec(desc)
            if execute_chart_spec(chart_spec, df, temp_png):
                img = cv2.imread(str(temp_png))
                img_resized = cv2.resize(img, (WIDTH, HEIGHT))
                return animate_image_fade(img_resized, dur, out)
            # Last resort: generate an illustrative image from the description.
            img = generate_image_from_prompt(f"A professional business chart showing {desc}")
            img_cv = cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
            return animate_image_fade(img_cv, dur, out)
        finally:
            if temp_png.exists():
                os.unlink(temp_png)
252
+
253
def concat_media(file_paths: list[str], output_path: Path):
    """Concatenate media files into `output_path` using ffmpeg's concat demuxer.

    Inputs that are missing or tiny (<= 100 bytes, i.e. failed renders) are
    skipped. A single surviving input is simply copied. The temporary list
    file is removed even if ffmpeg fails.

    Raises:
        ValueError: If no input survives filtering.
        subprocess.CalledProcessError: If ffmpeg exits non-zero.
    """
    import shutil  # local: only needed for the single-file shortcut

    # The original annotated `List[str]` without importing typing.List,
    # which raised NameError at module load; builtin generics fix it.
    valid_paths = [p for p in file_paths if Path(p).exists() and Path(p).stat().st_size > 100]
    if not valid_paths:
        raise ValueError("No valid media files to concatenate.")
    if len(valid_paths) == 1:
        shutil.copy2(valid_paths[0], str(output_path))
        return
    list_file = output_path.with_suffix(".txt")
    with open(list_file, 'w') as f:
        for path in valid_paths:
            f.write(f"file '{Path(path).resolve()}'\n")
    cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(list_file), "-c", "copy", str(output_path)]
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    finally:
        list_file.unlink(missing_ok=True)
265
+
266
+ # --- Main Business Logic Functions for Flask ---
267
+
268
def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, bucket):
    """Build a Markdown BI report, render its charts, and upload them.

    Args:
        buf: Uploaded file buffer (CSV/Excel).
        name: Original filename; selects the parser.
        ctx: Free-text user context folded into the analysis prompt.
        uid: Owner id, used in the storage path.
        project_id: Project id, used in the storage path.
        bucket: Storage bucket exposing .blob()/.upload_from_filename()
                (presumably a google-cloud-storage Bucket — TODO confirm).

    Returns:
        {"raw_md": <LLM markdown with chart tags>, "chartUrls": {desc: url}}.
    """
    logging.info(f"Generating report draft for project {project_id}")
    df = load_dataframe_safely(buf, name)
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)
    ctx_dict = {"shape": df.shape, "columns": list(df.columns), "user_ctx": ctx}
    enhanced_ctx = enhance_data_context(df, ctx_dict)
    report_prompt = f"""
You are a senior data analyst and business intelligence expert. Analyze the provided dataset and write a comprehensive executive-level Markdown report.
**Dataset Analysis Context:** {json.dumps(enhanced_ctx, indent=2)}
**Instructions:**
1. **Executive Summary**: Start with a high-level summary of key findings.
2. **Key Insights**: Provide 3-5 key insights, each with its own chart tag.
3. **Visual Support**: Insert chart tags like: `<generate_chart: "chart_type | specific description">`.
Valid chart types: bar, pie, line, scatter, hist.
Generate insights that would be valuable to C-level executives.
"""
    md = llm.invoke(report_prompt).content
    # Cap rendering at MAX_CHARTS regardless of how many tags the LLM emits.
    chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
    chart_urls = {}
    chart_generator = ChartGenerator(llm, df)
    for desc in chart_descs:
        # delete=False so the path survives the `with`; removed in `finally`.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
            img_path = Path(temp_file.name)
            try:
                chart_spec = chart_generator.generate_chart_spec(desc)
                if execute_chart_spec(chart_spec, df, img_path):
                    # Random hex name avoids collisions between charts.
                    blob_name = f"sozo_projects/{uid}/{project_id}/charts/{uuid.uuid4().hex}.png"
                    blob = bucket.blob(blob_name)
                    blob.upload_from_filename(str(img_path))
                    chart_urls[desc] = blob.public_url
                    logging.info(f"Uploaded chart '{desc}' to {blob.public_url}")
            finally:
                os.unlink(img_path)
    return {"raw_md": md, "chartUrls": chart_urls}
302
+
303
def generate_single_chart(df: pd.DataFrame, description: str, uid: str, project_id: str, bucket):
    """Render one chart for `description`, upload it, and return its public URL.

    Returns None when the chart could not be rendered. The temporary PNG
    is always deleted via the `finally` block.
    """
    logging.info(f"Generating single chart '{description}' for project {project_id}")
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)
    chart_generator = ChartGenerator(llm, df)
    # delete=False so the path survives the `with`; removed in `finally`.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
        img_path = Path(temp_file.name)
        try:
            chart_spec = chart_generator.generate_chart_spec(description)
            if execute_chart_spec(chart_spec, df, img_path):
                blob_name = f"sozo_projects/{uid}/{project_id}/charts/{uuid.uuid4().hex}.png"
                blob = bucket.blob(blob_name)
                blob.upload_from_filename(str(img_path))
                logging.info(f"Uploaded single chart to {blob.public_url}")
                return blob.public_url
        finally:
            os.unlink(img_path)
    return None
320
+
321
def generate_video_from_project(df: pd.DataFrame, raw_md: str, uid: str, project_id: str, voice_model: str, bucket):
    """Turn a Markdown report into a narrated multi-scene video and upload it.

    Per scene: LLM writes a script, Deepgram narrates it (silence padding
    if TTS fails), and either an animated chart or a generated illustration
    is timed to the narration. Scene clips and audio tracks are concatenated
    and muxed with ffmpeg, then uploaded to `bucket`.

    Returns the uploaded video's public URL.

    Fix vs. original: removed the unreachable `return None` that followed
    the unconditional `return blob.public_url`.
    """
    logging.info(f"Generating video for project {project_id} with voice {voice_model}")
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.2)
    story_prompt = f"Based on the following report, create a script for a {VIDEO_SCENES}-scene video. Each scene must be separated by '[SCENE_BREAK]' and contain narration and one chart tag. Report: {raw_md}"
    script = llm.invoke(story_prompt).content
    scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
    video_parts, audio_parts, temps = [], [], []
    for sc in scenes:
        descs, narrative = extract_chart_tags(sc), clean_narration(sc)
        audio_bytes = deepgram_tts(narrative, voice_model)
        mp3 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
        if audio_bytes:
            mp3.write_bytes(audio_bytes)
            dur = audio_duration(str(mp3))
            if dur <= 0.1: dur = 5.0  # guard against zero-length probes
        else:
            # TTS unavailable: pad the scene with 5 seconds of silence.
            dur = 5.0
            generate_silence_mp3(dur, mp3)
        audio_parts.append(str(mp3)); temps.append(mp3)
        mp4 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
        if descs:
            # Visualize the first chart tag of the scene, timed to the audio.
            safe_chart(descs[0], df, dur, mp4)
        else:
            img = generate_image_from_prompt(narrative)
            img_cv = cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
            animate_image_fade(img_cv, dur, mp4)
        video_parts.append(str(mp4)); temps.append(mp4)

    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_vid, \
         tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_aud, \
         tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as final_vid:

        silent_vid_path = Path(temp_vid.name)
        audio_mix_path = Path(temp_aud.name)
        final_vid_path = Path(final_vid.name)

        concat_media(video_parts, silent_vid_path)
        concat_media(audio_parts, audio_mix_path)

        # Mux: video stream from the silent concat, audio from narration.
        subprocess.run(
            ["ffmpeg", "-y", "-i", str(silent_vid_path), "-i", str(audio_mix_path),
             "-c:v", "libx264", "-pix_fmt", "yuv420p", "-c:a", "aac",
             "-map", "0:v:0", "-map", "1:a:0", "-shortest", str(final_vid_path)],
            check=True, capture_output=True,
        )

        blob_name = f"sozo_projects/{uid}/{project_id}/video.mp4"
        blob = bucket.blob(blob_name)
        blob.upload_from_filename(str(final_vid_path))
        logging.info(f"Uploaded video to {blob.public_url}")

        # Best-effort cleanup of every intermediate file.
        for p in temps + [silent_vid_path, audio_mix_path, final_vid_path]:
            if os.path.exists(p): os.unlink(p)

        return blob.public_url