Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
##############################################################################
|
| 2 |
# Sozo Business Studio · 10-Jul-2025
|
| 3 |
-
# •
|
| 4 |
-
# •
|
| 5 |
-
# •
|
| 6 |
-
# •
|
| 7 |
##############################################################################
|
| 8 |
|
| 9 |
import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
|
|
@@ -44,26 +44,20 @@ GEM = genai.Client(api_key=API_KEY)
|
|
| 44 |
DG_KEY = os.getenv("DEEPGRAM_API_KEY") # optional narration
|
| 45 |
|
| 46 |
sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
|
| 47 |
-
|
| 48 |
-
# --- Simplified Session State (No Lazy Loading) ---
|
| 49 |
st.session_state.setdefault("bundle", None)
|
| 50 |
|
| 51 |
-
# ─── HELPERS
|
| 52 |
def load_dataframe_safely(buf: bytes, name: str) -> Tuple[pd.DataFrame, str]:
|
| 53 |
-
"""Load CSV/Excel, return (df, err)."""
|
| 54 |
try:
|
| 55 |
ext = Path(name).suffix.lower()
|
| 56 |
df = (pd.read_excel if ext in (".xlsx", ".xls") else pd.read_csv)(io.BytesIO(buf))
|
| 57 |
df.columns = df.columns.astype(str).str.strip()
|
| 58 |
df = df.dropna(how="all")
|
| 59 |
-
if df.empty or len(df.columns) == 0:
|
| 60 |
-
raise ValueError("No usable data found")
|
| 61 |
return df, None
|
| 62 |
-
except Exception as e:
|
| 63 |
-
return None, str(e)
|
| 64 |
|
| 65 |
def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
|
| 66 |
-
"""Convert for Streamlit Arrow renderer."""
|
| 67 |
safe = df.copy()
|
| 68 |
for c in safe.columns:
|
| 69 |
if safe[c].dtype.name in ("Int64", "Float64", "Boolean"):
|
|
@@ -72,105 +66,52 @@ def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 72 |
|
| 73 |
@st.cache_data(show_spinner=False)
|
| 74 |
def deepgram_tts(txt: str) -> Tuple[bytes, str]:
|
| 75 |
-
|
| 76 |
-
if not DG_KEY or not txt:
|
| 77 |
-
return None, None
|
| 78 |
txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)[:1000]
|
| 79 |
try:
|
| 80 |
-
r = requests.post(
|
| 81 |
-
"https://api.deepgram.com/v1/speak",
|
| 82 |
-
params={"model": "aura-2-andromeda-en"},
|
| 83 |
-
headers={
|
| 84 |
-
"Authorization": f"Token {DG_KEY}",
|
| 85 |
-
"Content-Type": "application/json",
|
| 86 |
-
},
|
| 87 |
-
json={"text": txt},
|
| 88 |
-
timeout=30,
|
| 89 |
-
)
|
| 90 |
r.raise_for_status()
|
| 91 |
return r.content, r.headers.get("Content-Type", "audio/mpeg")
|
| 92 |
-
except Exception:
|
| 93 |
-
return None, None
|
| 94 |
|
| 95 |
def generate_silence_mp3(duration: float, out: Path):
|
| 96 |
-
subprocess.run(
|
| 97 |
-
[ "ffmpeg", "-y", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono", "-t", f"{duration:.3f}", "-q:a", "9", str(out), ],
|
| 98 |
-
check=True, capture_output=True,
|
| 99 |
-
)
|
| 100 |
|
| 101 |
def audio_duration(path: str) -> float:
|
| 102 |
try:
|
| 103 |
-
res = subprocess.run(
|
| 104 |
-
[ "ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=nw=1:nk=1", path, ],
|
| 105 |
-
text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True,
|
| 106 |
-
)
|
| 107 |
return float(res.stdout.strip())
|
| 108 |
-
except Exception:
|
| 109 |
-
return 5.0
|
| 110 |
|
| 111 |
TAG_RE = re.compile( r'[<[]\s*generate_?chart\s*[:=]?\s*[\"\'“”]?(?P<d>[^>\"\'”\]]+?)[\"\'“”]?\s*[>\]]', re.I, )
|
| 112 |
extract_chart_tags = lambda t: list( dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")) )
|
| 113 |
|
| 114 |
re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I | re.M)
|
| 115 |
def clean_narration(txt: str) -> str:
|
| 116 |
-
"""
|
| 117 |
-
Aggressively cleans text for text-to-speech by removing artifacts.
|
| 118 |
-
This function no longer relies on the LLM to format correctly.
|
| 119 |
-
"""
|
| 120 |
-
# 1. Remove chart tags
|
| 121 |
txt = TAG_RE.sub("", txt)
|
| 122 |
-
|
| 123 |
-
# 2. Remove scene numbers (e.g., "Scene 1:", "SCENE 2.", etc.)
|
| 124 |
txt = re_scene.sub("", txt)
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
phrases_to_remove = [
|
| 128 |
-
r"as you can see in the chart",
|
| 129 |
-
r"this chart shows",
|
| 130 |
-
r"the chart illustrates",
|
| 131 |
-
r"in this visual",
|
| 132 |
-
r"this graph displays",
|
| 133 |
-
]
|
| 134 |
-
for phrase in phrases_to_remove:
|
| 135 |
-
txt = re.sub(phrase, "", txt, flags=re.IGNORECASE)
|
| 136 |
-
|
| 137 |
-
# 4. Remove text within parentheses, which often contains notes
|
| 138 |
txt = re.sub(r"\s*\([^)]*\)", "", txt)
|
| 139 |
-
|
| 140 |
-
# 5. Remove any remaining markdown or formatting artifacts
|
| 141 |
txt = re.sub(r"[\*#_]", "", txt)
|
| 142 |
-
|
| 143 |
-
# 6. Normalize whitespace to a single space
|
| 144 |
return re.sub(r"\s{2,}", " ", txt).strip()
|
| 145 |
|
| 146 |
-
|
| 147 |
-
def placeholder_img() -> Image.Image:
|
| 148 |
-
return Image.new("RGB", (WIDTH, HEIGHT), (230, 230, 230))
|
| 149 |
|
| 150 |
def generate_image_from_prompt(prompt: str) -> Image.Image:
|
| 151 |
-
model_main = "gemini-2.0-flash-exp-image-generation"
|
| 152 |
-
model_fallback = "gemini-2.0-flash-preview-image-generation"
|
| 153 |
full_prompt = "A clean business-presentation illustration: " + prompt
|
| 154 |
-
|
| 155 |
def fetch(model_name):
|
| 156 |
-
res = GEM.models.generate_content(
|
| 157 |
-
model=model_name, contents=full_prompt,
|
| 158 |
-
config=types.GenerateContentConfig(response_modalities=["IMAGE"]),
|
| 159 |
-
)
|
| 160 |
for part in res.candidates[0].content.parts:
|
| 161 |
-
if getattr(part, "inline_data", None):
|
| 162 |
-
return Image.open(io.BytesIO(part.inline_data.data)).convert("RGB")
|
| 163 |
return None
|
| 164 |
-
|
| 165 |
try:
|
| 166 |
img = fetch(model_main) or fetch(model_fallback)
|
| 167 |
return img if img else placeholder_img()
|
| 168 |
-
except Exception:
|
| 169 |
-
return placeholder_img()
|
| 170 |
|
| 171 |
-
# ─── PDF GENERATION ────────────────────────────────────────────────────────
|
| 172 |
class PDF(FPDF, HTMLMixin): pass
|
| 173 |
-
|
| 174 |
def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
|
| 175 |
def embed_chart_for_pdf(match):
|
| 176 |
desc = match.group("d").strip()
|
|
@@ -179,888 +120,218 @@ def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
|
|
| 179 |
b64 = base64.b64encode(Path(path).read_bytes()).decode()
|
| 180 |
return f'<img src="data:image/png;base64,{b64}" width="600">'
|
| 181 |
return ""
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
)
|
| 186 |
-
pdf = PDF()
|
| 187 |
-
pdf.set_auto_page_break(True, margin=15)
|
| 188 |
-
pdf.add_page()
|
| 189 |
-
pdf.set_font("Arial", "B", 18)
|
| 190 |
-
pdf.cell(0, 12, "AI-Generated Business Report", ln=True)
|
| 191 |
-
pdf.ln(3)
|
| 192 |
-
pdf.set_font("Arial", "", 11)
|
| 193 |
-
pdf.write_html(html)
|
| 194 |
return pdf.output(dest="S")
|
| 195 |
|
| 196 |
-
# ─── QUICK STATIC CHART (fallback if LLM code fails) ───────────────────────
|
| 197 |
def quick_chart(desc: str, df: pd.DataFrame, out: Path):
|
| 198 |
-
ctype, *rest = [s.strip().lower() for s in desc.split("|", 1)]
|
| 199 |
-
ctype = ctype or "bar"
|
| 200 |
title = rest[0] if rest else desc
|
| 201 |
-
num_cols = df.select_dtypes("number").columns
|
| 202 |
-
cat_cols = df.select_dtypes(exclude="number").columns
|
| 203 |
-
|
| 204 |
with plt.ioff():
|
| 205 |
fig, ax = plt.subplots(figsize=(6, 3.4), dpi=150)
|
| 206 |
-
if ctype == "pie" and len(cat_cols) >= 1 and len(num_cols) >= 1:
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
elif ctype == "
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
ax.hist(df[num_cols[0]], bins=20, alpha=0.7)
|
| 215 |
-
else: # bar fallback
|
| 216 |
-
plot = df[num_cols[0]].value_counts().head(10)
|
| 217 |
-
plot.plot(kind="bar", ax=ax)
|
| 218 |
-
ax.set_title(title)
|
| 219 |
-
fig.tight_layout()
|
| 220 |
-
fig.savefig(out, bbox_inches="tight", facecolor="white")
|
| 221 |
-
plt.close(fig)
|
| 222 |
-
|
| 223 |
-
# ─── SYNCHRONOUS REPORT GENERATION (NO LAZY LOADING) ─────────────────────────
|
| 224 |
-
# ─── ENHANCED CHART GENERATION SYSTEM ────────────────────────────────────────
|
| 225 |
-
|
| 226 |
class ChartSpecification:
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
top_n: int = None, color_scheme: str = "professional"):
|
| 231 |
-
self.chart_type = chart_type
|
| 232 |
-
self.title = title
|
| 233 |
-
self.x_col = x_col
|
| 234 |
-
self.y_col = y_col
|
| 235 |
-
self.agg_method = agg_method or "sum"
|
| 236 |
-
self.filter_condition = filter_condition
|
| 237 |
-
self.top_n = top_n
|
| 238 |
-
self.color_scheme = color_scheme
|
| 239 |
|
| 240 |
def enhance_data_context(df: pd.DataFrame, ctx_dict: Dict) -> Dict:
|
| 241 |
-
|
| 242 |
-
enhanced_ctx
|
| 243 |
-
|
| 244 |
-
# Add statistical insights
|
| 245 |
-
numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
|
| 246 |
-
categorical_cols = df.select_dtypes(exclude=['number']).columns.tolist()
|
| 247 |
-
|
| 248 |
-
enhanced_ctx.update({
|
| 249 |
-
"numeric_columns": numeric_cols,
|
| 250 |
-
"categorical_columns": categorical_cols,
|
| 251 |
-
"data_insights": {
|
| 252 |
-
"has_time_series": any(col.lower() in ['date', 'time', 'month', 'year'] for col in df.columns),
|
| 253 |
-
"has_categories": len(categorical_cols) > 0,
|
| 254 |
-
"has_numeric": len(numeric_cols) > 0,
|
| 255 |
-
"record_count": len(df),
|
| 256 |
-
"correlation_pairs": get_correlation_pairs(df, numeric_cols) if len(numeric_cols) > 1 else []
|
| 257 |
-
},
|
| 258 |
-
"recommended_charts": recommend_chart_types(df, numeric_cols, categorical_cols)
|
| 259 |
-
})
|
| 260 |
-
|
| 261 |
return enhanced_ctx
|
| 262 |
|
| 263 |
def get_correlation_pairs(df: pd.DataFrame, numeric_cols: List[str]) -> List[Tuple[str, str, float]]:
|
| 264 |
-
|
| 265 |
-
correlations = []
|
| 266 |
if len(numeric_cols) > 1:
|
| 267 |
corr_matrix = df[numeric_cols].corr()
|
| 268 |
for i, col1 in enumerate(numeric_cols):
|
| 269 |
for j, col2 in enumerate(numeric_cols[i+1:], i+1):
|
| 270 |
-
|
| 271 |
-
if abs(corr_val) > 0.5: # Strong correlation
|
| 272 |
-
correlations.append((col1, col2, corr_val))
|
| 273 |
return correlations
|
| 274 |
|
| 275 |
def recommend_chart_types(df: pd.DataFrame, numeric_cols: List[str], categorical_cols: List[str]) -> Dict[str, str]:
|
| 276 |
-
"""Recommend appropriate chart types based on data structure"""
|
| 277 |
recommendations = {}
|
| 278 |
-
|
| 279 |
if len(categorical_cols) > 0 and len(numeric_cols) > 0:
|
| 280 |
-
# Category + Numeric = Bar chart
|
| 281 |
recommendations["bar"] = f"Compare {numeric_cols[0]} across {categorical_cols[0]}"
|
| 282 |
-
|
| 283 |
-
# If few categories, pie chart possible
|
| 284 |
-
if len(df[categorical_cols[0]].unique()) <= 6:
|
| 285 |
-
recommendations["pie"] = f"Distribution of {numeric_cols[0]} by {categorical_cols[0]}"
|
| 286 |
-
|
| 287 |
if len(numeric_cols) > 1:
|
| 288 |
-
# Multiple numeric = Scatter plot
|
| 289 |
recommendations["scatter"] = f"Relationship between {numeric_cols[0]} and {numeric_cols[1]}"
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
date_cols = [col for col in df.columns if any(word in col.lower() for word in ['date', 'time', 'month', 'year'])]
|
| 293 |
-
if date_cols:
|
| 294 |
-
recommendations["line"] = f"Trend of {numeric_cols[0]} over time"
|
| 295 |
-
|
| 296 |
-
if len(numeric_cols) > 0:
|
| 297 |
-
# Distribution analysis
|
| 298 |
-
recommendations["hist"] = f"Distribution of {numeric_cols[0]}"
|
| 299 |
-
|
| 300 |
return recommendations
|
| 301 |
|
| 302 |
-
def create_chart_generator(llm, df: pd.DataFrame) -> 'ChartGenerator':
|
| 303 |
-
"""Create a reliable chart generator to replace pandas agent"""
|
| 304 |
-
return ChartGenerator(llm, df)
|
| 305 |
|
| 306 |
class ChartGenerator:
|
| 307 |
-
"""Reliable chart generation system using AI specifications"""
|
| 308 |
-
|
| 309 |
def __init__(self, llm, df: pd.DataFrame):
|
| 310 |
-
self.llm = llm
|
| 311 |
-
self.
|
| 312 |
-
|
| 313 |
-
"columns": list(df.columns),
|
| 314 |
-
"shape": df.shape,
|
| 315 |
-
"dtypes": {col: str(dtype) for col, dtype in df.dtypes.items()}
|
| 316 |
-
})
|
| 317 |
-
|
| 318 |
def generate_chart_spec(self, description: str) -> ChartSpecification:
|
| 319 |
-
"""Use AI to generate detailed chart specifications"""
|
| 320 |
spec_prompt = f"""
|
| 321 |
You are a data visualization expert. Based on the dataset and chart description, generate a precise chart specification.
|
| 322 |
-
|
| 323 |
-
**Dataset Info:**
|
| 324 |
-
- Columns: {self.enhanced_ctx['columns']}
|
| 325 |
-
- Shape: {self.enhanced_ctx['shape']}
|
| 326 |
-
- Numeric columns: {self.enhanced_ctx['numeric_columns']}
|
| 327 |
-
- Categorical columns: {self.enhanced_ctx['categorical_columns']}
|
| 328 |
-
- Data insights: {json.dumps(self.enhanced_ctx['data_insights'], indent=2)}
|
| 329 |
-
|
| 330 |
**Chart Request:** {description}
|
| 331 |
-
|
| 332 |
-
**Instructions:**
|
| 333 |
-
1. Analyze the dataset structure and the chart description
|
| 334 |
-
2. Choose the most appropriate chart type: bar, pie, line, scatter, or hist
|
| 335 |
-
3. Select the best columns for x and y axes
|
| 336 |
-
4. Determine if data aggregation is needed
|
| 337 |
-
5. Suggest appropriate filtering if dataset is large
|
| 338 |
-
|
| 339 |
**Return a JSON specification with these exact fields:**
|
| 340 |
{{
|
| 341 |
-
"chart_type": "bar|pie|line|scatter|hist",
|
| 342 |
-
"
|
| 343 |
-
"
|
| 344 |
-
"y_col": "column_name_for_y_axis_or_null",
|
| 345 |
-
"agg_method": "sum|mean|count|max|min|null",
|
| 346 |
-
"filter_condition": "description_of_filtering_or_null",
|
| 347 |
-
"top_n": "number_for_top_n_filtering_or_null",
|
| 348 |
-
"reasoning": "Why this specification was chosen"
|
| 349 |
}}
|
| 350 |
-
|
| 351 |
-
**Validation Rules:**
|
| 352 |
-
- All column names must exist in the dataset
|
| 353 |
-
- Chart type must match the data structure
|
| 354 |
-
- For pie charts: use categorical + numeric columns, limit to top 6 categories
|
| 355 |
-
- For bar charts: use categorical x-axis, numeric y-axis
|
| 356 |
-
- For line charts: use sequential/time data
|
| 357 |
-
- For scatter: use two numeric columns
|
| 358 |
-
- For hist: use single numeric column
|
| 359 |
-
|
| 360 |
Return only the JSON specification, no additional text.
|
| 361 |
"""
|
| 362 |
-
|
| 363 |
try:
|
| 364 |
-
response = self.llm.invoke(spec_prompt).content
|
| 365 |
-
|
| 366 |
-
response = response
|
| 367 |
-
if response.startswith("```json"):
|
| 368 |
-
response = response[7:-3]
|
| 369 |
-
elif response.startswith("```"):
|
| 370 |
-
response = response[3:-3]
|
| 371 |
-
|
| 372 |
spec_dict = json.loads(response)
|
| 373 |
return ChartSpecification(**{k: v for k, v in spec_dict.items() if k != 'reasoning'})
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
# Fallback to simple specification
|
| 377 |
-
return self._create_fallback_spec(description)
|
| 378 |
-
|
| 379 |
def _create_fallback_spec(self, description: str) -> ChartSpecification:
|
| 380 |
-
|
| 381 |
-
numeric_cols
|
| 382 |
-
categorical_cols
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
elif "pie" in description.lower() and categorical_cols and numeric_cols:
|
| 388 |
-
return ChartSpecification("pie", description, categorical_cols[0], numeric_cols[0])
|
| 389 |
-
elif "line" in description.lower() and len(numeric_cols) >= 2:
|
| 390 |
-
return ChartSpecification("line", description, numeric_cols[0], numeric_cols[1])
|
| 391 |
-
elif "scatter" in description.lower() and len(numeric_cols) >= 2:
|
| 392 |
-
return ChartSpecification("scatter", description, numeric_cols[0], numeric_cols[1])
|
| 393 |
-
elif numeric_cols:
|
| 394 |
-
return ChartSpecification("hist", description, numeric_cols[0], None)
|
| 395 |
-
else:
|
| 396 |
-
return ChartSpecification("bar", description, self.df.columns[0], self.df.columns[1] if len(self.df.columns) > 1 else None)
|
| 397 |
|
| 398 |
def execute_chart_spec(spec: ChartSpecification, df: pd.DataFrame, output_path: Path) -> bool:
|
| 399 |
-
"""Execute chart specification with reliable matplotlib implementation"""
|
| 400 |
try:
|
| 401 |
-
# Prepare data based on specification
|
| 402 |
plot_data = prepare_plot_data(spec, df)
|
| 403 |
-
|
| 404 |
-
#
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
#
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
ax.set_xlabel(spec.x_col)
|
| 412 |
-
ax.set_ylabel(spec.y_col)
|
| 413 |
-
ax.tick_params(axis='x', rotation=45)
|
| 414 |
-
|
| 415 |
-
elif spec.chart_type == "pie":
|
| 416 |
-
wedges, texts, autotexts = ax.pie(plot_data.values, labels=plot_data.index,
|
| 417 |
-
autopct='%1.1f%%', startangle=90)
|
| 418 |
-
ax.axis('equal')
|
| 419 |
-
|
| 420 |
-
elif spec.chart_type == "line":
|
| 421 |
-
ax.plot(plot_data.index, plot_data.values, marker='o', linewidth=2, color='#A23B72')
|
| 422 |
-
ax.set_xlabel(spec.x_col)
|
| 423 |
-
ax.set_ylabel(spec.y_col)
|
| 424 |
-
ax.grid(True, alpha=0.3)
|
| 425 |
-
|
| 426 |
-
elif spec.chart_type == "scatter":
|
| 427 |
-
ax.scatter(plot_data.iloc[:, 0], plot_data.iloc[:, 1], alpha=0.6, color='#F18F01')
|
| 428 |
-
ax.set_xlabel(spec.x_col)
|
| 429 |
-
ax.set_ylabel(spec.y_col)
|
| 430 |
-
ax.grid(True, alpha=0.3)
|
| 431 |
-
|
| 432 |
-
elif spec.chart_type == "hist":
|
| 433 |
-
ax.hist(plot_data.values, bins=20, color='#C73E1D', alpha=0.7, edgecolor='black')
|
| 434 |
-
ax.set_xlabel(spec.x_col)
|
| 435 |
-
ax.set_ylabel('Frequency')
|
| 436 |
-
ax.grid(True, alpha=0.3)
|
| 437 |
-
|
| 438 |
-
# Apply consistent styling
|
| 439 |
-
ax.set_title(spec.title, fontsize=14, fontweight='bold', pad=20)
|
| 440 |
-
plt.tight_layout()
|
| 441 |
-
|
| 442 |
-
# Save with high quality
|
| 443 |
-
plt.savefig(output_path, dpi=300, bbox_inches='tight', facecolor='white')
|
| 444 |
-
plt.close()
|
| 445 |
-
|
| 446 |
return True
|
| 447 |
-
|
| 448 |
-
except Exception as e:
|
| 449 |
-
print(f"Chart generation failed: {e}")
|
| 450 |
-
return False
|
| 451 |
|
| 452 |
def prepare_plot_data(spec: ChartSpecification, df: pd.DataFrame) -> pd.Series:
|
| 453 |
-
"
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
"""Enhanced animation system using chart specifications"""
|
| 502 |
-
try:
|
| 503 |
-
plot_data = prepare_plot_data(spec, df)
|
| 504 |
-
frames = max(10, int(dur * fps))
|
| 505 |
-
|
| 506 |
-
fig, ax = plt.subplots(figsize=(16, 9), dpi=100)
|
| 507 |
-
plt.style.use('default')
|
| 508 |
-
|
| 509 |
-
# Animation logic based on chart type
|
| 510 |
-
if spec.chart_type == "bar":
|
| 511 |
-
bars = ax.bar(plot_data.index, np.zeros_like(plot_data.values), color='#2E86AB', alpha=0.8)
|
| 512 |
-
ax.set_ylim(0, plot_data.max() * 1.1)
|
| 513 |
-
ax.set_xlabel(spec.x_col)
|
| 514 |
-
ax.set_ylabel(spec.y_col)
|
| 515 |
-
ax.tick_params(axis='x', rotation=45)
|
| 516 |
-
|
| 517 |
-
def animate(frame):
|
| 518 |
-
progress = frame / (frames - 1)
|
| 519 |
-
for bar, height in zip(bars, plot_data.values):
|
| 520 |
-
bar.set_height(height * progress)
|
| 521 |
-
return bars
|
| 522 |
-
|
| 523 |
-
elif spec.chart_type == "pie":
|
| 524 |
-
wedges, texts, autotexts = ax.pie(plot_data.values, labels=plot_data.index,
|
| 525 |
-
autopct='%1.1f%%', startangle=90)
|
| 526 |
-
ax.axis('equal')
|
| 527 |
-
|
| 528 |
-
def animate(frame):
|
| 529 |
-
progress = frame / (frames - 1)
|
| 530 |
-
for wedge in wedges:
|
| 531 |
-
wedge.set_alpha(progress)
|
| 532 |
-
return wedges
|
| 533 |
-
|
| 534 |
-
elif spec.chart_type == "line":
|
| 535 |
-
line, = ax.plot([], [], marker='o', linewidth=2, color='#A23B72')
|
| 536 |
-
ax.set_xlim(0, len(plot_data))
|
| 537 |
-
ax.set_ylim(plot_data.min() * 0.9, plot_data.max() * 1.1)
|
| 538 |
-
ax.set_xlabel(spec.x_col)
|
| 539 |
-
ax.set_ylabel(spec.y_col)
|
| 540 |
-
ax.grid(True, alpha=0.3)
|
| 541 |
-
|
| 542 |
-
def animate(frame):
|
| 543 |
-
progress = frame / (frames - 1)
|
| 544 |
-
points = max(2, int(len(plot_data) * progress))
|
| 545 |
-
x_data = range(points)
|
| 546 |
-
y_data = plot_data.iloc[:points]
|
| 547 |
-
line.set_data(x_data, y_data)
|
| 548 |
-
return [line]
|
| 549 |
-
|
| 550 |
-
elif spec.chart_type == "scatter":
|
| 551 |
-
scat = ax.scatter([], [], alpha=0.6, color='#F18F01')
|
| 552 |
-
ax.set_xlim(plot_data.iloc[:, 0].min(), plot_data.iloc[:, 0].max())
|
| 553 |
-
ax.set_ylim(plot_data.iloc[:, 1].min(), plot_data.iloc[:, 1].max())
|
| 554 |
-
ax.set_xlabel(spec.x_col)
|
| 555 |
-
ax.set_ylabel(spec.y_col)
|
| 556 |
-
ax.grid(True, alpha=0.3)
|
| 557 |
-
|
| 558 |
-
def animate(frame):
|
| 559 |
-
progress = frame / (frames - 1)
|
| 560 |
-
points = max(1, int(len(plot_data) * progress))
|
| 561 |
-
scat.set_offsets(plot_data.iloc[:points].values)
|
| 562 |
-
return [scat]
|
| 563 |
-
|
| 564 |
-
elif spec.chart_type == "hist":
|
| 565 |
-
n, bins, patches = ax.hist(plot_data.values, bins=20, color='#C73E1D', alpha=0.7, edgecolor='black')
|
| 566 |
-
ax.set_xlabel(spec.x_col)
|
| 567 |
-
ax.set_ylabel('Frequency')
|
| 568 |
-
ax.grid(True, alpha=0.3)
|
| 569 |
-
|
| 570 |
-
def animate(frame):
|
| 571 |
-
progress = frame / (frames - 1)
|
| 572 |
-
for patch in patches:
|
| 573 |
-
patch.set_alpha(progress * 0.7)
|
| 574 |
-
return patches
|
| 575 |
-
|
| 576 |
-
# Apply title and styling
|
| 577 |
-
ax.set_title(spec.title, fontsize=14, fontweight='bold', pad=20)
|
| 578 |
-
plt.tight_layout()
|
| 579 |
-
|
| 580 |
-
# Create animation
|
| 581 |
-
anim = FuncAnimation(fig, animate, frames=frames, interval=1000/fps, blit=True, repeat=False)
|
| 582 |
-
|
| 583 |
-
# Save animation
|
| 584 |
-
writer = FFMpegWriter(fps=fps, metadata={'artist': 'Enhanced Chart System'})
|
| 585 |
-
anim.save(str(out), writer=writer, dpi=144)
|
| 586 |
-
plt.close()
|
| 587 |
-
|
| 588 |
-
return str(out)
|
| 589 |
-
|
| 590 |
-
except Exception as e:
|
| 591 |
-
print(f"Animation failed: {e}")
|
| 592 |
-
# Fallback to static chart animation
|
| 593 |
-
return animate_chart_fallback(spec, df, dur, out, fps)
|
| 594 |
|
| 595 |
-
def
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
return animate_image_fade(img, dur, out, fps)
|
| 605 |
-
|
| 606 |
-
# Ultimate fallback - simple plot
|
| 607 |
-
fig, ax = plt.subplots(figsize=(16, 9))
|
| 608 |
-
ax.text(0.5, 0.5, f"Chart: {spec.title}", ha='center', va='center', fontsize=20)
|
| 609 |
-
ax.set_xlim(0, 1)
|
| 610 |
-
ax.set_ylim(0, 1)
|
| 611 |
-
ax.axis('off')
|
| 612 |
-
|
| 613 |
-
temp_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
|
| 614 |
-
plt.savefig(temp_png, dpi=300, bbox_inches='tight', facecolor='white')
|
| 615 |
-
plt.close()
|
| 616 |
-
|
| 617 |
-
img = cv2.imread(str(temp_png))
|
| 618 |
-
img = cv2.resize(img, (1920, 1080))
|
| 619 |
-
return animate_image_fade(img, dur, out, fps)
|
| 620 |
-
|
| 621 |
-
except Exception:
|
| 622 |
-
return str(out) # Return path even if failed
|
| 623 |
|
| 624 |
def safe_chart(desc: str, df: pd.DataFrame, dur: float, out: Path) -> str:
|
| 625 |
-
"""
|
| 626 |
-
Enhanced safe chart generation with animation for video pipeline.
|
| 627 |
-
|
| 628 |
-
This function integrates with the existing ChartGenerator system to create
|
| 629 |
-
animated charts that are suitable for video scenes. It provides multiple
|
| 630 |
-
fallback layers to ensure reliable chart generation.
|
| 631 |
-
|
| 632 |
-
Args:
|
| 633 |
-
desc (str): Chart description/specification
|
| 634 |
-
df (pd.DataFrame): Source data
|
| 635 |
-
dur (float): Duration in seconds for animation
|
| 636 |
-
out (Path): Output video file path
|
| 637 |
-
|
| 638 |
-
Returns:
|
| 639 |
-
str: Path to generated video file
|
| 640 |
-
"""
|
| 641 |
try:
|
| 642 |
-
|
| 643 |
-
llm = ChatGoogleGenerativeAI(
|
| 644 |
-
model="gemini-2.0-flash",
|
| 645 |
-
google_api_key=API_KEY,
|
| 646 |
-
temperature=0.1
|
| 647 |
-
)
|
| 648 |
chart_generator = create_chart_generator(llm, df)
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
with st.spinner(f"Analyzing chart requirements: {desc}..."):
|
| 652 |
-
chart_spec = chart_generator.generate_chart_spec(desc)
|
| 653 |
-
|
| 654 |
-
# Attempt enhanced animation with specification
|
| 655 |
-
try:
|
| 656 |
-
return animate_chart_with_spec(chart_spec, df, dur, out, fps=FPS)
|
| 657 |
-
except Exception as anim_error:
|
| 658 |
-
print(f"Enhanced animation failed: {anim_error}")
|
| 659 |
-
|
| 660 |
-
# Fallback 1: Static chart with fade animation
|
| 661 |
-
try:
|
| 662 |
-
temp_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
|
| 663 |
-
if execute_chart_spec(chart_spec, df, temp_png):
|
| 664 |
-
img = cv2.imread(str(temp_png))
|
| 665 |
-
if img is not None:
|
| 666 |
-
img = cv2.resize(img, (WIDTH, HEIGHT))
|
| 667 |
-
return animate_image_fade(img, dur, out, fps=FPS)
|
| 668 |
-
else:
|
| 669 |
-
raise RuntimeError("Failed to load generated chart image")
|
| 670 |
-
else:
|
| 671 |
-
raise RuntimeError("Chart specification execution failed")
|
| 672 |
-
|
| 673 |
-
except Exception as static_error:
|
| 674 |
-
print(f"Static chart generation failed: {static_error}")
|
| 675 |
-
|
| 676 |
-
# Fallback 2: Quick chart generation
|
| 677 |
-
try:
|
| 678 |
-
temp_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
|
| 679 |
-
quick_chart(desc, df, temp_png)
|
| 680 |
-
|
| 681 |
-
if temp_png.exists():
|
| 682 |
-
img = cv2.imread(str(temp_png))
|
| 683 |
-
if img is not None:
|
| 684 |
-
img = cv2.resize(img, (WIDTH, HEIGHT))
|
| 685 |
-
return animate_image_fade(img, dur, out, fps=FPS)
|
| 686 |
-
else:
|
| 687 |
-
raise RuntimeError("Failed to load quick chart image")
|
| 688 |
-
else:
|
| 689 |
-
raise RuntimeError("Quick chart generation failed")
|
| 690 |
-
|
| 691 |
-
except Exception as quick_error:
|
| 692 |
-
print(f"Quick chart generation failed: {quick_error}")
|
| 693 |
-
|
| 694 |
-
# Fallback 3: AI-generated image
|
| 695 |
-
try:
|
| 696 |
-
# Generate descriptive prompt for AI image generation
|
| 697 |
-
img_prompt = f"Professional business chart showing {desc}. Clean, modern design with clear data visualization."
|
| 698 |
-
img = generate_image_from_prompt(img_prompt)
|
| 699 |
-
|
| 700 |
-
# Convert PIL to OpenCV format
|
| 701 |
-
img_cv = cv2.cvtColor(
|
| 702 |
-
np.array(img.resize((WIDTH, HEIGHT))),
|
| 703 |
-
cv2.COLOR_RGB2BGR
|
| 704 |
-
)
|
| 705 |
-
return animate_image_fade(img_cv, dur, out, fps=FPS)
|
| 706 |
-
|
| 707 |
-
except Exception as ai_error:
|
| 708 |
-
print(f"AI image generation failed: {ai_error}")
|
| 709 |
-
|
| 710 |
-
# Fallback 4: Placeholder with text
|
| 711 |
-
return create_placeholder_chart_video(desc, dur, out)
|
| 712 |
-
|
| 713 |
-
except Exception as e:
|
| 714 |
-
print(f"Safe chart generation completely failed: {e}")
|
| 715 |
-
# Ultimate fallback
|
| 716 |
-
return create_placeholder_chart_video(desc, dur, out)
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
def create_placeholder_chart_video(desc: str, dur: float, out: Path) -> str:
|
| 720 |
-
"""
|
| 721 |
-
Create a placeholder video with descriptive text when all chart generation fails.
|
| 722 |
-
|
| 723 |
-
Args:
|
| 724 |
-
desc (str): Chart description
|
| 725 |
-
dur (float): Duration in seconds
|
| 726 |
-
out (Path): Output path
|
| 727 |
-
|
| 728 |
-
Returns:
|
| 729 |
-
str: Path to generated placeholder video
|
| 730 |
-
"""
|
| 731 |
-
try:
|
| 732 |
-
# Create a professional-looking placeholder
|
| 733 |
-
fig, ax = plt.subplots(figsize=(16, 9), dpi=100)
|
| 734 |
-
fig.patch.set_facecolor('#f8f9fa')
|
| 735 |
-
ax.set_facecolor('#ffffff')
|
| 736 |
-
|
| 737 |
-
# Add title and description
|
| 738 |
-
ax.text(0.5, 0.65, "Data Visualization",
|
| 739 |
-
ha='center', va='center', fontsize=24, fontweight='bold',
|
| 740 |
-
color='#2c3e50', transform=ax.transAxes)
|
| 741 |
-
|
| 742 |
-
ax.text(0.5, 0.45, desc,
|
| 743 |
-
ha='center', va='center', fontsize=16,
|
| 744 |
-
color='#34495e', transform=ax.transAxes,
|
| 745 |
-
wrap=True, bbox=dict(boxstyle="round,pad=0.3", facecolor='#ecf0f1', alpha=0.8))
|
| 746 |
-
|
| 747 |
-
ax.text(0.5, 0.25, "Chart generation in progress...",
|
| 748 |
-
ha='center', va='center', fontsize=12,
|
| 749 |
-
color='#7f8c8d', transform=ax.transAxes)
|
| 750 |
-
|
| 751 |
-
# Add some decorative elements
|
| 752 |
-
ax.add_patch(plt.Rectangle((0.1, 0.1), 0.8, 0.8,
|
| 753 |
-
fill=False, edgecolor='#3498db', linewidth=3,
|
| 754 |
-
transform=ax.transAxes))
|
| 755 |
-
|
| 756 |
-
ax.set_xlim(0, 1)
|
| 757 |
-
ax.set_ylim(0, 1)
|
| 758 |
-
ax.axis('off')
|
| 759 |
-
|
| 760 |
-
# Save as temporary image
|
| 761 |
-
temp_png = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
|
| 762 |
-
plt.savefig(temp_png, dpi=150, bbox_inches='tight',
|
| 763 |
-
facecolor='#f8f9fa', edgecolor='none')
|
| 764 |
-
plt.close()
|
| 765 |
-
|
| 766 |
-
# Convert to video
|
| 767 |
-
img = cv2.imread(str(temp_png))
|
| 768 |
-
if img is not None:
|
| 769 |
-
img = cv2.resize(img, (WIDTH, HEIGHT))
|
| 770 |
-
return animate_image_fade(img, dur, out, fps=FPS)
|
| 771 |
-
else:
|
| 772 |
-
# Last resort: create solid color video
|
| 773 |
-
return create_solid_color_video(dur, out)
|
| 774 |
-
|
| 775 |
except Exception as e:
|
| 776 |
-
print(f"
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
|
| 780 |
-
def create_solid_color_video(dur: float, out: Path) -> str:
|
| 781 |
-
"""
|
| 782 |
-
Create a simple solid color video as the ultimate fallback.
|
| 783 |
-
|
| 784 |
-
Args:
|
| 785 |
-
dur (float): Duration in seconds
|
| 786 |
-
out (Path): Output path
|
| 787 |
-
|
| 788 |
-
Returns:
|
| 789 |
-
str: Path to generated video
|
| 790 |
-
"""
|
| 791 |
-
try:
|
| 792 |
-
# Create a simple colored frame
|
| 793 |
-
frame = np.full((HEIGHT, WIDTH, 3), [240, 240, 240], dtype=np.uint8)
|
| 794 |
-
|
| 795 |
-
# Add simple text
|
| 796 |
-
cv2.putText(frame, "Data Visualization",
|
| 797 |
-
(WIDTH//2 - 200, HEIGHT//2 - 50),
|
| 798 |
-
cv2.FONT_HERSHEY_SIMPLEX, 2, (100, 100, 100), 3)
|
| 799 |
-
|
| 800 |
-
cv2.putText(frame, "Loading...",
|
| 801 |
-
(WIDTH//2 - 80, HEIGHT//2 + 50),
|
| 802 |
-
cv2.FONT_HERSHEY_SIMPLEX, 1, (150, 150, 150), 2)
|
| 803 |
-
|
| 804 |
-
# Write video
|
| 805 |
-
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 806 |
-
video_writer = cv2.VideoWriter(str(out), fourcc, FPS, (WIDTH, HEIGHT))
|
| 807 |
-
|
| 808 |
-
total_frames = int(dur * FPS)
|
| 809 |
-
for i in range(total_frames):
|
| 810 |
-
video_writer.write(frame)
|
| 811 |
-
|
| 812 |
-
video_writer.release()
|
| 813 |
-
return str(out)
|
| 814 |
-
|
| 815 |
-
except Exception as e:
|
| 816 |
-
print(f"Solid color video creation failed: {e}")
|
| 817 |
-
# If even this fails, just return the output path
|
| 818 |
-
return str(out)
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
def animate_image_fade(img: np.ndarray, dur: float, out: Path, fps: int = 24) -> str:
|
| 822 |
-
"""
|
| 823 |
-
Create a fade-in animation for static images.
|
| 824 |
-
|
| 825 |
-
Args:
|
| 826 |
-
img (np.ndarray): Input image in BGR format
|
| 827 |
-
dur (float): Duration in seconds
|
| 828 |
-
out (Path): Output video path
|
| 829 |
-
fps (int): Frames per second
|
| 830 |
-
|
| 831 |
-
Returns:
|
| 832 |
-
str: Path to generated video
|
| 833 |
-
"""
|
| 834 |
-
try:
|
| 835 |
-
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 836 |
-
video_writer = cv2.VideoWriter(str(out), fourcc, fps, (WIDTH, HEIGHT))
|
| 837 |
-
|
| 838 |
-
total_frames = int(dur * fps)
|
| 839 |
-
fade_frames = min(int(fps * 0.5), total_frames // 3) # 0.5 second fade or 1/3 of total
|
| 840 |
-
|
| 841 |
-
for frame_idx in range(total_frames):
|
| 842 |
-
if frame_idx < fade_frames:
|
| 843 |
-
# Fade in
|
| 844 |
-
alpha = frame_idx / fade_frames
|
| 845 |
-
faded_img = cv2.addWeighted(img, alpha, np.zeros_like(img), 1 - alpha, 0)
|
| 846 |
-
else:
|
| 847 |
-
# Full opacity
|
| 848 |
-
faded_img = img
|
| 849 |
-
|
| 850 |
-
video_writer.write(faded_img)
|
| 851 |
-
|
| 852 |
-
video_writer.release()
|
| 853 |
-
return str(out)
|
| 854 |
-
|
| 855 |
-
except Exception as e:
|
| 856 |
-
print(f"Image fade animation failed: {e}")
|
| 857 |
-
return str(out)
|
| 858 |
-
|
| 859 |
|
| 860 |
def _fallback_copy_first(file_paths: List[str], output_path: Path) -> None:
    """Best-effort fallback: copy the first *existing* input to the output."""
    import shutil
    for path in file_paths:
        if Path(path).exists():
            shutil.copy2(path, str(output_path))
            return


def concat_media(file_paths: List[str], output_path: Path, media_type: str):
    """Concatenate media files with FFmpeg's concat demuxer.

    Args:
        file_paths: Ordered input files; non-existent paths are skipped.
        output_path: Destination file.
        media_type: "video" → streams are copied without re-encoding to
            preserve timing; anything else is treated as audio and
            re-encoded to 44.1 kHz stereo AAC @ 128k for consistency.

    On FFmpeg failure the first existing input is copied to the output as a
    degraded fallback (matching the original behavior). Returns None.
    """
    if not file_paths:
        return

    list_file = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.txt"
    try:
        with open(list_file, 'w') as f:
            for path in file_paths:
                if not Path(path).exists():
                    continue
                # BUGFIX: inside a quoted concat-demuxer entry a single quote
                # must be written as '\'' — the original's "\\'" escape is not
                # understood by FFmpeg and broke paths containing quotes.
                escaped_path = str(path).replace('\\', '/').replace("'", "'\\''")
                f.write(f"file '{escaped_path}'\n")

        cmd = [
            "ffmpeg", "-y", "-f", "concat", "-safe", "0",
            "-i", str(list_file),
        ]
        if media_type == "video":
            # Copy streams without re-encoding to preserve timing.
            cmd.extend(["-c:v", "copy", "-avoid_negative_ts", "make_zero"])
        else:  # audio
            cmd.extend([
                "-c:a", "aac",
                "-ar", "44100",  # consistent sample rate
                "-ac", "2",      # stereo
                "-b:a", "128k",  # consistent bitrate
            ])
        cmd.append(str(output_path))

        subprocess.run(cmd, check=True, capture_output=True, text=True)

    except subprocess.CalledProcessError as e:
        print(f"FFmpeg concatenation failed: {e.stderr}")
        _fallback_copy_first(file_paths, output_path)
    except Exception as e:
        print(f"Media concatenation failed: {e}")
        _fallback_copy_first(file_paths, output_path)
    finally:
        # BUGFIX: the original only unlinked the temp list on success,
        # leaking one file per failed concatenation.
        list_file.unlink(missing_ok=True)
|
| 923 |
|
|
|
|
|
|
|
| 924 |
|
| 925 |
-
|
| 926 |
-
|
| 927 |
-
|
| 928 |
-
subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
|
| 929 |
-
except Exception:
|
| 930 |
-
st.error("🔴 FFmpeg not available — cannot render video.")
|
| 931 |
-
return None
|
| 932 |
|
| 933 |
-
|
| 934 |
-
|
| 935 |
-
|
| 936 |
-
|
| 937 |
-
|
| 938 |
-
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
ctx_dict = {
|
| 942 |
-
"shape": df.shape,
|
| 943 |
-
"columns": list(df.columns),
|
| 944 |
-
"user_ctx": ctx or "General business analysis",
|
| 945 |
-
"full_dataframe": df.to_dict("records"),
|
| 946 |
-
"data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
|
| 947 |
-
"numeric_summary": {col: {stat: float(val) for stat, val in stats.items()} for col, stats in df.describe().to_dict().items()} if len(df.select_dtypes(include=["number"]).columns) > 0 else {},
|
| 948 |
-
}
|
| 949 |
-
|
| 950 |
-
script = llm.invoke(build_story_prompt(ctx_dict)).content
|
| 951 |
-
scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
|
| 952 |
-
|
| 953 |
-
# ENHANCED: Better chart generation for video
|
| 954 |
-
chart_generator = create_chart_generator(llm, df)
|
| 955 |
-
|
| 956 |
-
video_parts, audio_parts, temps = [], [], []
|
| 957 |
-
|
| 958 |
-
for idx, sc in enumerate(scenes[:VIDEO_SCENES]):
|
| 959 |
-
st.progress((idx + 1) / VIDEO_SCENES, text=f"Rendering Scene {idx + 1}/{VIDEO_SCENES}")
|
| 960 |
-
descs, narrative = extract_chart_tags(sc), clean_narration(sc)
|
| 961 |
-
|
| 962 |
-
# FIXED: Generate audio first to get exact duration
|
| 963 |
-
audio_bytes, _ = deepgram_tts(narrative)
|
| 964 |
-
mp3 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
|
| 965 |
-
|
| 966 |
-
if audio_bytes:
|
| 967 |
-
mp3.write_bytes(audio_bytes)
|
| 968 |
-
# Get the EXACT duration of the generated audio
|
| 969 |
-
dur = audio_duration(str(mp3))
|
| 970 |
-
if dur <= 0: # Fallback if duration detection fails
|
| 971 |
-
dur = 5.0
|
| 972 |
-
else:
|
| 973 |
-
dur = 5.0
|
| 974 |
-
generate_silence_mp3(dur, mp3)
|
| 975 |
-
|
| 976 |
-
audio_parts.append(str(mp3))
|
| 977 |
-
temps.append(mp3)
|
| 978 |
-
|
| 979 |
-
# FIXED: Create video with EXACT same duration as audio
|
| 980 |
-
mp4 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
|
| 981 |
-
|
| 982 |
-
if descs:
|
| 983 |
-
safe_chart(descs[0], df, dur, mp4)
|
| 984 |
-
else:
|
| 985 |
-
img = generate_image_from_prompt(narrative)
|
| 986 |
-
img_cv = cv2.cvtColor(np.array(img.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
|
| 987 |
-
animate_image_fade(img_cv, dur, mp4)
|
| 988 |
-
|
| 989 |
-
video_parts.append(str(mp4))
|
| 990 |
-
temps.append(mp4)
|
| 991 |
-
|
| 992 |
-
# FIXED: Create concatenated files with proper sync
|
| 993 |
-
silent_vid = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
|
| 994 |
-
audio_mix = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
|
| 995 |
-
|
| 996 |
-
# Concatenate video and audio separately first
|
| 997 |
-
concat_media(video_parts, silent_vid, "video")
|
| 998 |
-
concat_media(audio_parts, audio_mix, "audio")
|
| 999 |
-
|
| 1000 |
-
# FIXED: Final merge with proper sync settings
|
| 1001 |
-
final_vid = Path(tempfile.gettempdir()) / f"{key}.mp4"
|
| 1002 |
-
|
| 1003 |
-
# Enhanced FFmpeg command for perfect sync
|
| 1004 |
-
subprocess.run([
|
| 1005 |
-
"ffmpeg", "-y",
|
| 1006 |
-
"-i", str(silent_vid), # Video input
|
| 1007 |
-
"-i", str(audio_mix), # Audio input
|
| 1008 |
-
"-c:v", "libx264", # Video codec (re-encode for compatibility)
|
| 1009 |
-
"-c:a", "aac", # Audio codec
|
| 1010 |
-
"-map", "0:v:0", # Map first video stream
|
| 1011 |
-
"-map", "1:a:0", # Map first audio stream
|
| 1012 |
-
"-shortest", # End when shortest stream ends
|
| 1013 |
-
"-avoid_negative_ts", "make_zero", # Fix timestamp issues
|
| 1014 |
-
"-fflags", "+genpts", # Generate presentation timestamps
|
| 1015 |
-
"-r", str(FPS), # Ensure consistent framerate
|
| 1016 |
-
str(final_vid)
|
| 1017 |
-
], check=True, capture_output=True)
|
| 1018 |
-
|
| 1019 |
-
# Clean up temporary files
|
| 1020 |
-
for p in temps + [silent_vid, audio_mix]:
|
| 1021 |
-
p.unlink(missing_ok=True)
|
| 1022 |
-
|
| 1023 |
-
return str(final_vid)
|
| 1024 |
-
|
| 1025 |
-
# ─── ENHANCED MAIN FUNCTIONS (DROP-IN REPLACEMENTS) ────────────────────────────
|
| 1026 |
|
|
|
|
| 1027 |
def generate_report_bundle(buf: bytes, name: str, ctx: str, key: str):
|
| 1028 |
-
"""
|
| 1029 |
-
Enhanced report generation with reliable chart system - DROP-IN REPLACEMENT
|
| 1030 |
-
"""
|
| 1031 |
-
# 1. Load data and generate markdown text (UNCHANGED)
|
| 1032 |
df, err = load_dataframe_safely(buf, name)
|
| 1033 |
-
if err:
|
| 1034 |
-
st.error(err)
|
| 1035 |
-
return None
|
| 1036 |
-
|
| 1037 |
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)
|
| 1038 |
-
|
| 1039 |
-
# ENHANCED: Better data context analysis
|
| 1040 |
-
ctx_dict = {
|
| 1041 |
-
"shape": df.shape,
|
| 1042 |
-
"columns": list(df.columns),
|
| 1043 |
-
"user_ctx": ctx or "General business analysis",
|
| 1044 |
-
"full_dataframe": df.to_dict("records"),
|
| 1045 |
-
"data_types": {c: str(d) for c, d in df.dtypes.to_dict().items()},
|
| 1046 |
-
"missing_values": {c: int(v) for c, v in df.isnull().sum().to_dict().items()},
|
| 1047 |
-
"numeric_summary": {c: {s: float(v) for s, v in stats.items()} for c, stats in df.describe().to_dict().items()} if len(df.select_dtypes(include=["number"]).columns) > 0 else {},
|
| 1048 |
-
}
|
| 1049 |
-
|
| 1050 |
-
# ENHANCED: Add intelligent data context
|
| 1051 |
enhanced_ctx = enhance_data_context(df, ctx_dict)
|
| 1052 |
cols = ", ".join(enhanced_ctx["columns"][:6])
|
| 1053 |
-
|
| 1054 |
-
# ENHANCED: Smarter report prompt with chart guidance
|
| 1055 |
report_prompt = f"""
|
| 1056 |
You are a senior data analyst and business intelligence expert. Analyze the provided dataset and write a comprehensive executive-level Markdown report.
|
| 1057 |
-
|
| 1058 |
-
**
|
| 1059 |
-
{json.dumps(enhanced_ctx, indent=2)}
|
| 1060 |
-
|
| 1061 |
-
**Chart Recommendations Available:**
|
| 1062 |
-
{json.dumps(enhanced_ctx.get('recommended_charts', {}), indent=2)}
|
| 1063 |
-
|
| 1064 |
**Instructions:**
|
| 1065 |
1. **Identify Data Domain**: First, determine what type of data this represents (e.g., sales/revenue, healthcare/medical, HR/employee, financial, operational, customer, research, etc.) based on column names and sample data.
|
| 1066 |
2. **Executive Summary**: Start with a high-level summary of key findings and business impact.
|
|
@@ -1068,186 +339,86 @@ def generate_report_bundle(buf: bytes, name: str, ctx: str, key: str):
|
|
| 1068 |
4. **Key Insights**: You must provide exactly 5 key insights, each with its own chart tag.
|
| 1069 |
5. **Strategic Recommendations**: Offer concrete, actionable recommendations based on the data.
|
| 1070 |
6. **Visual Support**: When a visualization would enhance understanding, insert chart tags like: `<generate_chart: "chart_type | specific description">`
|
| 1071 |
-
|
| 1072 |
-
Valid chart types: bar, pie, line, scatter, hist
|
| 1073 |
-
Base every chart on actual columns: {cols}
|
| 1074 |
-
|
| 1075 |
**IMPORTANT CHART SELECTION RULES:**
|
| 1076 |
- bar: Use when comparing categories with numeric values (requires categorical + numeric columns)
|
| 1077 |
- pie: Use for proportional breakdowns with few categories (<7) (requires categorical + numeric columns)
|
| 1078 |
- line: Use for time series, trends, or sequential data (requires numeric columns, preferably with time/sequence)
|
| 1079 |
- scatter: Use for correlation analysis between two numeric variables (requires 2+ numeric columns)
|
| 1080 |
- hist: Use for distribution analysis of a single numeric variable (requires 1 numeric column)
|
| 1081 |
-
|
| 1082 |
**Data-Driven Chart Suggestions:**
|
| 1083 |
{chr(10).join([f" - {chart_type}: {description}" for chart_type, description in enhanced_ctx.get('recommended_charts', {}).items()])}
|
| 1084 |
-
|
| 1085 |
7. **Format Requirements**:
|
| 1086 |
-
- Use professional business language
|
| 1087 |
-
- Include relevant metrics and percentages
|
| 1088 |
-
- Structure with clear headers (## Executive Summary, ## Key Insights, etc.)
|
| 1089 |
-
- End with ## Next Steps section
|
| 1090 |
-
|
| 1091 |
**Domain-Specific Focus Areas:**
|
| 1092 |
-
- If sales data: focus on revenue trends, customer segments, product performance
|
| 1093 |
-
- If
|
| 1094 |
-
- If
|
| 1095 |
-
- If operational data: focus on efficiency, bottlenecks, process optimization
|
| 1096 |
-
- If customer data: focus on behavior patterns, satisfaction, churn analysis
|
| 1097 |
-
|
| 1098 |
Generate insights that would be valuable to C-level executives and department heads. Ensure all charts use real data columns and appropriate chart types.
|
| 1099 |
"""
|
| 1100 |
-
|
| 1101 |
md = llm.invoke(report_prompt).content
|
| 1102 |
chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
|
| 1103 |
-
|
| 1104 |
-
# 2. ENHANCED: Generate all charts with reliable system
|
| 1105 |
-
chart_paths = {}
|
| 1106 |
-
chart_generator = create_chart_generator(llm, df) # REPLACE pandas agent
|
| 1107 |
-
|
| 1108 |
for desc in chart_descs:
|
| 1109 |
with st.spinner(f"Generating chart: {desc}..."):
|
| 1110 |
img_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
|
| 1111 |
try:
|
| 1112 |
-
# ENHANCED: Use AI-driven chart specification
|
| 1113 |
chart_spec = chart_generator.generate_chart_spec(desc)
|
| 1114 |
-
|
| 1115 |
-
|
| 1116 |
-
|
| 1117 |
-
|
| 1118 |
-
else:
|
| 1119 |
-
raise RuntimeError("Chart generation failed")
|
| 1120 |
-
|
| 1121 |
-
except Exception as e:
|
| 1122 |
-
# ENHANCED: Better fallback handling
|
| 1123 |
-
try:
|
| 1124 |
-
# Try simple chart generation as fallback
|
| 1125 |
-
quick_chart(desc, df, img_path)
|
| 1126 |
-
if img_path.exists():
|
| 1127 |
-
chart_paths[desc] = str(img_path)
|
| 1128 |
-
except Exception:
|
| 1129 |
-
# Skip this chart if all methods fail
|
| 1130 |
-
print(f"Failed to generate chart: {desc}")
|
| 1131 |
-
continue
|
| 1132 |
-
|
| 1133 |
-
# 3. Assemble the final report bundle
|
| 1134 |
-
try:
|
| 1135 |
-
pdf_bytes = build_pdf(md, chart_paths)
|
| 1136 |
-
except Exception as e:
|
| 1137 |
-
st.warning(f"⚠️ PDF generation failed and will be skipped. Error: {e}")
|
| 1138 |
-
pdf_bytes = None
|
| 1139 |
-
|
| 1140 |
-
return {
|
| 1141 |
-
"type": "report",
|
| 1142 |
-
"key": key,
|
| 1143 |
-
"raw_md": md,
|
| 1144 |
-
"charts": chart_paths,
|
| 1145 |
-
"pdf": pdf_bytes
|
| 1146 |
-
}
|
| 1147 |
|
| 1148 |
def build_story_prompt(ctx_dict):
|
| 1149 |
-
"""ENHANCED: Better story generation with data-driven insights"""
|
| 1150 |
enhanced_ctx = enhance_data_context(pd.DataFrame(ctx_dict.get("full_dataframe", [])), ctx_dict)
|
| 1151 |
cols = ", ".join(enhanced_ctx["columns"][:6])
|
| 1152 |
-
|
| 1153 |
return f"""
|
| 1154 |
You are a professional business storyteller and data analyst. You must create a script with exactly {VIDEO_SCENES} scenes, each separated by '[SCENE_BREAK]'.
|
| 1155 |
-
|
| 1156 |
-
**
|
| 1157 |
-
{json.dumps(enhanced_ctx, indent=2)}
|
| 1158 |
-
|
| 1159 |
-
**Available Chart Types and Recommendations:**
|
| 1160 |
-
{json.dumps(enhanced_ctx.get('recommended_charts', {}), indent=2)}
|
| 1161 |
-
|
| 1162 |
**Task Requirements:**
|
| 1163 |
1. **Identify the Data Story**: Determine what business domain this data represents and what story it tells
|
| 1164 |
2. **Create {VIDEO_SCENES} distinct scenes** that build a logical narrative arc
|
| 1165 |
-
3. **Each scene must contain:**
|
| 1166 |
-
- 1-2 sentences of clear, professional narration (plain English, no jargon)
|
| 1167 |
-
- Exactly one chart tag: `<generate_chart: "chart_type | specific description">`
|
| 1168 |
-
|
| 1169 |
**ENHANCED Chart Guidelines:**
|
| 1170 |
-
- Valid types: bar, pie, line, scatter, hist
|
| 1171 |
-
- Base all charts on actual columns: {cols}
|
| 1172 |
- **USE RECOMMENDED CHARTS**: {list(enhanced_ctx.get('recommended_charts', {}).keys())}
|
| 1173 |
-
- Choose chart types that best tell the story and match the data
|
| 1174 |
-
* bar: categorical comparisons, rankings (needs categorical + numeric data)
|
| 1175 |
-
* pie: proportional breakdowns (≤6 categories, needs categorical + numeric data)
|
| 1176 |
-
* line: trends over time, progression (needs sequential/time data)
|
| 1177 |
-
* scatter: relationships, correlations (needs 2+ numeric columns)
|
| 1178 |
-
* hist: distributions, frequency analysis (needs 1 numeric column)
|
| 1179 |
-
|
| 1180 |
**Data-Driven Chart Selection:**
|
| 1181 |
-
- Numeric columns available: {enhanced_ctx.get('numeric_columns', [])}
|
| 1182 |
-
-
|
| 1183 |
-
-
|
| 1184 |
-
|
| 1185 |
-
|
| 1186 |
-
**Narrative Structure:**
|
| 1187 |
-
- Scene 1: Set the context and introduce the main story
|
| 1188 |
-
- Middle scenes: Develop key insights and supporting evidence
|
| 1189 |
-
- Final scene: Conclude with actionable takeaways or future outlook
|
| 1190 |
-
|
| 1191 |
-
**Content Standards:**
|
| 1192 |
-
- Use conversational, executive-level language
|
| 1193 |
-
- Include specific data insights (trends, percentages, comparisons)
|
| 1194 |
-
- Avoid chart descriptions in narration ("as shown in the chart")
|
| 1195 |
-
- Make each scene self-contained but connected to the overall story
|
| 1196 |
-
- Focus on business impact and actionable insights
|
| 1197 |
-
|
| 1198 |
-
**Domain-Specific Approaches:**
|
| 1199 |
-
- Sales data: Customer journey, revenue trends, market performance
|
| 1200 |
-
- HR data: Workforce insights, talent analytics, organizational health
|
| 1201 |
-
- Financial data: Performance indicators, cost analysis, profitability
|
| 1202 |
-
- Operational data: Process efficiency, bottlenecks, optimization opportunities
|
| 1203 |
-
- Customer data: Behavior patterns, satisfaction trends, retention analysis
|
| 1204 |
-
|
| 1205 |
**Output Format:** Separate each scene with exactly [SCENE_BREAK]
|
| 1206 |
-
|
| 1207 |
-
**IMPORTANT:** Ensure each chart request uses appropriate chart types for the available data structure. Don't request pie charts if there are too many categories, don't request scatter plots if there aren't enough numeric columns, etc.
|
| 1208 |
-
|
| 1209 |
Create a compelling, data-driven story that executives would find engaging and actionable, using charts that actually make sense for the data structure.
|
| 1210 |
"""
|
| 1211 |
|
| 1212 |
def generate_video(buf: bytes, name: str, ctx: str, key: str):
|
| 1213 |
-
"""
|
| 1214 |
-
try:
|
| 1215 |
-
|
| 1216 |
-
except Exception:
|
| 1217 |
-
st.error("🔴 FFmpeg not available — cannot render video.")
|
| 1218 |
-
return None
|
| 1219 |
|
| 1220 |
df, err = load_dataframe_safely(buf, name)
|
| 1221 |
-
if err:
|
| 1222 |
-
st.error(err)
|
| 1223 |
-
return None
|
| 1224 |
|
| 1225 |
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.2)
|
| 1226 |
-
|
| 1227 |
-
# ENHANCED: Better context for video generation
|
| 1228 |
-
ctx_dict = {
|
| 1229 |
-
"shape": df.shape,
|
| 1230 |
-
"columns": list(df.columns),
|
| 1231 |
-
"user_ctx": ctx or "General business analysis",
|
| 1232 |
-
"full_dataframe": df.to_dict("records"),
|
| 1233 |
-
"data_types": {col: str(dtype) for col, dtype in df.dtypes.to_dict().items()},
|
| 1234 |
-
"numeric_summary": {col: {stat: float(val) for stat, val in stats.items()} for col, stats in df.describe().to_dict().items()} if len(df.select_dtypes(include=["number"]).columns) > 0 else {},
|
| 1235 |
-
}
|
| 1236 |
-
|
| 1237 |
script = llm.invoke(build_story_prompt(ctx_dict)).content
|
| 1238 |
scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
|
| 1239 |
|
| 1240 |
-
# ENHANCED: Better chart generation for video
|
| 1241 |
-
chart_generator = create_chart_generator(llm, df)
|
| 1242 |
-
|
| 1243 |
video_parts, audio_parts, temps = [], [], []
|
| 1244 |
for idx, sc in enumerate(scenes[:VIDEO_SCENES]):
|
| 1245 |
st.progress((idx + 1) / VIDEO_SCENES, text=f"Rendering Scene {idx + 1}/{VIDEO_SCENES}")
|
| 1246 |
descs, narrative = extract_chart_tags(sc), clean_narration(sc)
|
|
|
|
| 1247 |
audio_bytes, _ = deepgram_tts(narrative)
|
| 1248 |
mp3 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
|
| 1249 |
-
if audio_bytes:
|
| 1250 |
-
|
|
|
|
|
|
|
|
|
|
| 1251 |
audio_parts.append(str(mp3)); temps.append(mp3)
|
| 1252 |
|
| 1253 |
mp4 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
|
|
@@ -1258,92 +429,67 @@ def generate_video(buf: bytes, name: str, ctx: str, key: str):
|
|
| 1258 |
animate_image_fade(img_cv, dur, mp4)
|
| 1259 |
video_parts.append(str(mp4)); temps.append(mp4)
|
| 1260 |
|
| 1261 |
-
silent_vid
|
|
|
|
| 1262 |
concat_media(video_parts, silent_vid, "video")
|
| 1263 |
concat_media(audio_parts, audio_mix, "audio")
|
|
|
|
| 1264 |
final_vid = Path(tempfile.gettempdir()) / f"{key}.mp4"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1265 |
subprocess.run(
|
| 1266 |
-
[
|
|
|
|
|
|
|
| 1267 |
check=True, capture_output=True,
|
| 1268 |
)
|
| 1269 |
for p in temps + [silent_vid, audio_mix]: p.unlink(missing_ok=True)
|
| 1270 |
return str(final_vid)
|
| 1271 |
|
| 1272 |
-
|
| 1273 |
-
# ─── UI & MAIN WORKFLOW ──────────────────────────────────────────────────
|
| 1274 |
mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
|
| 1275 |
-
|
| 1276 |
upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
|
| 1277 |
if upl:
|
| 1278 |
df_prev, _ = load_dataframe_safely(upl.getvalue(), upl.name)
|
| 1279 |
-
with st.expander("📊 Data Preview"):
|
| 1280 |
-
st.dataframe(arrow_df(df_prev.head()))
|
| 1281 |
-
|
| 1282 |
ctx = st.text_area("Business context or specific instructions (optional)")
|
| 1283 |
|
| 1284 |
-
# ─── Generate button (with synchronous flow) ──────────────────────────
|
| 1285 |
if st.button("🚀 Generate", type="primary", disabled=not upl):
|
| 1286 |
key = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode()]))
|
| 1287 |
-
st.session_state.bundle = None
|
| 1288 |
-
|
| 1289 |
if mode == "Report (PDF)":
|
| 1290 |
with st.spinner("Generating full report and charts... Please wait."):
|
| 1291 |
bundle = generate_report_bundle(upl.getvalue(), upl.name, ctx, key)
|
| 1292 |
st.session_state.bundle = bundle
|
| 1293 |
-
else:
|
| 1294 |
-
# The video function already shows progress, so a top-level spinner is not needed.
|
| 1295 |
bundle_path = generate_video(upl.getvalue(), upl.name, ctx, key)
|
| 1296 |
-
if bundle_path:
|
| 1297 |
-
|
| 1298 |
-
st.rerun() # Rerun once to display the final state
|
| 1299 |
|
| 1300 |
-
# ─── UNIFIED OUTPUT AREA ─────────────────────────────────────────────────
|
| 1301 |
if (bundle := st.session_state.get("bundle")):
|
| 1302 |
if bundle.get("type") == "report":
|
| 1303 |
st.subheader("📄 Generated Report")
|
| 1304 |
with st.expander("View Report", expanded=True):
|
| 1305 |
-
|
| 1306 |
-
# uses native st.image() for charts, guaranteeing correct display.
|
| 1307 |
-
report_md = bundle["raw_md"]
|
| 1308 |
-
charts = bundle["charts"]
|
| 1309 |
last_end = 0
|
| 1310 |
for match in TAG_RE.finditer(report_md):
|
| 1311 |
-
# Render the text that comes before the chart tag
|
| 1312 |
st.markdown(report_md[last_end:match.start()])
|
| 1313 |
-
|
| 1314 |
-
# Render the chart using st.image
|
| 1315 |
desc = match.group("d").strip()
|
| 1316 |
-
chart_path
|
| 1317 |
-
|
| 1318 |
-
st.image(chart_path)
|
| 1319 |
-
else:
|
| 1320 |
-
st.warning(f"Could not render chart: '{desc}'")
|
| 1321 |
-
|
| 1322 |
last_end = match.end()
|
| 1323 |
-
|
| 1324 |
-
# Render any remaining text after the last chart
|
| 1325 |
st.markdown(report_md[last_end:])
|
| 1326 |
-
|
| 1327 |
c1, c2 = st.columns(2)
|
| 1328 |
-
|
| 1329 |
-
|
| 1330 |
-
|
| 1331 |
-
|
| 1332 |
-
|
| 1333 |
-
)
|
| 1334 |
-
with c2:
|
| 1335 |
-
if DG_KEY and st.button("🔊 Narrate Summary", key=f"aud_{bundle['key']}"):
|
| 1336 |
-
txt = re.sub(r"<[^>]+>", "", bundle["raw_md"])
|
| 1337 |
-
audio, mime = deepgram_tts(txt)
|
| 1338 |
-
if audio: st.audio(audio, format=mime)
|
| 1339 |
-
else: st.error("Narration failed.")
|
| 1340 |
-
|
| 1341 |
elif bundle.get("type") == "video":
|
| 1342 |
st.subheader("🎬 Generated Video Narrative")
|
| 1343 |
-
vp
|
| 1344 |
-
if Path(vp).exists():
|
| 1345 |
with open(vp, "rb") as f: st.video(f.read())
|
| 1346 |
-
with open(vp, "rb") as f:
|
| 1347 |
-
|
| 1348 |
-
else:
|
| 1349 |
-
st.error("Video file missing – generation may have failed.")
|
|
|
|
| 1 |
##############################################################################
|
| 2 |
# Sozo Business Studio · 10-Jul-2025
|
| 3 |
+
# • FIXED: Animation and FFmpeg errors without altering the user's AI architecture.
|
| 4 |
+
# • FIXED: The 'can't multiply sequence' error by replacing the animation engine.
|
| 5 |
+
# • FIXED: FFmpeg failures with a robust media concatenation function.
|
| 6 |
+
# • NOTE: The user's prompts, classes, and AI calls are preserved exactly.
|
| 7 |
##############################################################################
|
| 8 |
|
| 9 |
import os, re, json, hashlib, uuid, base64, io, tempfile, requests, subprocess
from typing import Optional
|
|
|
|
| 44 |
DG_KEY = os.getenv("DEEPGRAM_API_KEY") # optional narration
|
| 45 |
|
| 46 |
sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
|
|
|
|
|
|
|
| 47 |
st.session_state.setdefault("bundle", None)
|
| 48 |
|
| 49 |
+
# ─── HELPERS (Unchanged) ──────────────────────────────────────────────────
|
| 50 |
def load_dataframe_safely(buf: bytes, name: str) -> Tuple[Optional[pd.DataFrame], Optional[str]]:
    """Load a CSV/Excel upload into a DataFrame.

    Args:
        buf: Raw uploaded file bytes.
        name: Original filename; its extension selects the parser
            (.xlsx/.xls → read_excel, everything else → read_csv).

    Returns:
        (df, None) on success, or (None, error_message) on failure —
        exactly one element of the pair is non-None.
        BUGFIX: the original annotation claimed a non-optional pair.
    """
    try:
        ext = Path(name).suffix.lower()
        reader = pd.read_excel if ext in (".xlsx", ".xls") else pd.read_csv
        df = reader(io.BytesIO(buf))
        df.columns = df.columns.astype(str).str.strip()
        df = df.dropna(how="all")  # discard fully-empty rows
        if df.empty or len(df.columns) == 0:
            raise ValueError("No usable data found")
        return df, None
    except Exception as e:
        return None, str(e)
|
|
|
|
| 59 |
|
| 60 |
def arrow_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
|
| 61 |
safe = df.copy()
|
| 62 |
for c in safe.columns:
|
| 63 |
if safe[c].dtype.name in ("Int64", "Float64", "Boolean"):
|
|
|
|
| 66 |
|
| 67 |
@st.cache_data(show_spinner=False)
def deepgram_tts(txt: str) -> Tuple[bytes, str]:
    """Synthesize narration audio through Deepgram's speak API.

    Returns (audio_bytes, mime_type), or (None, None) when narration is
    disabled (no API key), the text is empty, or the request fails.
    """
    if not DG_KEY or not txt:
        return None, None
    # Strip characters the TTS engine handles poorly and cap request size.
    cleaned = re.sub(r"[^\w\s.,!?;:-]", "", txt)[:1000]
    try:
        response = requests.post(
            "https://api.deepgram.com/v1/speak",
            params={"model": "aura-2-andromeda-en"},
            headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"},
            json={"text": cleaned},
            timeout=30,
        )
        response.raise_for_status()
        mime = response.headers.get("Content-Type", "audio/mpeg")
        return response.content, mime
    except Exception:
        return None, None
|
|
|
|
| 76 |
|
| 77 |
def generate_silence_mp3(duration: float, out: Path):
    """Write `duration` seconds of silent 44.1 kHz mono audio to `out` via FFmpeg."""
    command = [
        "ffmpeg", "-y",
        "-f", "lavfi",
        "-i", "anullsrc=r=44100:cl=mono",
        "-t", f"{duration:.3f}",
        "-q:a", "9",
        str(out),
    ]
    subprocess.run(command, check=True, capture_output=True)
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
def audio_duration(path: str) -> float:
    """Return the duration of an audio file in seconds via ffprobe.

    Falls back to 5.0 seconds when ffprobe is unavailable or the probe
    fails, so callers always get a usable scene length.
    """
    probe_cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=nw=1:nk=1",
        path,
    ]
    try:
        probe = subprocess.run(
            probe_cmd,
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
        return float(probe.stdout.strip())
    except Exception:
        return 5.0
|
|
|
|
| 85 |
|
| 86 |
TAG_RE = re.compile( r'[<[]\s*generate_?chart\s*[:=]?\s*[\"\'“”]?(?P<d>[^>\"\'”\]]+?)[\"\'“”]?\s*[>\]]', re.I, )
|
| 87 |
extract_chart_tags = lambda t: list( dict.fromkeys(m.group("d").strip() for m in TAG_RE.finditer(t or "")) )
|
| 88 |
|
| 89 |
re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I | re.M)
|
| 90 |
def clean_narration(txt: str) -> str:
    """Strip chart tags, scene labels, and markdown noise from TTS narration."""
    txt = TAG_RE.sub("", txt)
    txt = re_scene.sub("", txt)
    # Drop phrases that reference the visuals — they sound wrong as audio.
    chart_references = [
        r"as you can see in the chart",
        r"this chart shows",
        r"the chart illustrates",
        r"in this visual",
        r"this graph displays",
    ]
    for pattern in chart_references:
        txt = re.sub(pattern, "", txt, flags=re.IGNORECASE)
    txt = re.sub(r"\s*\([^)]*\)", "", txt)  # parenthetical asides
    txt = re.sub(r"[\*#_]", "", txt)        # markdown emphasis characters
    return re.sub(r"\s{2,}", " ", txt).strip()
|
| 98 |
|
| 99 |
+
def placeholder_img() -> Image.Image:
    """Light-gray fallback frame used when image generation fails."""
    return Image.new("RGB", (WIDTH, HEIGHT), (230, 230, 230))
|
|
|
|
|
|
|
| 100 |
|
| 101 |
def generate_image_from_prompt(prompt: str) -> Image.Image:
    """Render a presentation illustration for `prompt` with Gemini.

    Tries the experimental image model first, then the preview model, and
    returns a plain placeholder image if both fail or raise.
    """
    full_prompt = "A clean business-presentation illustration: " + prompt
    models_to_try = (
        "gemini-2.0-flash-exp-image-generation",
        "gemini-2.0-flash-preview-image-generation",
    )

    def fetch(model_name):
        # Request an IMAGE-only response and extract the first inline payload.
        res = GEM.models.generate_content(
            model=model_name,
            contents=full_prompt,
            config=types.GenerateContentConfig(response_modalities=["IMAGE"]),
        )
        for part in res.candidates[0].content.parts:
            if getattr(part, "inline_data", None):
                return Image.open(io.BytesIO(part.inline_data.data)).convert("RGB")
        return None

    try:
        for model_name in models_to_try:
            img = fetch(model_name)
            if img:
                return img
        return placeholder_img()
    except Exception:
        return placeholder_img()
|
|
|
|
| 113 |
|
|
|
|
| 114 |
class PDF(FPDF, HTMLMixin): pass
|
|
|
|
| 115 |
def build_pdf(md: str, charts: Dict[str, str]) -> bytes:
|
| 116 |
def embed_chart_for_pdf(match):
|
| 117 |
desc = match.group("d").strip()
|
|
|
|
| 120 |
b64 = base64.b64encode(Path(path).read_bytes()).decode()
|
| 121 |
return f'<img src="data:image/png;base64,{b64}" width="600">'
|
| 122 |
return ""
|
| 123 |
+
html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(TAG_RE.sub(embed_chart_for_pdf, md))
|
| 124 |
+
pdf = PDF(); pdf.set_auto_page_break(True, margin=15); pdf.add_page()
|
| 125 |
+
pdf.set_font("Arial", "B", 18); pdf.cell(0, 12, "AI-Generated Business Report", ln=True); pdf.ln(3)
|
| 126 |
+
pdf.set_font("Arial", "", 11); pdf.write_html(html)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
return pdf.output(dest="S")
|
| 128 |
|
|
|
|
| 129 |
def quick_chart(desc: str, df: pd.DataFrame, out: Path):
    """Render a simple fallback chart for `desc` ("type | title") to `out`.

    Used when the AI-driven chart generator fails. Chart type falls back to
    "bar" when unspecified. Raises ValueError when the dataframe lacks
    numeric columns (callers catch Exception and skip the chart).
    """
    ctype, *rest = [s.strip().lower() for s in desc.split("|", 1)]
    ctype = ctype or "bar"
    title = rest[0] if rest else desc
    num_cols = df.select_dtypes("number").columns
    cat_cols = df.select_dtypes(exclude="number").columns

    # BUGFIX: every branch below indexes num_cols[0]; the original raised a
    # bare IndexError deep inside pandas when no numeric columns existed.
    if len(num_cols) == 0:
        raise ValueError("quick_chart: dataframe has no numeric columns")

    with plt.ioff():
        fig, ax = plt.subplots(figsize=(6, 3.4), dpi=150)
        try:
            if ctype == "pie" and len(cat_cols) >= 1:
                # Hoisted: the original computed this groupby twice.
                shares = df.groupby(cat_cols[0])[num_cols[0]].sum().head(8)
                ax.pie(shares, labels=shares.index, autopct="%1.1f%%", startangle=90)
            elif ctype == "line":
                df[num_cols[0]].plot(kind="line", ax=ax)
            elif ctype == "scatter" and len(num_cols) >= 2:
                ax.scatter(df[num_cols[0]], df[num_cols[1]], s=10, alpha=0.7)
            elif ctype == "hist":
                ax.hist(df[num_cols[0]], bins=20, alpha=0.7)
            else:
                df[num_cols[0]].value_counts().head(10).plot(kind="bar", ax=ax)
            ax.set_title(title)
            fig.tight_layout()
            fig.savefig(out, bbox_inches="tight", facecolor="white")
        finally:
            # BUGFIX: the figure leaked if any plotting/saving call raised.
            plt.close(fig)
|
| 141 |
+
|
| 142 |
+
# ─── ENHANCED CHART GENERATION SYSTEM (User's code - unchanged) ───────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
class ChartSpecification:
    """Value object describing one chart to render.

    Attributes mirror the JSON spec produced by the LLM: chart type, title,
    the x/y columns, and optional aggregation / filter / top-N / color
    settings.
    """

    def __init__(self, chart_type: str, title: str, x_col: str, y_col: str,
                 agg_method: str = None, filter_condition: str = None,
                 top_n: int = None, color_scheme: str = "professional"):
        self.chart_type = chart_type
        self.title = title
        self.x_col = x_col
        self.y_col = y_col
        # Default aggregation is a plain sum when the spec omits one.
        self.agg_method = agg_method or "sum"
        self.filter_condition = filter_condition
        self.top_n = top_n
        self.color_scheme = color_scheme
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
def enhance_data_context(df: pd.DataFrame, ctx_dict: Dict) -> Dict:
    """Augment a context dict with column typing, data insights, and chart hints.

    Returns a shallow copy of `ctx_dict` with the extra keys added; the
    input dict itself is never mutated.
    """
    numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
    categorical_cols = df.select_dtypes(exclude=['number']).columns.tolist()

    insights = {
        "has_time_series": any(col.lower() in ['date', 'time', 'month', 'year'] for col in df.columns),
        "has_categories": len(categorical_cols) > 0,
        "has_numeric": len(numeric_cols) > 0,
        "record_count": len(df),
        # Correlations only make sense with at least two numeric columns.
        "correlation_pairs": get_correlation_pairs(df, numeric_cols) if len(numeric_cols) > 1 else [],
    }

    enhanced_ctx = ctx_dict.copy()
    enhanced_ctx.update({
        "numeric_columns": numeric_cols,
        "categorical_columns": categorical_cols,
        "data_insights": insights,
        "recommended_charts": recommend_chart_types(df, numeric_cols, categorical_cols),
    })
    return enhanced_ctx
|
| 152 |
|
| 153 |
def get_correlation_pairs(df: pd.DataFrame, numeric_cols: List[str]) -> List[Tuple[str, str, float]]:
    """Return (col_a, col_b, r) for every numeric pair with |r| > 0.5.

    Each unordered pair is emitted exactly once, in column order. Returns
    an empty list when fewer than two numeric columns are given.
    """
    correlations = []
    if len(numeric_cols) > 1:
        corr_matrix = df[numeric_cols].corr()
        for i, col1 in enumerate(numeric_cols):
            # IDIOM: the original wrote enumerate(numeric_cols[i+1:], i+1)
            # and never read the index — a plain slice says the same thing.
            for col2 in numeric_cols[i + 1:]:
                r = corr_matrix.loc[col1, col2]
                if abs(r) > 0.5:
                    correlations.append((col1, col2, r))
    return correlations
|
| 161 |
|
| 162 |
def recommend_chart_types(df: pd.DataFrame, numeric_cols: List[str], categorical_cols: List[str]) -> Dict[str, str]:
    """Suggest chart types (keyed by type name) suited to *df*'s column mix."""
    recs: Dict[str, str] = {}
    has_numeric = len(numeric_cols) > 0

    if categorical_cols and has_numeric:
        first_cat = categorical_cols[0]
        recs["bar"] = f"Compare {numeric_cols[0]} across {first_cat}"
        # Pie charts only stay readable with a handful of slices.
        if len(df[first_cat].unique()) <= 6:
            recs["pie"] = f"Distribution of {numeric_cols[0]} by {first_cat}"

    if len(numeric_cols) > 1:
        recs["scatter"] = f"Relationship between {numeric_cols[0]} and {numeric_cols[1]}"
        time_words = ['date', 'time', 'month', 'year']
        # Substring match on column names hints at a time dimension.
        if any(word in col.lower() for col in df.columns for word in time_words):
            recs["line"] = f"Trend of {numeric_cols[0]} over time"

    if has_numeric:
        recs["hist"] = f"Distribution of {numeric_cols[0]}"
    return recs
|
| 172 |
|
| 173 |
+
def create_chart_generator(llm, df: pd.DataFrame) -> 'ChartGenerator':
    """Factory helper: build a ChartGenerator bound to *llm* and *df*."""
    return ChartGenerator(llm, df)
|
|
|
|
|
|
|
| 174 |
|
| 175 |
class ChartGenerator:
    """Turns a natural-language chart request into a ChartSpecification.

    The bound LLM is prompted with dataset metadata plus the request and
    asked for a JSON spec; if the response cannot be parsed, a keyword
    heuristic fallback is used instead, so callers never see an exception.
    """

    def __init__(self, llm, df: pd.DataFrame):
        # llm: chat model exposing .invoke(prompt).content (LangChain-style).
        self.llm = llm; self.df = df
        # Dataset summary (columns, shape, dtypes + derived insights) that
        # is embedded into every spec prompt.
        self.enhanced_ctx = enhance_data_context(df, {"columns": list(df.columns), "shape": df.shape, "dtypes": {col: str(dtype) for col, dtype in df.dtypes.items()}})

    def generate_chart_spec(self, description: str) -> ChartSpecification:
        """Ask the LLM for a JSON chart spec matching *description*.

        Any failure (bad JSON, unexpected keys, model error) falls through
        to `_create_fallback_spec`, so this method never raises.
        """
        spec_prompt = f"""
You are a data visualization expert. Based on the dataset and chart description, generate a precise chart specification.
**Dataset Info:** {json.dumps(self.enhanced_ctx, indent=2)}
**Chart Request:** {description}
**Return a JSON specification with these exact fields:**
{{
"chart_type": "bar|pie|line|scatter|hist", "title": "Professional chart title", "x_col": "column_name_for_x_axis",
"y_col": "column_name_for_y_axis_or_null", "agg_method": "sum|mean|count|max|min|null", "filter_condition": "description_of_filtering_or_null",
"top_n": "number_for_top_n_filtering_or_null", "reasoning": "Why this specification was chosen"
}}
Return only the JSON specification, no additional text.
"""
        try:
            response = self.llm.invoke(spec_prompt).content.strip()
            # Strip the Markdown code fences chat models often wrap JSON in.
            if response.startswith("```json"): response = response[7:-3]
            elif response.startswith("```"): response = response[3:-3]
            spec_dict = json.loads(response)
            # 'reasoning' exists only to improve model output quality;
            # drop it — ChartSpecification has no such parameter.
            return ChartSpecification(**{k: v for k, v in spec_dict.items() if k != 'reasoning'})
        except Exception as e: return self._create_fallback_spec(description)

    def _create_fallback_spec(self, description: str) -> ChartSpecification:
        """Heuristic spec builder used when the LLM response is unusable:
        matches chart-type keywords in *description* against the columns
        actually available, defaulting to a histogram (or a raw bar chart
        when no numeric columns exist at all)."""
        numeric_cols = self.enhanced_ctx['numeric_columns']; categorical_cols = self.enhanced_ctx['categorical_columns']
        if "bar" in description.lower() and categorical_cols and numeric_cols: return ChartSpecification("bar", description, categorical_cols[0], numeric_cols[0])
        elif "pie" in description.lower() and categorical_cols and numeric_cols: return ChartSpecification("pie", description, categorical_cols[0], numeric_cols[0])
        elif "line" in description.lower() and len(numeric_cols) >= 2: return ChartSpecification("line", description, numeric_cols[0], numeric_cols[1])
        elif "scatter" in description.lower() and len(numeric_cols) >= 2: return ChartSpecification("scatter", description, numeric_cols[0], numeric_cols[1])
        elif numeric_cols: return ChartSpecification("hist", description, numeric_cols[0], None)
        else: return ChartSpecification("bar", description, self.df.columns[0], self.df.columns[1] if len(self.df.columns) > 1 else None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
def execute_chart_spec(spec: "ChartSpecification", df: pd.DataFrame, output_path: Path) -> bool:
    """Render *spec* against *df* and save a static PNG to *output_path*.

    Returns True on success, False on any failure. Errors are printed
    rather than raised so callers can simply skip a failed chart.
    """
    fig = None
    try:
        plot_data = prepare_plot_data(spec, df)
        fig, ax = plt.subplots(figsize=(12, 8))
        plt.style.use('default')
        if spec.chart_type == "bar":
            ax.bar(plot_data.index.astype(str), plot_data.values, color='#2E86AB', alpha=0.8)
            ax.set_xlabel(spec.x_col); ax.set_ylabel(spec.y_col)
            ax.tick_params(axis='x', rotation=45)
        elif spec.chart_type == "pie":
            ax.pie(plot_data.values, labels=plot_data.index, autopct='%1.1f%%', startangle=90)
            ax.axis('equal')
        elif spec.chart_type == "line":
            ax.plot(plot_data.index, plot_data.values, marker='o', linewidth=2, color='#A23B72')
            ax.set_xlabel(spec.x_col); ax.set_ylabel(spec.y_col); ax.grid(True, alpha=0.3)
        elif spec.chart_type == "scatter":
            # prepare_plot_data returns a two-column DataFrame for scatter.
            ax.scatter(plot_data.iloc[:, 0], plot_data.iloc[:, 1], alpha=0.6, color='#F18F01')
            ax.set_xlabel(spec.x_col); ax.set_ylabel(spec.y_col); ax.grid(True, alpha=0.3)
        elif spec.chart_type == "hist":
            ax.hist(plot_data.values, bins=20, color='#C73E1D', alpha=0.7, edgecolor='black')
            ax.set_xlabel(spec.x_col); ax.set_ylabel('Frequency'); ax.grid(True, alpha=0.3)
        ax.set_title(spec.title, fontsize=14, fontweight='bold', pad=20)
        plt.tight_layout()
        plt.savefig(output_path, dpi=300, bbox_inches='tight', facecolor='white')
        return True
    except Exception as e:
        print(f"Chart generation failed: {e}")
        return False
    finally:
        # FIX: always close the figure — the original leaked it whenever an
        # exception occurred after plt.subplots().
        if fig is not None:
            plt.close(fig)
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
def prepare_plot_data(spec: "ChartSpecification", df: pd.DataFrame) -> "pd.Series | pd.DataFrame":
    """Slice/aggregate *df* into the structure each chart type needs.

    Returns a Series for bar/pie/line/hist and a two-column DataFrame for
    scatter. (FIX: the original annotation claimed ``pd.Series`` for every
    type, which was wrong for the scatter branch.)

    Raises:
        ValueError: if the spec references columns missing from *df*.
    """
    if spec.x_col not in df.columns or (spec.y_col and spec.y_col not in df.columns):
        raise ValueError(f"Invalid columns in chart spec: {spec.x_col}, {spec.y_col}")
    if spec.chart_type in ["bar", "pie"]:
        # Without a y column, fall back to simple category frequencies.
        if not spec.y_col:
            return df[spec.x_col].value_counts().nlargest(spec.top_n or 10)
        grouped = df.groupby(spec.x_col)[spec.y_col].agg(spec.agg_method or 'sum')
        return grouped.nlargest(spec.top_n or 10)
    elif spec.chart_type == "line":
        return df.set_index(spec.x_col)[spec.y_col].sort_index()
    elif spec.chart_type == "scatter":
        return df[[spec.x_col, spec.y_col]].dropna()
    elif spec.chart_type == "hist":
        return df[spec.x_col].dropna()
    # Unknown chart type: hand back the raw x column unchanged.
    return df[spec.x_col]
|
| 234 |
+
|
| 235 |
+
# ─── FIXED ANIMATION SYSTEM ───────────────────────────────────────────────
|
| 236 |
+
def animate_chart(spec: "ChartSpecification", df: pd.DataFrame, dur: float, out: Path, fps: int = FPS) -> str:
    """Render an animated chart for *spec* to the MP4 at *out*.

    Pies fade in, bars grow, and line/scatter/hist data is revealed
    progressively over *dur* seconds. Returns str(out).

    FIX: with ``autopct`` set, ``Axes.pie`` returns a 3-tuple
    ``(patches, texts, autotexts)``; the original ``wedges, _ = ax.pie(...)``
    therefore raised ValueError on unpack for every pie animation.
    The figure is now also closed even if the ffmpeg save fails.
    """
    plot_data = prepare_plot_data(spec, df)
    title = spec.title
    frames = max(10, int(dur * fps))  # Ensure integer frame count
    fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=100)
    plt.tight_layout(pad=2.5)
    ctype = spec.chart_type

    if ctype == "pie":
        # FIX: capture only the wedge patches, ignore texts/autotexts.
        wedges, *_ = ax.pie(plot_data, labels=plot_data.index, startangle=90, autopct='%1.1f%%')
        ax.set_title(title); ax.axis('equal')
        def init(): [w.set_alpha(0) for w in wedges]; return wedges
        def update(i): [w.set_alpha(i / (frames - 1)) for w in wedges]; return wedges
    elif ctype == "bar":
        bars = ax.bar(plot_data.index.astype(str), np.zeros_like(plot_data.values, dtype=float), color="#1f77b4")
        # Guard against NaN / non-positive maxima so ylim stays valid.
        ax.set_ylim(0, plot_data.max() * 1.1 if not pd.isna(plot_data.max()) and plot_data.max() > 0 else 1)
        ax.set_title(title); plt.xticks(rotation=45, ha="right")
        def init(): return bars
        def update(i):
            for b, h in zip(bars, plot_data.values): b.set_height(h * (i / (frames - 1)))
            return bars
    else:  # line, scatter, hist
        line, = ax.plot([], [], lw=2)
        plot_data = plot_data.sort_index() if ctype == 'line' and not plot_data.index.is_monotonic_increasing else plot_data
        # Scatter data is a two-column DataFrame; the rest are Series.
        x_full, y_full = (plot_data.iloc[:, 0], plot_data.iloc[:, 1]) if ctype == 'scatter' else (plot_data.index, plot_data.values)
        ax.set_xlim(x_full.min(), x_full.max()); ax.set_ylim(y_full.min() * 0.9, y_full.max() * 1.1)
        ax.set_title(title); ax.grid(alpha=.3); ax.set_xlabel(spec.x_col); ax.set_ylabel(spec.y_col)
        def init(): line.set_data([], []); return [line]
        def update(i):
            # Reveal at least two points so the line is always drawable.
            k = max(2, int(len(x_full) * (i / (frames - 1))))
            line.set_data(x_full[:k], y_full[:k]); return [line]

    anim = FuncAnimation(fig, update, init_func=init, frames=frames, blit=True, interval=1000 / fps)
    try:
        anim.save(str(out), writer=FFMpegWriter(fps=fps, metadata={'artist': 'Sozo Studio'}), dpi=144)
    finally:
        # FIX: release the figure even when ffmpeg writing fails.
        plt.close(fig)
    return str(out)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
|
| 274 |
+
def animate_image_fade(img: np.ndarray, dur: float, out: Path, fps: int = 24) -> str:
    """Write a fade-in-from-black MP4 of the BGR frame *img* to *out*.

    *img* is assumed to already be sized (WIDTH, HEIGHT) — TODO confirm
    callers always resize before calling. Returns str(out).
    """
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(str(out), fourcc, fps, (WIDTH, HEIGHT))
    try:
        total_frames = max(1, int(dur * fps))
        for i in range(total_frames):
            # Linear fade 0 → 1; a single-frame video is fully opaque.
            alpha = i / (total_frames - 1) if total_frames > 1 else 1.0
            frame = cv2.addWeighted(img, alpha, np.zeros_like(img), 1 - alpha, 0)
            video_writer.write(frame)
    finally:
        # FIX: release even if a write fails, so the encoder/file handle is
        # not leaked and the partial file is finalized.
        video_writer.release()
    return str(out)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
|
| 284 |
def safe_chart(desc: str, df: pd.DataFrame, dur: float, out: Path) -> str:
    """Best-effort animated chart for *desc*.

    Tries the LLM-driven spec + animation pipeline first; on any failure,
    renders a generated placeholder image with a fade-in instead so the
    video pipeline always receives a usable clip.
    """
    try:
        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)
        generator = create_chart_generator(llm, df)
        spec = generator.generate_chart_spec(desc)
        return animate_chart(spec, df, dur, out, fps=FPS)
    except Exception as e:
        print(f"Chart animation failed for '{desc}': {e}. Falling back to placeholder image.")
        placeholder = generate_image_from_prompt(f"A professional business chart showing {desc}")
        frame = cv2.cvtColor(np.array(placeholder.resize((WIDTH, HEIGHT))), cv2.COLOR_RGB2BGR)
        return animate_image_fade(frame, dur, out)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
|
| 297 |
def concat_media(file_paths: List[str], output_path: Path, media_type: str):
    """Join several media files into one via FFmpeg's concat demuxer.

    Inputs that are missing or near-empty (<= 100 bytes) are skipped.
    With no usable inputs a 1-second placeholder is written; a single
    input is simply copied; if FFmpeg itself fails, the first valid
    input is copied as a fallback. The temporary list file is always
    removed.
    """
    valid_paths = [p for p in file_paths if Path(p).exists() and Path(p).stat().st_size > 100]

    if not valid_paths:
        print(f"Concatenation failed: No valid {media_type} files found.")
        fallback_dur = 1.0
        if media_type == 'video':
            animate_image_fade(cv2.cvtColor(np.array(placeholder_img()), cv2.COLOR_RGB2BGR), fallback_dur, output_path)
        else:
            generate_silence_mp3(fallback_dur, output_path)
        return

    if len(valid_paths) == 1:
        import shutil
        shutil.copy2(valid_paths[0], str(output_path))
        return

    # Concat demuxer needs a text manifest: one "file '<path>'" line each.
    list_file = output_path.with_suffix(".txt")
    with open(list_file, 'w') as listing:
        for media_path in valid_paths:
            listing.write(f"file '{Path(media_path).resolve()}'\n")

    ffmpeg_cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(list_file), "-c", "copy", str(output_path)]
    try:
        subprocess.run(ffmpeg_cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"FFmpeg concatenation failed for {media_type}: {e.stderr}")
        import shutil
        shutil.copy2(valid_paths[0], str(output_path))
    finally:
        list_file.unlink(missing_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
|
| 323 |
+
# ─── REPORT & VIDEO WORKFLOWS (User's prompts and classes are UNCHANGED) ───
|
| 324 |
def generate_report_bundle(buf: bytes, name: str, ctx: str, key: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
df, err = load_dataframe_safely(buf, name)
|
| 326 |
+
if err: st.error(err); return None
|
|
|
|
|
|
|
|
|
|
| 327 |
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)
|
| 328 |
+
ctx_dict = {"shape": df.shape, "columns": list(df.columns), "user_ctx": ctx or "General business analysis", "full_dataframe": df.to_dict("records"), "data_types": {c: str(d) for c, d in df.dtypes.to_dict().items()}, "missing_values": {c: int(v) for c, v in df.isnull().sum().to_dict().items()}, "numeric_summary": {c: {s: float(v) for s, v in stats.items()} for c, stats in df.describe().to_dict().items()} if len(df.select_dtypes(include=["number"]).columns) > 0 else {}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
enhanced_ctx = enhance_data_context(df, ctx_dict)
|
| 330 |
cols = ", ".join(enhanced_ctx["columns"][:6])
|
|
|
|
|
|
|
| 331 |
report_prompt = f"""
|
| 332 |
You are a senior data analyst and business intelligence expert. Analyze the provided dataset and write a comprehensive executive-level Markdown report.
|
| 333 |
+
**Dataset Analysis Context:** {json.dumps(enhanced_ctx, indent=2)}
|
| 334 |
+
**Chart Recommendations Available:** {json.dumps(enhanced_ctx.get('recommended_charts', {}), indent=2)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
**Instructions:**
|
| 336 |
1. **Identify Data Domain**: First, determine what type of data this represents (e.g., sales/revenue, healthcare/medical, HR/employee, financial, operational, customer, research, etc.) based on column names and sample data.
|
| 337 |
2. **Executive Summary**: Start with a high-level summary of key findings and business impact.
|
|
|
|
| 339 |
4. **Key Insights**: You must provide exactly 5 key insights, each with its own chart tag.
|
| 340 |
5. **Strategic Recommendations**: Offer concrete, actionable recommendations based on the data.
|
| 341 |
6. **Visual Support**: When a visualization would enhance understanding, insert chart tags like: `<generate_chart: "chart_type | specific description">`
|
| 342 |
+
Valid chart types: bar, pie, line, scatter, hist. Base every chart on actual columns: {cols}
|
|
|
|
|
|
|
|
|
|
| 343 |
**IMPORTANT CHART SELECTION RULES:**
|
| 344 |
- bar: Use when comparing categories with numeric values (requires categorical + numeric columns)
|
| 345 |
- pie: Use for proportional breakdowns with few categories (<7) (requires categorical + numeric columns)
|
| 346 |
- line: Use for time series, trends, or sequential data (requires numeric columns, preferably with time/sequence)
|
| 347 |
- scatter: Use for correlation analysis between two numeric variables (requires 2+ numeric columns)
|
| 348 |
- hist: Use for distribution analysis of a single numeric variable (requires 1 numeric column)
|
|
|
|
| 349 |
**Data-Driven Chart Suggestions:**
|
| 350 |
{chr(10).join([f" - {chart_type}: {description}" for chart_type, description in enhanced_ctx.get('recommended_charts', {}).items()])}
|
|
|
|
| 351 |
7. **Format Requirements**:
|
| 352 |
+
- Use professional business language, include relevant metrics and percentages, structure with clear headers, and end with ## Next Steps section.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
**Domain-Specific Focus Areas:**
|
| 354 |
+
- If sales data: focus on revenue trends, customer segments, product performance. If HR data: focus on workforce analytics, retention, performance metrics.
|
| 355 |
+
- If financial data: focus on profitability, cost analysis, financial health. If operational data: focus on efficiency, bottlenecks, process optimization.
|
| 356 |
+
- If customer data: focus on behavior patterns, satisfaction, churn analysis.
|
|
|
|
|
|
|
|
|
|
| 357 |
Generate insights that would be valuable to C-level executives and department heads. Ensure all charts use real data columns and appropriate chart types.
|
| 358 |
"""
|
|
|
|
| 359 |
md = llm.invoke(report_prompt).content
|
| 360 |
chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
|
| 361 |
+
chart_paths = {}; chart_generator = create_chart_generator(llm, df)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
for desc in chart_descs:
|
| 363 |
with st.spinner(f"Generating chart: {desc}..."):
|
| 364 |
img_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
|
| 365 |
try:
|
|
|
|
| 366 |
chart_spec = chart_generator.generate_chart_spec(desc)
|
| 367 |
+
if execute_chart_spec(chart_spec, df, img_path): chart_paths[desc] = str(img_path)
|
| 368 |
+
except Exception as e: print(f"Failed to generate chart: {desc}, {e}")
|
| 369 |
+
pdf_bytes = build_pdf(md, chart_paths)
|
| 370 |
+
return {"type": "report", "key": key, "raw_md": md, "charts": chart_paths, "pdf": pdf_bytes}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 371 |
|
| 372 |
def build_story_prompt(ctx_dict):
    """Build the LLM prompt requesting a VIDEO_SCENES-scene video script.

    Rebuilds the enhanced context from the records embedded in *ctx_dict*
    (key "full_dataframe") so the prompt always reflects the actual data,
    then returns one large instruction string; scenes in the model output
    are expected to be separated by [SCENE_BREAK].
    """
    enhanced_ctx = enhance_data_context(pd.DataFrame(ctx_dict.get("full_dataframe", [])), ctx_dict)
    # Only the first six column names are surfaced to keep the prompt short.
    cols = ", ".join(enhanced_ctx["columns"][:6])
    return f"""
You are a professional business storyteller and data analyst. You must create a script with exactly {VIDEO_SCENES} scenes, each separated by '[SCENE_BREAK]'.
**Enhanced Dataset Context:** {json.dumps(enhanced_ctx, indent=2)}
**Available Chart Types and Recommendations:** {json.dumps(enhanced_ctx.get('recommended_charts', {}), indent=2)}
**Task Requirements:**
1. **Identify the Data Story**: Determine what business domain this data represents and what story it tells
2. **Create {VIDEO_SCENES} distinct scenes** that build a logical narrative arc
3. **Each scene must contain:** 1-2 sentences of clear, professional narration and exactly one chart tag: `<generate_chart: "chart_type | specific description">`
**ENHANCED Chart Guidelines:**
- Valid types: bar, pie, line, scatter, hist. Base all charts on actual columns: {cols}.
- **USE RECOMMENDED CHARTS**: {list(enhanced_ctx.get('recommended_charts', {}).keys())}
- Choose chart types that best tell the story and match the data.
**Data-Driven Chart Selection:**
- Numeric columns available: {enhanced_ctx.get('numeric_columns', [])}. Categorical columns available: {enhanced_ctx.get('categorical_columns', [])}.
- Correlation opportunities: {len(enhanced_ctx.get('data_insights', {}).get('correlation_pairs', []))} strong correlations found.
- Time series potential: {enhanced_ctx.get('data_insights', {}).get('has_time_series', False)}.
**Narrative Structure:** Scene 1: Set the context. Middle scenes: Develop insights. Final scene: Conclude with takeaways.
**Content Standards:** Use conversational, executive-level language. Include specific data insights. Avoid chart descriptions in narration. Focus on business impact.
**Output Format:** Separate each scene with exactly [SCENE_BREAK]
Create a compelling, data-driven story that executives would find engaging and actionable, using charts that actually make sense for the data structure.
"""
|
| 396 |
|
| 397 |
def generate_video(buf: bytes, name: str, ctx: str, key: str):
|
| 398 |
+
"""FIXED: Generates video with reliable charts and perfect audio sync."""
|
| 399 |
+
try: subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
|
| 400 |
+
except Exception: st.error("🔴 FFmpeg not available — cannot render video."); return None
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
df, err = load_dataframe_safely(buf, name)
|
| 403 |
+
if err: st.error(err); return None
|
|
|
|
|
|
|
| 404 |
|
| 405 |
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.2)
|
| 406 |
+
ctx_dict = {"shape": df.shape, "columns": list(df.columns), "user_ctx": ctx or "General business analysis", "full_dataframe": df.to_dict("records")}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
script = llm.invoke(build_story_prompt(ctx_dict)).content
|
| 408 |
scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
|
| 409 |
|
|
|
|
|
|
|
|
|
|
| 410 |
video_parts, audio_parts, temps = [], [], []
|
| 411 |
for idx, sc in enumerate(scenes[:VIDEO_SCENES]):
|
| 412 |
st.progress((idx + 1) / VIDEO_SCENES, text=f"Rendering Scene {idx + 1}/{VIDEO_SCENES}")
|
| 413 |
descs, narrative = extract_chart_tags(sc), clean_narration(sc)
|
| 414 |
+
|
| 415 |
audio_bytes, _ = deepgram_tts(narrative)
|
| 416 |
mp3 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp3"
|
| 417 |
+
if audio_bytes:
|
| 418 |
+
mp3.write_bytes(audio_bytes); dur = audio_duration(str(mp3))
|
| 419 |
+
if dur <= 0.1: dur = 5.0
|
| 420 |
+
else:
|
| 421 |
+
dur = 5.0; generate_silence_mp3(dur, mp3)
|
| 422 |
audio_parts.append(str(mp3)); temps.append(mp3)
|
| 423 |
|
| 424 |
mp4 = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.mp4"
|
|
|
|
| 429 |
animate_image_fade(img_cv, dur, mp4)
|
| 430 |
video_parts.append(str(mp4)); temps.append(mp4)
|
| 431 |
|
| 432 |
+
silent_vid = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}_v.mp4"
|
| 433 |
+
audio_mix = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}_a.mp3"
|
| 434 |
concat_media(video_parts, silent_vid, "video")
|
| 435 |
concat_media(audio_parts, audio_mix, "audio")
|
| 436 |
+
|
| 437 |
final_vid = Path(tempfile.gettempdir()) / f"{key}.mp4"
|
| 438 |
+
if not (silent_vid.exists() and audio_mix.exists()):
|
| 439 |
+
st.error("Media concatenation failed. Cannot create final video."); return None
|
| 440 |
+
|
| 441 |
+
# FIXED: Final merge with robust flags for perfect sync
|
| 442 |
subprocess.run(
|
| 443 |
+
["ffmpeg", "-y", "-i", str(silent_vid), "-i", str(audio_mix),
|
| 444 |
+
"-c:v", "libx264", "-pix_fmt", "yuv420p", "-c:a", "aac",
|
| 445 |
+
"-map", "0:v:0", "-map", "1:a:0", "-shortest", str(final_vid)],
|
| 446 |
check=True, capture_output=True,
|
| 447 |
)
|
| 448 |
for p in temps + [silent_vid, audio_mix]: p.unlink(missing_ok=True)
|
| 449 |
return str(final_vid)
|
| 450 |
|
| 451 |
+
# ─── UI & MAIN WORKFLOW (Unchanged) ──────────────────────────────────────
|
|
|
|
| 452 |
# ─── Top-level Streamlit UI: upload → generate → render bundle ───
mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
upl = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"])
if upl:
    df_prev, prev_err = load_dataframe_safely(upl.getvalue(), upl.name)
    # FIX: load_dataframe_safely returns (None, err) on failure; the
    # original called df_prev.head() unconditionally and crashed with
    # AttributeError whenever a bad file was uploaded.
    if prev_err:
        st.error(f"Could not preview file: {prev_err}")
    else:
        with st.expander("📊 Data Preview"): st.dataframe(arrow_df(df_prev.head()))
ctx = st.text_area("Business context or specific instructions (optional)")

if st.button("🚀 Generate", type="primary", disabled=not upl):
    # Cache key over file bytes + mode + context so identical requests reuse.
    key = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode()]))
    st.session_state.bundle = None
    if mode == "Report (PDF)":
        with st.spinner("Generating full report and charts... Please wait."):
            bundle = generate_report_bundle(upl.getvalue(), upl.name, ctx, key)
        st.session_state.bundle = bundle
    else:
        bundle_path = generate_video(upl.getvalue(), upl.name, ctx, key)
        if bundle_path: st.session_state.bundle = {"type": "video", "video_path": bundle_path, "key": key}
    st.rerun()

if (bundle := st.session_state.get("bundle")):
    if bundle.get("type") == "report":
        st.subheader("📄 Generated Report")
        with st.expander("View Report", expanded=True):
            report_md, charts = bundle["raw_md"], bundle["charts"]
            # Interleave markdown text with the rendered chart images by
            # walking the <generate_chart: ...> tags in order.
            last_end = 0
            for match in TAG_RE.finditer(report_md):
                st.markdown(report_md[last_end:match.start()])
                desc = match.group("d").strip()
                if (chart_path := charts.get(desc)) and Path(chart_path).exists(): st.image(chart_path)
                else: st.warning(f"Could not render chart: '{desc}'")
                last_end = match.end()
            st.markdown(report_md[last_end:])
        c1, c2 = st.columns(2)
        if bundle.get("pdf"): c1.download_button("Download PDF", bundle["pdf"], f"report_{bundle['key'][:8]}.pdf", "application/pdf", use_container_width=True)
        if DG_KEY and c2.button("🔊 Narrate Summary", key=f"aud_{bundle['key']}"):
            # Strip chart tags / markup before sending text to TTS.
            audio, mime = deepgram_tts(re.sub(r"<[^>]+>", "", bundle["raw_md"]))
            if audio: st.audio(audio, format=mime)
            else: st.error("Narration failed.")
    elif bundle.get("type") == "video":
        st.subheader("🎬 Generated Video Narrative")
        if (vp := bundle.get("video_path")) and Path(vp).exists():
            with open(vp, "rb") as f: st.video(f.read())
            with open(vp, "rb") as f: st.download_button("Download Video", f, f"narrative_{bundle['key'][:8]}.mp4", "video/mp4")
        else: st.error("Video file missing – generation may have failed.")
|
|
|
|
|
|