rairo committed on
Commit
a2bd41d
·
verified ·
1 Parent(s): 26532f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -19
app.py CHANGED
@@ -179,20 +179,70 @@ def generate_report(buf: bytes, name: str, ctx: str, key: str):
179
  llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
180
  google_api_key=API_KEY, temperature=0.1)
181
 
 
182
  ctx_dict = {
183
  "shape": df.shape,
184
  "columns": list(df.columns),
185
  "user_ctx": ctx or "General business analysis",
 
 
 
 
186
  }
 
187
  cols = ", ".join(ctx_dict["columns"][:6])
188
- report_prompt = (
189
- "You are a senior business analyst. Write an executive-level Markdown report "
190
- "with insights & recommendations.\n"
191
- 'When a visual is helpful, insert a tag like <generate_chart: "pie | sales by region"> '
192
- "(chart_type first, then a description). Valid chart types: bar, pie, line, scatter, hist.\n"
193
- f"Base every chart on columns ({cols}) from the dataset.\n"
194
- f"Data context:\n{json.dumps(ctx_dict, indent=2)}"
195
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  md = llm.invoke(report_prompt).content
197
 
198
  chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
@@ -205,7 +255,18 @@ def generate_report(buf: bytes, name: str, ctx: str, key: str):
205
  with st.spinner(f"Generating chart: {d}"):
206
  with plt.ioff():
207
  try:
208
- agent.run(f"Create a {d} with Matplotlib and save.")
 
 
 
 
 
 
 
 
 
 
 
209
  fig = plt.gcf()
210
  if fig.axes:
211
  p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
@@ -365,15 +426,63 @@ def concat_media(paths: List[str], out: Path, kind="video"):
365
  # ─── VIDEO GENERATION ──────────────────────────────────────────────────────
366
  def build_story_prompt(ctx_dict):
367
  cols = ", ".join(ctx_dict["columns"][:6])
368
- return (
369
- f"Create a script for a short business video with exactly {VIDEO_SCENES} scenes.\n"
370
- "Each scene must include:\n"
371
- "• 1–2 sentences of narration (no scene labels, no chart descriptions).\n"
372
- '• Exactly one chart tag, e.g. <generate_chart: "bar | total revenue by month">.\n'
373
- "Valid chart types: bar, pie, line, scatter, hist.\n"
374
- f"Use the dataset columns ({cols}) with sensible aggregations.\n"
375
- "Separate scenes with [SCENE_BREAK]."
376
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
 
378
 
379
  def generate_video(buf: bytes, name: str, ctx: str, key: str):
@@ -389,11 +498,16 @@ def generate_video(buf: bytes, name: str, ctx: str, key: str):
389
  llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
390
  google_api_key=API_KEY, temperature=0.2)
391
 
 
392
  ctx_dict = {
393
  "shape": df.shape,
394
  "columns": list(df.columns),
395
  "user_ctx": ctx or "General business analysis",
 
 
 
396
  }
 
397
  script = llm.invoke(build_story_prompt(ctx_dict)).content
398
  scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
399
 
@@ -443,7 +557,6 @@ def generate_video(buf: bytes, name: str, ctx: str, key: str):
443
 
444
  return str(final_vid)
445
 
446
-
447
  # ─── UI ─────────────────────────────────────────────────────────────────────
448
  mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
449
 
 
179
  llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
180
  google_api_key=API_KEY, temperature=0.1)
181
 
182
+ # Enhanced context analysis
183
  ctx_dict = {
184
  "shape": df.shape,
185
  "columns": list(df.columns),
186
  "user_ctx": ctx or "General business analysis",
187
+ "full_dataframe": df.to_dict('records'),
188
+ "data_types": df.dtypes.to_dict(),
189
+ "missing_values": df.isnull().sum().to_dict(),
190
+ "numeric_summary": df.describe().to_dict() if len(df.select_dtypes(include=['number']).columns) > 0 else {}
191
  }
192
+
193
  cols = ", ".join(ctx_dict["columns"][:6])
194
+
195
+ # Enhanced report prompt with domain intelligence
196
+ report_prompt = f"""
197
+ You are a senior data analyst and business intelligence expert. Analyze the provided dataset and write a comprehensive executive-level Markdown report.
198
+
199
+ **Dataset Analysis Context:**
200
+ {json.dumps(ctx_dict, indent=2)}
201
+
202
+ **Instructions:**
203
+ 1. **Identify Data Domain**: First, determine what type of data this represents (e.g., sales/revenue, healthcare/medical, HR/employee, financial, operational, customer, research, etc.) based on column names and sample data.
204
+
205
+ 2. **Executive Summary**: Start with a high-level summary of key findings and business impact.
206
+
207
+ 3. **Data Quality Assessment**: Comment on data completeness, any notable missing values, and data reliability.
208
+
209
+ 4. **Key Insights**: Provide 4-6 actionable insights specific to the identified domain:
210
+ - Trends and patterns
211
+ - Outliers or anomalies
212
+ - Performance indicators
213
+ - Risk factors or opportunities
214
+
215
+ 5. **Strategic Recommendations**: Offer concrete, actionable recommendations based on the data.
216
+
217
+ 6. **Visual Support**: When a visualization would enhance understanding, insert chart tags like:
218
+ `<generate_chart: "chart_type | specific description">`
219
+
220
+ Valid chart types: bar, pie, line, scatter, hist
221
+ Base every chart on actual columns: {cols}
222
+
223
+ Choose chart types strategically:
224
+ - bar: for categorical comparisons
225
+ - pie: for proportional breakdowns (when categories < 7)
226
+ - line: for time series or trends
227
+ - scatter: for correlation analysis
228
+ - hist: for distribution analysis
229
+
230
+ 7. **Format Requirements**:
231
+ - Use professional business language
232
+ - Include relevant metrics and percentages
233
+ - Structure with clear headers (## Executive Summary, ## Key Insights, etc.)
234
+ - End with ## Next Steps section
235
+
236
+ **Domain-Specific Focus Areas:**
237
+ - If sales data: focus on revenue trends, customer segments, product performance
238
+ - If HR data: focus on workforce analytics, retention, performance metrics
239
+ - If financial data: focus on profitability, cost analysis, financial health
240
+ - If operational data: focus on efficiency, bottlenecks, process optimization
241
+ - If customer data: focus on behavior patterns, satisfaction, churn analysis
242
+
243
+ Generate insights that would be valuable to C-level executives and department heads.
244
+ """
245
+
246
  md = llm.invoke(report_prompt).content
247
 
248
  chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
 
255
  with st.spinner(f"Generating chart: {d}"):
256
  with plt.ioff():
257
  try:
258
+ # Enhanced chart generation prompt
259
+ chart_prompt = f"""
260
+ Create a professional {d} chart using matplotlib with these requirements:
261
+ 1. Use a clean, business-appropriate style
262
+ 2. Include proper title, axis labels, and legends
263
+ 3. Apply appropriate color schemes (avoid rainbow colors)
264
+ 4. Ensure text is readable (font size 10+)
265
+ 5. Format numbers appropriately (e.g., currency, percentages)
266
+ 6. Save the figure with high quality
267
+ 7. Handle any missing or null values appropriately
268
+ """
269
+ agent.run(chart_prompt)
270
  fig = plt.gcf()
271
  if fig.axes:
272
  p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
 
426
  # ─── VIDEO GENERATION ──────────────────────────────────────────────────────
427
  def build_story_prompt(ctx_dict):
428
  cols = ", ".join(ctx_dict["columns"][:6])
429
+
430
+ return f"""
431
+ You are a professional business storyteller and data analyst. Create a compelling script for a {VIDEO_SCENES}-scene business video presentation.
432
+
433
+ **Complete Dataset Context:**
434
+ {json.dumps(ctx_dict, indent=2)}
435
+
436
+ **Task Requirements:**
437
+ 1. **Identify the Data Story**: Determine what business domain this data represents and what story it tells
438
+ 2. **Create {VIDEO_SCENES} distinct scenes** that build a logical narrative arc
439
+ 3. **Each scene must contain:**
440
+ - 1-2 sentences of clear, professional narration (plain English, no jargon)
441
+ - Exactly one chart tag: `<generate_chart: "chart_type | specific description">`
442
+
443
+ **Chart Guidelines:**
444
+ - Valid types: bar, pie, line, scatter, hist
445
+ - Base all charts on actual columns: {cols}
446
+ - Choose chart types that best tell the story:
447
+ * bar: categorical comparisons, rankings
448
+ * pie: proportional breakdowns (≤6 categories)
449
+ * line: trends over time, progression
450
+ * scatter: relationships, correlations
451
+ * hist: distributions, frequency analysis
452
+
453
+ **Narrative Structure:**
454
+ - Scene 1: Set the context and introduce the main story
455
+ - Middle scenes: Develop key insights and supporting evidence
456
+ - Final scene: Conclude with actionable takeaways or future outlook
457
+
458
+ **Content Standards:**
459
+ - Use conversational, executive-level language
460
+ - Include specific data insights (trends, percentages, comparisons)
461
+ - Avoid chart descriptions in narration ("as shown in the chart")
462
+ - Make each scene self-contained but connected to the overall story
463
+ - Focus on business impact and actionable insights
464
+
465
+ **Domain-Specific Approaches:**
466
+ - Sales data: Customer journey, revenue trends, market performance
467
+ - HR data: Workforce insights, talent analytics, organizational health
468
+ - Financial data: Performance indicators, cost analysis, profitability
469
+ - Operational data: Process efficiency, bottlenecks, optimization opportunities
470
+ - Customer data: Behavior patterns, satisfaction trends, retention analysis
471
+
472
+ **Output Format:**
473
+ Separate each scene with exactly [SCENE_BREAK]
474
+
475
+ **Example Structure:**
476
+ Our company's data reveals fascinating insights about market performance over the past year. Let's explore what the numbers tell us about our growth trajectory.
477
+ <generate_chart: "line | monthly revenue growth over 12 months">
478
+
479
+ [SCENE_BREAK]
480
+
481
+ Customer acquisition has shown remarkable patterns, with certain segments driving significantly more value than others. The data shows a clear preference emerging in our target markets.
482
+ <generate_chart: "bar | customer acquisition by segment">
483
+
484
+ Create a compelling, data-driven story that executives would find engaging and actionable.
485
+ """
486
 
487
 
488
  def generate_video(buf: bytes, name: str, ctx: str, key: str):
 
498
  llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
499
  google_api_key=API_KEY, temperature=0.2)
500
 
501
+ # Enhanced context with complete data insights
502
  ctx_dict = {
503
  "shape": df.shape,
504
  "columns": list(df.columns),
505
  "user_ctx": ctx or "General business analysis",
506
+ "full_dataframe": df.to_dict('records'),
507
+ "data_types": df.dtypes.to_dict(),
508
+ "numeric_summary": df.describe().to_dict() if len(df.select_dtypes(include=['number']).columns) > 0 else {}
509
  }
510
+
511
  script = llm.invoke(build_story_prompt(ctx_dict)).content
512
  scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
513
 
 
557
 
558
  return str(final_vid)
559
 
 
560
  # ─── UI ─────────────────────────────────────────────────────────────────────
561
  mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
562