Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -179,20 +179,70 @@ def generate_report(buf: bytes, name: str, ctx: str, key: str):
|
|
| 179 |
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
|
| 180 |
google_api_key=API_KEY, temperature=0.1)
|
| 181 |
|
|
|
|
| 182 |
ctx_dict = {
|
| 183 |
"shape": df.shape,
|
| 184 |
"columns": list(df.columns),
|
| 185 |
"user_ctx": ctx or "General business analysis",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
}
|
|
|
|
| 187 |
cols = ", ".join(ctx_dict["columns"][:6])
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
md = llm.invoke(report_prompt).content
|
| 197 |
|
| 198 |
chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
|
|
@@ -205,7 +255,18 @@ def generate_report(buf: bytes, name: str, ctx: str, key: str):
|
|
| 205 |
with st.spinner(f"Generating chart: {d}"):
|
| 206 |
with plt.ioff():
|
| 207 |
try:
|
| 208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
fig = plt.gcf()
|
| 210 |
if fig.axes:
|
| 211 |
p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
|
|
@@ -365,15 +426,63 @@ def concat_media(paths: List[str], out: Path, kind="video"):
|
|
| 365 |
# ─── VIDEO GENERATION ──────────────────────────────────────────────────────
|
| 366 |
def build_story_prompt(ctx_dict):
|
| 367 |
cols = ", ".join(ctx_dict["columns"][:6])
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
|
| 378 |
|
| 379 |
def generate_video(buf: bytes, name: str, ctx: str, key: str):
|
|
@@ -389,11 +498,16 @@ def generate_video(buf: bytes, name: str, ctx: str, key: str):
|
|
| 389 |
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
|
| 390 |
google_api_key=API_KEY, temperature=0.2)
|
| 391 |
|
|
|
|
| 392 |
ctx_dict = {
|
| 393 |
"shape": df.shape,
|
| 394 |
"columns": list(df.columns),
|
| 395 |
"user_ctx": ctx or "General business analysis",
|
|
|
|
|
|
|
|
|
|
| 396 |
}
|
|
|
|
| 397 |
script = llm.invoke(build_story_prompt(ctx_dict)).content
|
| 398 |
scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
|
| 399 |
|
|
@@ -443,7 +557,6 @@ def generate_video(buf: bytes, name: str, ctx: str, key: str):
|
|
| 443 |
|
| 444 |
return str(final_vid)
|
| 445 |
|
| 446 |
-
|
| 447 |
# ─── UI ─────────────────────────────────────────────────────────────────────
|
| 448 |
mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
|
| 449 |
|
|
|
|
| 179 |
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
|
| 180 |
google_api_key=API_KEY, temperature=0.1)
|
| 181 |
|
| 182 |
+
# Enhanced context analysis
|
| 183 |
ctx_dict = {
|
| 184 |
"shape": df.shape,
|
| 185 |
"columns": list(df.columns),
|
| 186 |
"user_ctx": ctx or "General business analysis",
|
| 187 |
+
"full_dataframe": df.to_dict('records'),
|
| 188 |
+
"data_types": df.dtypes.to_dict(),
|
| 189 |
+
"missing_values": df.isnull().sum().to_dict(),
|
| 190 |
+
"numeric_summary": df.describe().to_dict() if len(df.select_dtypes(include=['number']).columns) > 0 else {}
|
| 191 |
}
|
| 192 |
+
|
| 193 |
cols = ", ".join(ctx_dict["columns"][:6])
|
| 194 |
+
|
| 195 |
+
# Enhanced report prompt with domain intelligence
|
| 196 |
+
report_prompt = f"""
|
| 197 |
+
You are a senior data analyst and business intelligence expert. Analyze the provided dataset and write a comprehensive executive-level Markdown report.
|
| 198 |
+
|
| 199 |
+
**Dataset Analysis Context:**
|
| 200 |
+
{json.dumps(ctx_dict, indent=2)}
|
| 201 |
+
|
| 202 |
+
**Instructions:**
|
| 203 |
+
1. **Identify Data Domain**: First, determine what type of data this represents (e.g., sales/revenue, healthcare/medical, HR/employee, financial, operational, customer, research, etc.) based on column names and sample data.
|
| 204 |
+
|
| 205 |
+
2. **Executive Summary**: Start with a high-level summary of key findings and business impact.
|
| 206 |
+
|
| 207 |
+
3. **Data Quality Assessment**: Comment on data completeness, any notable missing values, and data reliability.
|
| 208 |
+
|
| 209 |
+
4. **Key Insights**: Provide 4-6 actionable insights specific to the identified domain:
|
| 210 |
+
- Trends and patterns
|
| 211 |
+
- Outliers or anomalies
|
| 212 |
+
- Performance indicators
|
| 213 |
+
- Risk factors or opportunities
|
| 214 |
+
|
| 215 |
+
5. **Strategic Recommendations**: Offer concrete, actionable recommendations based on the data.
|
| 216 |
+
|
| 217 |
+
6. **Visual Support**: When a visualization would enhance understanding, insert chart tags like:
|
| 218 |
+
`<generate_chart: "chart_type | specific description">`
|
| 219 |
+
|
| 220 |
+
Valid chart types: bar, pie, line, scatter, hist
|
| 221 |
+
Base every chart on actual columns: {cols}
|
| 222 |
+
|
| 223 |
+
Choose chart types strategically:
|
| 224 |
+
- bar: for categorical comparisons
|
| 225 |
+
- pie: for proportional breakdowns (when categories < 7)
|
| 226 |
+
- line: for time series or trends
|
| 227 |
+
- scatter: for correlation analysis
|
| 228 |
+
- hist: for distribution analysis
|
| 229 |
+
|
| 230 |
+
7. **Format Requirements**:
|
| 231 |
+
- Use professional business language
|
| 232 |
+
- Include relevant metrics and percentages
|
| 233 |
+
- Structure with clear headers (## Executive Summary, ## Key Insights, etc.)
|
| 234 |
+
- End with ## Next Steps section
|
| 235 |
+
|
| 236 |
+
**Domain-Specific Focus Areas:**
|
| 237 |
+
- If sales data: focus on revenue trends, customer segments, product performance
|
| 238 |
+
- If HR data: focus on workforce analytics, retention, performance metrics
|
| 239 |
+
- If financial data: focus on profitability, cost analysis, financial health
|
| 240 |
+
- If operational data: focus on efficiency, bottlenecks, process optimization
|
| 241 |
+
- If customer data: focus on behavior patterns, satisfaction, churn analysis
|
| 242 |
+
|
| 243 |
+
Generate insights that would be valuable to C-level executives and department heads.
|
| 244 |
+
"""
|
| 245 |
+
|
| 246 |
md = llm.invoke(report_prompt).content
|
| 247 |
|
| 248 |
chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
|
|
|
|
| 255 |
with st.spinner(f"Generating chart: {d}"):
|
| 256 |
with plt.ioff():
|
| 257 |
try:
|
| 258 |
+
# Enhanced chart generation prompt
|
| 259 |
+
chart_prompt = f"""
|
| 260 |
+
Create a professional {d} chart using matplotlib with these requirements:
|
| 261 |
+
1. Use a clean, business-appropriate style
|
| 262 |
+
2. Include proper title, axis labels, and legends
|
| 263 |
+
3. Apply appropriate color schemes (avoid rainbow colors)
|
| 264 |
+
4. Ensure text is readable (font size 10+)
|
| 265 |
+
5. Format numbers appropriately (e.g., currency, percentages)
|
| 266 |
+
6. Save the figure with high quality
|
| 267 |
+
7. Handle any missing or null values appropriately
|
| 268 |
+
"""
|
| 269 |
+
agent.run(chart_prompt)
|
| 270 |
fig = plt.gcf()
|
| 271 |
if fig.axes:
|
| 272 |
p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
|
|
|
|
| 426 |
# ─── VIDEO GENERATION ──────────────────────────────────────────────────────
|
| 427 |
def build_story_prompt(ctx_dict, num_scenes=None):
    """Build the LLM prompt that asks for a multi-scene video script.

    Args:
        ctx_dict: Dataset context. Must contain a ``"columns"`` sequence; the
            whole mapping is embedded in the prompt as indented JSON.
        num_scenes: Number of scenes to request. Defaults to the module-level
            ``VIDEO_SCENES`` constant when omitted.

    Returns:
        The prompt text. It instructs the model to separate scenes with the
        literal marker ``[SCENE_BREAK]`` and to put exactly one
        ``<generate_chart: "...">`` tag in each scene.

    Raises:
        KeyError: If ``ctx_dict`` has no ``"columns"`` entry.
    """
    if num_scenes is None:
        num_scenes = VIDEO_SCENES

    # Column labels are not guaranteed to be strings (e.g. integer headers),
    # so convert each one before joining. Only the first six are advertised.
    cols = ", ".join(str(col) for col in ctx_dict["columns"][:6])

    # The caller's context carries values the json encoder rejects (dtype
    # objects from ``df.dtypes.to_dict()``, timestamps inside records, ...).
    # The result is only ever read by the LLM, so falling back to ``str`` for
    # those values is deliberate; without it json.dumps raises TypeError.
    context_json = json.dumps(ctx_dict, indent=2, default=str)

    return f"""
You are a professional business storyteller and data analyst. Create a compelling script for a {num_scenes}-scene business video presentation.

**Complete Dataset Context:**
{context_json}

**Task Requirements:**
1. **Identify the Data Story**: Determine what business domain this data represents and what story it tells
2. **Create {num_scenes} distinct scenes** that build a logical narrative arc
3. **Each scene must contain:**
- 1-2 sentences of clear, professional narration (plain English, no jargon)
- Exactly one chart tag: `<generate_chart: "chart_type | specific description">`

**Chart Guidelines:**
- Valid types: bar, pie, line, scatter, hist
- Base all charts on actual columns: {cols}
- Choose chart types that best tell the story:
* bar: categorical comparisons, rankings
* pie: proportional breakdowns (≤6 categories)
* line: trends over time, progression
* scatter: relationships, correlations
* hist: distributions, frequency analysis

**Narrative Structure:**
- Scene 1: Set the context and introduce the main story
- Middle scenes: Develop key insights and supporting evidence
- Final scene: Conclude with actionable takeaways or future outlook

**Content Standards:**
- Use conversational, executive-level language
- Include specific data insights (trends, percentages, comparisons)
- Avoid chart descriptions in narration ("as shown in the chart")
- Make each scene self-contained but connected to the overall story
- Focus on business impact and actionable insights

**Domain-Specific Approaches:**
- Sales data: Customer journey, revenue trends, market performance
- HR data: Workforce insights, talent analytics, organizational health
- Financial data: Performance indicators, cost analysis, profitability
- Operational data: Process efficiency, bottlenecks, optimization opportunities
- Customer data: Behavior patterns, satisfaction trends, retention analysis

**Output Format:**
Separate each scene with exactly [SCENE_BREAK]

**Example Structure:**
Our company's data reveals fascinating insights about market performance over the past year. Let's explore what the numbers tell us about our growth trajectory.
<generate_chart: "line | monthly revenue growth over 12 months">

[SCENE_BREAK]

Customer acquisition has shown remarkable patterns, with certain segments driving significantly more value than others. The data shows a clear preference emerging in our target markets.
<generate_chart: "bar | customer acquisition by segment">

Create a compelling, data-driven story that executives would find engaging and actionable.
"""
|
| 486 |
|
| 487 |
|
| 488 |
def generate_video(buf: bytes, name: str, ctx: str, key: str):
|
|
|
|
| 498 |
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
|
| 499 |
google_api_key=API_KEY, temperature=0.2)
|
| 500 |
|
| 501 |
+
# Enhanced context with complete data insights
|
| 502 |
ctx_dict = {
|
| 503 |
"shape": df.shape,
|
| 504 |
"columns": list(df.columns),
|
| 505 |
"user_ctx": ctx or "General business analysis",
|
| 506 |
+
"full_dataframe": df.to_dict('records'),
|
| 507 |
+
"data_types": df.dtypes.to_dict(),
|
| 508 |
+
"numeric_summary": df.describe().to_dict() if len(df.select_dtypes(include=['number']).columns) > 0 else {}
|
| 509 |
}
|
| 510 |
+
|
| 511 |
script = llm.invoke(build_story_prompt(ctx_dict)).content
|
| 512 |
scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
|
| 513 |
|
|
|
|
| 557 |
|
| 558 |
return str(final_vid)
|
| 559 |
|
|
|
|
| 560 |
# ─── UI ─────────────────────────────────────────────────────────────────────
|
| 561 |
mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)
|
| 562 |
|