Update sozo_gen.py
Files changed: sozo_gen.py (+19 -7)
sozo_gen.py
CHANGED
|
@@ -502,7 +502,7 @@ def get_augmented_context(df: pd.DataFrame, user_ctx: str) -> Dict:
|
|
| 502 |
def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, bucket):
|
| 503 |
logging.info(f"Generating guided storyteller report draft for project {project_id}")
|
| 504 |
df = load_dataframe_safely(buf, name)
|
| 505 |
-
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.
|
| 506 |
|
| 507 |
data_context_str, context_for_charts = "", {}
|
| 508 |
try:
|
|
@@ -522,20 +522,27 @@ def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, b
|
|
| 522 |
|
| 523 |
md = ""
|
| 524 |
try:
|
| 525 |
-
# --- Pass 1: The Visualization Strategist ---
|
| 526 |
strategist_prompt = f"""
|
| 527 |
-
You are a data visualization expert. Your task is to create a palette of unique and impactful charts for a data storyteller.
|
| 528 |
Based on the provided data context, identify the 4-5 most distinct and insightful stories that can be visualized.
|
| 529 |
-
For each one, provide only the chart description tag.
|
| 530 |
|
| 531 |
**Data Context:**
|
| 532 |
{data_context_str}
|
| 533 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 534 |
**Output Format:**
|
| 535 |
Return ONLY a valid JSON array of strings. Each string must be a unique chart description tag.
|
| 536 |
|
| 537 |
Example:
|
| 538 |
-
["bar | Average Charges by Smoker Status", "scatter | Charges vs. BMI", "
|
| 539 |
"""
|
| 540 |
logging.info("Executing Visualization Strategist Pass...")
|
| 541 |
strategist_response = llm.invoke(strategist_prompt).content.strip()
|
|
@@ -544,13 +551,18 @@ def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, b
|
|
| 544 |
chart_palette = json.loads(strategist_response)
|
| 545 |
logging.info(f"Strategist Pass successful. Palette has {len(chart_palette)} unique charts.")
|
| 546 |
|
| 547 |
-
# --- Pass 2: The Master Storyteller ---
|
| 548 |
storyteller_prompt = f"""
|
| 549 |
-
You are an elite data storyteller. Your mission is to write a comprehensive, flowing narrative that analyzes the entire dataset provided.
|
| 550 |
|
| 551 |
**Data Context:**
|
| 552 |
{data_context_str}
|
| 553 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
**Your Toolbox (Most Important):**
|
| 555 |
To support your story with visuals, you have been provided with a pre-approved 'palette' of unique charts. As you write your narrative, you **must** integrate each of these chart tags, one time, at the most logical point in the story.
|
| 556 |
- You **must** use every chart tag from the provided palette exactly once.
|
|
|
|
| 502 |
def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, bucket):
|
| 503 |
logging.info(f"Generating guided storyteller report draft for project {project_id}")
|
| 504 |
df = load_dataframe_safely(buf, name)
|
| 505 |
+
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.3)
|
| 506 |
|
| 507 |
data_context_str, context_for_charts = "", {}
|
| 508 |
try:
|
|
|
|
| 522 |
|
| 523 |
md = ""
|
| 524 |
try:
|
| 525 |
+
# --- Pass 1: The "Visualization Strategist" ---
|
| 526 |
strategist_prompt = f"""
|
| 527 |
+
You are a data visualization expert. Your task is to create a diverse palette of unique and impactful charts for a data storyteller.
|
| 528 |
Based on the provided data context, identify the 4-5 most distinct and insightful stories that can be visualized.
|
|
|
|
| 529 |
|
| 530 |
**Data Context:**
|
| 531 |
{data_context_str}
|
| 532 |
|
| 533 |
+
**Your Goal:**
|
| 534 |
+
Your primary goal is to select a **diverse palette of chart types**. A high-quality response will use a mix of different charts from the available list to create a visually engaging and comprehensive report. **Do not use the same chart type more than twice.**
|
| 535 |
+
|
| 536 |
+
**Strategic Hints:**
|
| 537 |
+
- Consider a `histogram` to show the distribution of a key variable (like age or bmi).
|
| 538 |
+
- Consider a `pie chart` for a clear part-to-whole relationship (e.g., smoker vs. non-smoker proportions).
|
| 539 |
+
- Consider a `heatmap` if the dataset has multiple numeric columns and you believe the overall pattern of their correlations is a key insight in itself.
|
| 540 |
+
|
| 541 |
**Output Format:**
|
| 542 |
Return ONLY a valid JSON array of strings. Each string must be a unique chart description tag.
|
| 543 |
|
| 544 |
Example:
|
| 545 |
+
["bar | Average Charges by Smoker Status", "scatter | Charges vs. BMI", "hist | Distribution of Beneficiary Ages", "pie | Regional Proportions"]
|
| 546 |
"""
|
| 547 |
logging.info("Executing Visualization Strategist Pass...")
|
| 548 |
strategist_response = llm.invoke(strategist_prompt).content.strip()
|
|
|
|
| 551 |
chart_palette = json.loads(strategist_response)
|
| 552 |
logging.info(f"Strategist Pass successful. Palette has {len(chart_palette)} unique charts.")
|
| 553 |
|
| 554 |
+
# --- Pass 2: The "Master Storyteller" ---
|
| 555 |
storyteller_prompt = f"""
|
| 556 |
+
You are an elite data storyteller and business intelligence expert. Your mission is to write a comprehensive, flowing narrative that analyzes the entire dataset provided. Your goal is to create a captivating story that **drives action**.
|
| 557 |
|
| 558 |
**Data Context:**
|
| 559 |
{data_context_str}
|
| 560 |
|
| 561 |
+
**Narrative Construction Guidelines:**
|
| 562 |
+
1. **Use Compelling Headers:** Structure your report with multiple sections using Markdown headings (`##` or `###`). Do not write one long block of text. Create curiosity with your headers (e.g., 'The Smoking Premium: A Costly Habit', 'Geographic Hotspots: Where Charges Are Highest').
|
| 563 |
+
2. **Weave a Story:** Don't just describe the charts one by one. Connect the findings together. For example, how does 'age' relate to 'smoker status' and how do they both impact 'charges'?
|
| 564 |
+
3. **Drive to Action:** Conclude your report with a dedicated section titled `## Actionable Recommendations`. Based on your analysis, provide specific, data-driven suggestions that a business leader could implement.
|
| 565 |
+
|
| 566 |
**Your Toolbox (Most Important):**
|
| 567 |
To support your story with visuals, you have been provided with a pre-approved 'palette' of unique charts. As you write your narrative, you **must** integrate each of these chart tags, one time, at the most logical point in the story.
|
| 568 |
- You **must** use every chart tag from the provided palette exactly once.
|