Update sozo_gen.py
Browse files
- sozo_gen.py +38 -35
sozo_gen.py
CHANGED
|
@@ -500,9 +500,9 @@ def get_augmented_context(df: pd.DataFrame, user_ctx: str) -> Dict:
|
|
| 500 |
return json.loads(json.dumps(context, default=str))
|
| 501 |
|
| 502 |
def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, bucket):
|
| 503 |
-
logging.info(f"Generating
|
| 504 |
df = load_dataframe_safely(buf, name)
|
| 505 |
-
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.
|
| 506 |
|
| 507 |
data_context_str, context_for_charts = "", {}
|
| 508 |
try:
|
|
@@ -522,50 +522,53 @@ def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, b
|
|
| 522 |
|
| 523 |
md = ""
|
| 524 |
try:
|
| 525 |
-
# --- Pass 1: The
|
| 526 |
-
|
| 527 |
-
You are a
|
| 528 |
-
|
| 529 |
-
For each
|
| 530 |
|
| 531 |
**Data Context:**
|
| 532 |
{data_context_str}
|
| 533 |
|
| 534 |
**Output Format:**
|
| 535 |
-
Return ONLY a valid JSON array of
|
| 536 |
-
Ensure each chart_description is unique and directly relates to its insight_summary.
|
| 537 |
|
| 538 |
Example:
|
| 539 |
-
[
|
| 540 |
-
{{ "insight_summary": "Smokers incur significantly higher medical charges.", "chart_description": "bar | Average Charges by Smoker Status" }},
|
| 541 |
-
{{ "insight_summary": "Medical charges show a clear positive correlation with Body Mass Index.", "chart_description": "scatter | Charges vs. BMI" }}
|
| 542 |
-
]
|
| 543 |
"""
|
| 544 |
-
logging.info("Executing
|
| 545 |
-
|
| 546 |
-
if
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
logging.info(f"
|
| 550 |
-
|
| 551 |
-
# --- Pass 2: The
|
| 552 |
-
|
| 553 |
-
You are an
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
**
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 562 |
"""
|
| 563 |
-
logging.info("Executing
|
| 564 |
-
md = llm.invoke(
|
| 565 |
-
logging.info("
|
| 566 |
|
| 567 |
except Exception as e:
|
| 568 |
-
logging.error(f"
|
| 569 |
fallback_prompt = f"""
|
| 570 |
You are an elite data storyteller and business intelligence expert. Your mission is to uncover the compelling, hidden narrative in this dataset and present it as a captivating story in Markdown format that drives action.
|
| 571 |
**Data Context:** {data_context_str}
|
|
|
|
| 500 |
return json.loads(json.dumps(context, default=str))
|
| 501 |
|
| 502 |
def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, bucket):
|
| 503 |
+
logging.info(f"Generating guided storyteller report draft for project {project_id}")
|
| 504 |
df = load_dataframe_safely(buf, name)
|
| 505 |
+
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.25)
|
| 506 |
|
| 507 |
data_context_str, context_for_charts = "", {}
|
| 508 |
try:
|
|
|
|
| 522 |
|
| 523 |
md = ""
|
| 524 |
try:
|
| 525 |
+
# --- Pass 1: The Visualization Strategist ---
|
| 526 |
+
strategist_prompt = f"""
|
| 527 |
+
You are a data visualization expert. Your task is to create a palette of unique and impactful charts for a data storyteller.
|
| 528 |
+
Based on the provided data context, identify the 4-5 most distinct and insightful stories that can be visualized.
|
| 529 |
+
For each one, provide only the chart description tag.
|
| 530 |
|
| 531 |
**Data Context:**
|
| 532 |
{data_context_str}
|
| 533 |
|
| 534 |
**Output Format:**
|
| 535 |
+
Return ONLY a valid JSON array of strings. Each string must be a unique chart description tag.
|
|
|
|
| 536 |
|
| 537 |
Example:
|
| 538 |
+
["bar | Average Charges by Smoker Status", "scatter | Charges vs. BMI", "bar | Average Charges by Region"]
|
|
|
|
|
|
|
|
|
|
| 539 |
"""
|
| 540 |
+
logging.info("Executing Visualization Strategist Pass...")
|
| 541 |
+
strategist_response = llm.invoke(strategist_prompt).content.strip()
|
| 542 |
+
if strategist_response.startswith("```json"):
|
| 543 |
+
strategist_response = strategist_response[7:-3]
|
| 544 |
+
chart_palette = json.loads(strategist_response)
|
| 545 |
+
logging.info(f"Strategist Pass successful. Palette has {len(chart_palette)} unique charts.")
|
| 546 |
+
|
| 547 |
+
# --- Pass 2: The Master Storyteller ---
|
| 548 |
+
storyteller_prompt = f"""
|
| 549 |
+
You are an elite data storyteller. Your mission is to write a comprehensive, flowing narrative that analyzes the entire dataset provided. Weave a story that connects different findings, explores nuances, and provides a holistic view.
|
| 550 |
+
|
| 551 |
+
**Data Context:**
|
| 552 |
+
{data_context_str}
|
| 553 |
+
|
| 554 |
+
**Your Toolbox (Most Important):**
|
| 555 |
+
To support your story with visuals, you have been provided with a pre-approved 'palette' of unique charts. As you write your narrative, you **must** integrate each of these chart tags, one time, at the most logical point in the story.
|
| 556 |
+
- You **must** use every chart tag from the provided palette exactly once.
|
| 557 |
+
- Do **not** repeat chart tags.
|
| 558 |
+
- Do **not** invent new chart tags.
|
| 559 |
+
- Insert the tags in the format `<generate_chart: "the_description">`.
|
| 560 |
+
|
| 561 |
+
**Chart Palette:**
|
| 562 |
+
{json.dumps(chart_palette, indent=2)}
|
| 563 |
+
|
| 564 |
+
Now, write the complete, comprehensive Markdown report.
|
| 565 |
"""
|
| 566 |
+
logging.info("Executing Master Storyteller Pass...")
|
| 567 |
+
md = llm.invoke(storyteller_prompt).content.strip()
|
| 568 |
+
logging.info("Master Storyteller Pass successful.")
|
| 569 |
|
| 570 |
except Exception as e:
|
| 571 |
+
logging.error(f"Guided Storyteller system failed: {e}. Reverting to single-pass fallback.")
|
| 572 |
fallback_prompt = f"""
|
| 573 |
You are an elite data storyteller and business intelligence expert. Your mission is to uncover the compelling, hidden narrative in this dataset and present it as a captivating story in Markdown format that drives action.
|
| 574 |
**Data Context:** {data_context_str}
|