rairo committed on
Commit
b1c71e5
·
verified ·
1 Parent(s): ec8c9c1

Update sozo_gen.py

Browse files
Files changed (1) hide show
  1. sozo_gen.py +38 -35
sozo_gen.py CHANGED
@@ -500,9 +500,9 @@ def get_augmented_context(df: pd.DataFrame, user_ctx: str) -> Dict:
500
  return json.loads(json.dumps(context, default=str))
501
 
502
  def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, bucket):
503
- logging.info(f"Generating two-pass report draft for project {project_id}")
504
  df = load_dataframe_safely(buf, name)
505
- llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.2)
506
 
507
  data_context_str, context_for_charts = "", {}
508
  try:
@@ -522,50 +522,53 @@ def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, b
522
 
523
  md = ""
524
  try:
525
- # --- Pass 1: The Analyst ---
526
- analyst_prompt = f"""
527
- You are a senior data analyst. Your task is to analyze the provided data and create a structured report plan.
528
- Identify 4-5 of the most important, non-overlapping insights from the data.
529
- For each insight, provide a brief summary and the single best chart description tag to visualize it.
530
 
531
  **Data Context:**
532
  {data_context_str}
533
 
534
  **Output Format:**
535
- Return ONLY a valid JSON array of objects. Each object must have two keys: "insight_summary" and "chart_description".
536
- Ensure each chart_description is unique and directly relates to its insight_summary.
537
 
538
  Example:
539
- [
540
- {{ "insight_summary": "Smokers incur significantly higher medical charges.", "chart_description": "bar | Average Charges by Smoker Status" }},
541
- {{ "insight_summary": "Medical charges show a clear positive correlation with Body Mass Index.", "chart_description": "scatter | Charges vs. BMI" }}
542
- ]
543
  """
544
- logging.info("Executing Analyst Pass...")
545
- analyst_response = llm.invoke(analyst_prompt).content.strip()
546
- if analyst_response.startswith("```json"):
547
- analyst_response = analyst_response[7:-3]
548
- report_plan = json.loads(analyst_response)
549
- logging.info(f"Analyst Pass successful. Plan has {len(report_plan)} insights.")
550
-
551
- # --- Pass 2: The Writer ---
552
- writer_prompt = f"""
553
- You are an expert business writer. Your task is to write a flowing, narrative-style report based on the following plan from our data analyst.
554
- For each point in the plan, write a clear, insightful paragraph that explains the finding.
555
- After your explanation, you **must** insert the `chart_description` tag provided in the plan, exactly as it is written in the format `<generate_chart: "the_description">`.
556
- Start with a brief, engaging introduction and end with a short conclusion.
557
-
558
- **Analyst's Plan:**
559
- {json.dumps(report_plan, indent=2)}
560
-
561
- Now, write the complete Markdown report.
 
 
 
 
 
 
 
562
  """
563
- logging.info("Executing Writer Pass...")
564
- md = llm.invoke(writer_prompt).content.strip()
565
- logging.info("Writer Pass successful.")
566
 
567
  except Exception as e:
568
- logging.error(f"Two-pass system failed: {e}. Reverting to single-pass fallback.")
569
  fallback_prompt = f"""
570
  You are an elite data storyteller and business intelligence expert. Your mission is to uncover the compelling, hidden narrative in this dataset and present it as a captivating story in Markdown format that drives action.
571
  **Data Context:** {data_context_str}
 
500
  return json.loads(json.dumps(context, default=str))
501
 
502
  def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, bucket):
503
+ logging.info(f"Generating guided storyteller report draft for project {project_id}")
504
  df = load_dataframe_safely(buf, name)
505
+ llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.25)
506
 
507
  data_context_str, context_for_charts = "", {}
508
  try:
 
522
 
523
  md = ""
524
  try:
525
+ # --- Pass 1: The Visualization Strategist ---
526
+ strategist_prompt = f"""
527
+ You are a data visualization expert. Your task is to create a palette of unique and impactful charts for a data storyteller.
528
+ Based on the provided data context, identify the 4-5 most distinct and insightful stories that can be visualized.
529
+ For each one, provide only the chart description tag.
530
 
531
  **Data Context:**
532
  {data_context_str}
533
 
534
  **Output Format:**
535
+ Return ONLY a valid JSON array of strings. Each string must be a unique chart description tag.
 
536
 
537
  Example:
538
+ ["bar | Average Charges by Smoker Status", "scatter | Charges vs. BMI", "bar | Average Charges by Region"]
 
 
 
539
  """
540
+ logging.info("Executing Visualization Strategist Pass...")
541
+ strategist_response = llm.invoke(strategist_prompt).content.strip()
542
+ if strategist_response.startswith("```json"):
543
+ strategist_response = strategist_response[7:-3]
544
+ chart_palette = json.loads(strategist_response)
545
+ logging.info(f"Strategist Pass successful. Palette has {len(chart_palette)} unique charts.")
546
+
547
+ # --- Pass 2: The Master Storyteller ---
548
+ storyteller_prompt = f"""
549
+ You are an elite data storyteller. Your mission is to write a comprehensive, flowing narrative that analyzes the entire dataset provided. Weave a story that connects different findings, explores nuances, and provides a holistic view.
550
+
551
+ **Data Context:**
552
+ {data_context_str}
553
+
554
+ **Your Toolbox (Most Important):**
555
+ To support your story with visuals, you have been provided with a pre-approved 'palette' of unique charts. As you write your narrative, you **must** integrate each of these chart tags, one time, at the most logical point in the story.
556
+ - You **must** use every chart tag from the provided palette exactly once.
557
+ - Do **not** repeat chart tags.
558
+ - Do **not** invent new chart tags.
559
+ - Insert the tags in the format `<generate_chart: "the_description">`.
560
+
561
+ **Chart Palette:**
562
+ {json.dumps(chart_palette, indent=2)}
563
+
564
+ Now, write the complete, comprehensive Markdown report.
565
  """
566
+ logging.info("Executing Master Storyteller Pass...")
567
+ md = llm.invoke(storyteller_prompt).content.strip()
568
+ logging.info("Master Storyteller Pass successful.")
569
 
570
  except Exception as e:
571
+ logging.error(f"Guided Storyteller system failed: {e}. Reverting to single-pass fallback.")
572
  fallback_prompt = f"""
573
  You are an elite data storyteller and business intelligence expert. Your mission is to uncover the compelling, hidden narrative in this dataset and present it as a captivating story in Markdown format that drives action.
574
  **Data Context:** {data_context_str}