sbs-API

Build error

App Files Community

rairo committed on Jul 19, 2025

Commit

b1c71e5

verified ·

1 Parent(s): ec8c9c1

Update sozo_gen.py

Browse files

Files changed (1) hide show

sozo_gen.py +38 -35

sozo_gen.py CHANGED Viewed

@@ -500,9 +500,9 @@ def get_augmented_context(df: pd.DataFrame, user_ctx: str) -> Dict:
     return json.loads(json.dumps(context, default=str))
 def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, bucket):
-    logging.info(f"Generating two-pass report draft for project {project_id}")
     df = load_dataframe_safely(buf, name)
-    llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.2)
     data_context_str, context_for_charts = "", {}
     try:
@@ -522,50 +522,53 @@ def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, b
     md = ""
     try:
-        # --- Pass 1: The Analyst ---
-        analyst_prompt = f"""
-        You are a senior data analyst. Your task is to analyze the provided data and create a structured report plan.
-        Identify 4-5 of the most important, non-overlapping insights from the data.
-        For each insight, provide a brief summary and the single best chart description tag to visualize it.
         **Data Context:**
         {data_context_str}
         **Output Format:**
-        Return ONLY a valid JSON array of objects. Each object must have two keys: "insight_summary" and "chart_description".
-        Ensure each chart_description is unique and directly relates to its insight_summary.
         Example:
-        [
-          {{ "insight_summary": "Smokers incur significantly higher medical charges.", "chart_description": "bar | Average Charges by Smoker Status" }},
-          {{ "insight_summary": "Medical charges show a clear positive correlation with Body Mass Index.", "chart_description": "scatter | Charges vs. BMI" }}
-        ]
         """
-        logging.info("Executing Analyst Pass...")
-        analyst_response = llm.invoke(analyst_prompt).content.strip()
-        if analyst_response.startswith("```json"):
-            analyst_response = analyst_response[7:-3]
-        report_plan = json.loads(analyst_response)
-        logging.info(f"Analyst Pass successful. Plan has {len(report_plan)} insights.")
-        # --- Pass 2: The Writer ---
-        writer_prompt = f"""
-        You are an expert business writer. Your task is to write a flowing, narrative-style report based on the following plan from our data analyst.
-        For each point in the plan, write a clear, insightful paragraph that explains the finding.
-        After your explanation, you **must** insert the `chart_description` tag provided in the plan, exactly as it is written in the format `<generate_chart: "the_description">`.
-        Start with a brief, engaging introduction and end with a short conclusion.
-        **Analyst's Plan:**
-        {json.dumps(report_plan, indent=2)}
-        Now, write the complete Markdown report.
         """
-        logging.info("Executing Writer Pass...")
-        md = llm.invoke(writer_prompt).content.strip()
-        logging.info("Writer Pass successful.")
     except Exception as e:
-        logging.error(f"Two-pass system failed: {e}. Reverting to single-pass fallback.")
         fallback_prompt = f"""
         You are an elite data storyteller and business intelligence expert. Your mission is to uncover the compelling, hidden narrative in this dataset and present it as a captivating story in Markdown format that drives action.
         **Data Context:** {data_context_str}

     return json.loads(json.dumps(context, default=str))
 def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, bucket):
+    logging.info(f"Generating guided storyteller report draft for project {project_id}")
     df = load_dataframe_safely(buf, name)
+    llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.25)
     data_context_str, context_for_charts = "", {}
     try:
     md = ""
     try:
+        # --- Pass 1: The Visualization Strategist ---
+        strategist_prompt = f"""
+        You are a data visualization expert. Your task is to create a palette of unique and impactful charts for a data storyteller.
+        Based on the provided data context, identify the 4-5 most distinct and insightful stories that can be visualized.
+        For each one, provide only the chart description tag.
         **Data Context:**
         {data_context_str}
         **Output Format:**
+        Return ONLY a valid JSON array of strings. Each string must be a unique chart description tag.
         Example:
+        ["bar | Average Charges by Smoker Status", "scatter | Charges vs. BMI", "bar | Average Charges by Region"]
         """
+        logging.info("Executing Visualization Strategist Pass...")
+        strategist_response = llm.invoke(strategist_prompt).content.strip()
+        if strategist_response.startswith("```json"):
+            strategist_response = strategist_response[7:-3]
+        chart_palette = json.loads(strategist_response)
+        logging.info(f"Strategist Pass successful. Palette has {len(chart_palette)} unique charts.")
+        # --- Pass 2: The Master Storyteller ---
+        storyteller_prompt = f"""
+        You are an elite data storyteller. Your mission is to write a comprehensive, flowing narrative that analyzes the entire dataset provided. Weave a story that connects different findings, explores nuances, and provides a holistic view.
+        **Data Context:**
+        {data_context_str}
+        **Your Toolbox (Most Important):**
+        To support your story with visuals, you have been provided with a pre-approved 'palette' of unique charts. As you write your narrative, you **must** integrate each of these chart tags, one time, at the most logical point in the story.
+        - You **must** use every chart tag from the provided palette exactly once.
+        - Do **not** repeat chart tags.
+        - Do **not** invent new chart tags.
+        - Insert the tags in the format `<generate_chart: "the_description">`.
+        **Chart Palette:**
+        {json.dumps(chart_palette, indent=2)}
+        Now, write the complete, comprehensive Markdown report.
         """
+        logging.info("Executing Master Storyteller Pass...")
+        md = llm.invoke(storyteller_prompt).content.strip()
+        logging.info("Master Storyteller Pass successful.")
     except Exception as e:
+        logging.error(f"Guided Storyteller system failed: {e}. Reverting to single-pass fallback.")
         fallback_prompt = f"""
         You are an elite data storyteller and business intelligence expert. Your mission is to uncover the compelling, hidden narrative in this dataset and present it as a captivating story in Markdown format that drives action.
         **Data Context:** {data_context_str}