Adk-Analyst2

Sleeping

App Files Community

rairo committed on Jul 5, 2025

Commit

a2bd41d

verified ·

1 Parent(s): 26532f6

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -19

app.py CHANGED Viewed

@@ -179,20 +179,70 @@ def generate_report(buf: bytes, name: str, ctx: str, key: str):
     llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
                                  google_api_key=API_KEY, temperature=0.1)
     ctx_dict = {
         "shape": df.shape,
         "columns": list(df.columns),
         "user_ctx": ctx or "General business analysis",
     }
     cols = ", ".join(ctx_dict["columns"][:6])
-    report_prompt = (
-        "You are a senior business analyst. Write an executive-level Markdown report "
-        "with insights & recommendations.\n"
-        'When a visual is helpful, insert a tag like <generate_chart: "pie | sales by region"> '
-        "(chart_type first, then a description). Valid chart types: bar, pie, line, scatter, hist.\n"
-        f"Base every chart on columns ({cols}) from the dataset.\n"
-        f"Data context:\n{json.dumps(ctx_dict, indent=2)}"
-    )
     md = llm.invoke(report_prompt).content
     chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
@@ -205,7 +255,18 @@ def generate_report(buf: bytes, name: str, ctx: str, key: str):
             with st.spinner(f"Generating chart: {d}"):
                 with plt.ioff():
                     try:
-                        agent.run(f"Create a {d} with Matplotlib and save.")
                         fig = plt.gcf()
                         if fig.axes:
                             p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
@@ -365,15 +426,63 @@ def concat_media(paths: List[str], out: Path, kind="video"):
 # ─── VIDEO GENERATION ──────────────────────────────────────────────────────
 def build_story_prompt(ctx_dict):
     cols = ", ".join(ctx_dict["columns"][:6])
-    return (
-        f"Create a script for a short business video with exactly {VIDEO_SCENES} scenes.\n"
-        "Each scene must include:\n"
-        "• 1–2 sentences of narration (no scene labels, no chart descriptions).\n"
-        '• Exactly one chart tag, e.g. <generate_chart: "bar | total revenue by month">.\n'
-        "Valid chart types: bar, pie, line, scatter, hist.\n"
-        f"Use the dataset columns ({cols}) with sensible aggregations.\n"
-        "Separate scenes with [SCENE_BREAK]."
-    )
 def generate_video(buf: bytes, name: str, ctx: str, key: str):
@@ -389,11 +498,16 @@ def generate_video(buf: bytes, name: str, ctx: str, key: str):
     llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
                                  google_api_key=API_KEY, temperature=0.2)
     ctx_dict = {
         "shape": df.shape,
         "columns": list(df.columns),
         "user_ctx": ctx or "General business analysis",
     }
     script = llm.invoke(build_story_prompt(ctx_dict)).content
     scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
@@ -443,7 +557,6 @@ def generate_video(buf: bytes, name: str, ctx: str, key: str):
     return str(final_vid)
 # ─── UI ─────────────────────────────────────────────────────────────────────
 mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)

     llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
                                  google_api_key=API_KEY, temperature=0.1)
+    # Enhanced context analysis
     ctx_dict = {
         "shape": df.shape,
         "columns": list(df.columns),
         "user_ctx": ctx or "General business analysis",
+        "full_dataframe": df.to_dict('records'),
+        "data_types": df.dtypes.to_dict(),
+        "missing_values": df.isnull().sum().to_dict(),
+        "numeric_summary": df.describe().to_dict() if len(df.select_dtypes(include=['number']).columns) > 0 else {}
     }
     cols = ", ".join(ctx_dict["columns"][:6])
+    # Enhanced report prompt with domain intelligence
+    report_prompt = f"""
+    You are a senior data analyst and business intelligence expert. Analyze the provided dataset and write a comprehensive executive-level Markdown report.
+    **Dataset Analysis Context:**
+    {json.dumps(ctx_dict, indent=2)}
+    **Instructions:**
+    1. **Identify Data Domain**: First, determine what type of data this represents (e.g., sales/revenue, healthcare/medical, HR/employee, financial, operational, customer, research, etc.) based on column names and sample data.
+    2. **Executive Summary**: Start with a high-level summary of key findings and business impact.
+    3. **Data Quality Assessment**: Comment on data completeness, any notable missing values, and data reliability.
+    4. **Key Insights**: Provide 4-6 actionable insights specific to the identified domain:
+       - Trends and patterns
+       - Outliers or anomalies
+       - Performance indicators
+       - Risk factors or opportunities
+    5. **Strategic Recommendations**: Offer concrete, actionable recommendations based on the data.
+    6. **Visual Support**: When a visualization would enhance understanding, insert chart tags like:
+       `<generate_chart: "chart_type | specific description">`
+       Valid chart types: bar, pie, line, scatter, hist
+       Base every chart on actual columns: {cols}
+       Choose chart types strategically:
+       - bar: for categorical comparisons
+       - pie: for proportional breakdowns (when categories < 7)
+       - line: for time series or trends
+       - scatter: for correlation analysis
+       - hist: for distribution analysis
+    7. **Format Requirements**:
+       - Use professional business language
+       - Include relevant metrics and percentages
+       - Structure with clear headers (## Executive Summary, ## Key Insights, etc.)
+       - End with ## Next Steps section
+    **Domain-Specific Focus Areas:**
+    - If sales data: focus on revenue trends, customer segments, product performance
+    - If HR data: focus on workforce analytics, retention, performance metrics
+    - If financial data: focus on profitability, cost analysis, financial health
+    - If operational data: focus on efficiency, bottlenecks, process optimization
+    - If customer data: focus on behavior patterns, satisfaction, churn analysis
+    Generate insights that would be valuable to C-level executives and department heads.
+    """
     md = llm.invoke(report_prompt).content
     chart_descs = extract_chart_tags(md)[:MAX_CHARTS]
             with st.spinner(f"Generating chart: {d}"):
                 with plt.ioff():
                     try:
+                        # Enhanced chart generation prompt
+                        chart_prompt = f"""
+                        Create a professional {d} chart using matplotlib with these requirements:
+                        1. Use a clean, business-appropriate style
+                        2. Include proper title, axis labels, and legends
+                        3. Apply appropriate color schemes (avoid rainbow colors)
+                        4. Ensure text is readable (font size 10+)
+                        5. Format numbers appropriately (e.g., currency, percentages)
+                        6. Save the figure with high quality
+                        7. Handle any missing or null values appropriately
+                        """
+                        agent.run(chart_prompt)
                         fig = plt.gcf()
                         if fig.axes:
                             p = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
 # ─── VIDEO GENERATION ──────────────────────────────────────────────────────
 def build_story_prompt(ctx_dict):
     cols = ", ".join(ctx_dict["columns"][:6])
+    return f"""
+    You are a professional business storyteller and data analyst. Create a compelling script for a {VIDEO_SCENES}-scene business video presentation.
+    **Complete Dataset Context:**
+    {json.dumps(ctx_dict, indent=2)}
+    **Task Requirements:**
+    1. **Identify the Data Story**: Determine what business domain this data represents and what story it tells
+    2. **Create {VIDEO_SCENES} distinct scenes** that build a logical narrative arc
+    3. **Each scene must contain:**
+       - 1-2 sentences of clear, professional narration (plain English, no jargon)
+       - Exactly one chart tag: `<generate_chart: "chart_type | specific description">`
+    **Chart Guidelines:**
+    - Valid types: bar, pie, line, scatter, hist
+    - Base all charts on actual columns: {cols}
+    - Choose chart types that best tell the story:
+      * bar: categorical comparisons, rankings
+      * pie: proportional breakdowns (≤6 categories)
+      * line: trends over time, progression
+      * scatter: relationships, correlations
+      * hist: distributions, frequency analysis
+    **Narrative Structure:**
+    - Scene 1: Set the context and introduce the main story
+    - Middle scenes: Develop key insights and supporting evidence
+    - Final scene: Conclude with actionable takeaways or future outlook
+    **Content Standards:**
+    - Use conversational, executive-level language
+    - Include specific data insights (trends, percentages, comparisons)
+    - Avoid chart descriptions in narration ("as shown in the chart")
+    - Make each scene self-contained but connected to the overall story
+    - Focus on business impact and actionable insights
+    **Domain-Specific Approaches:**
+    - Sales data: Customer journey, revenue trends, market performance
+    - HR data: Workforce insights, talent analytics, organizational health
+    - Financial data: Performance indicators, cost analysis, profitability
+    - Operational data: Process efficiency, bottlenecks, optimization opportunities
+    - Customer data: Behavior patterns, satisfaction trends, retention analysis
+    **Output Format:**
+    Separate each scene with exactly [SCENE_BREAK]
+    **Example Structure:**
+    Our company's data reveals fascinating insights about market performance over the past year. Let's explore what the numbers tell us about our growth trajectory.
+    <generate_chart: "line | monthly revenue growth over 12 months">
+    [SCENE_BREAK]
+    Customer acquisition has shown remarkable patterns, with certain segments driving significantly more value than others. The data shows a clear preference emerging in our target markets.
+    <generate_chart: "bar | customer acquisition by segment">
+    Create a compelling, data-driven story that executives would find engaging and actionable.
+    """
 def generate_video(buf: bytes, name: str, ctx: str, key: str):
     llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
                                  google_api_key=API_KEY, temperature=0.2)
+    # Enhanced context with complete data insights
     ctx_dict = {
         "shape": df.shape,
         "columns": list(df.columns),
         "user_ctx": ctx or "General business analysis",
+        "full_dataframe": df.to_dict('records'),
+        "data_types": df.dtypes.to_dict(),
+        "numeric_summary": df.describe().to_dict() if len(df.select_dtypes(include=['number']).columns) > 0 else {}
     }
     script = llm.invoke(build_story_prompt(ctx_dict)).content
     scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
     return str(final_vid)
 # ─── UI ─────────────────────────────────────────────────────────────────────
 mode = st.radio("Select Output Format:", ["Report (PDF)", "Video Narrative"], horizontal=True)