Spaces:

cryogenic22
/

sales_analytics

Runtime error

App Files Files Community

cryogenic22 committed on Mar 15, 2025

Commit

72ca2bc

verified ·

1 Parent(s): ca8485b

Create insight_agent.py

Browse files

Files changed (1) hide show

agents/insight_agent.py +419 -0

agents/insight_agent.py ADDED Viewed

	@@ -0,0 +1,419 @@

+import os
+import json
+import pandas as pd
+import numpy as np
+from typing import Dict, List, Any, Tuple, Optional
+from pydantic import BaseModel, Field
+from langchain_anthropic import ChatAnthropic
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+import re
+from datetime import datetime
+class InsightRequest(BaseModel):
+    """Structure for an insight generation request"""
+    request_id: str
+    original_problem: str
+    analysis_results: Dict[str, Any]
+    validation_results: Dict[str, Any]
+    target_audience: str = "executive"  # Options: executive, analyst, data scientist
+class InsightCard(BaseModel):
+    """Structure for an insight card"""
+    card_id: str
+    title: str
+    description: str
+    key_findings: List[Dict[str, Any]]
+    charts: List[str] = None
+    metrics: Dict[str, Any] = None
+    action_items: List[Dict[str, Any]] = None
+    confidence: float
+    timestamp: datetime
+class InsightsAgent:
+    """Agent responsible for generating insight cards and visualizations"""
+    def __init__(self):
+        """Initialize the insights agent"""
+        # Set up Claude API client
+        api_key = os.getenv("ANTHROPIC_API_KEY")
+        if not api_key:
+            raise ValueError("ANTHROPIC_API_KEY not found in environment variables")
+        self.llm = ChatAnthropic(
+            model="claude-3-haiku-20240307",
+            anthropic_api_key=api_key,
+            temperature=0.2
+        )
+        # Create insight generation prompt
+        self.insight_prompt = ChatPromptTemplate.from_messages([
+            ("system", """You are an expert pharmaceutical analytics insights generator.
+Your task is to create clear, actionable insights from analysis results.
+For each insight request:
+1. Synthesize analysis findings into clear, concise insights
+2. Prioritize insights based on business impact
+3. Tailor communication style to the target audience
+4. Suggest concrete action items based on the findings
+5. Present balanced view including confidence levels and limitations
+Output your insights in JSON format with the following structure:
+```json
+{
+  "title": "DrugX Sales Decline Analysis",
+  "description": "Analysis of the 15% sales decline in the Northeast region",
+  "key_findings": [
+    {
+      "finding": "Competitor Launch Impact",
+      "details": "The launch of CompDrug2 by MedCorp 45 days ago has captured approximately 60% of our market share in the Northeast region.",
+      "evidence": "Strong correlation between sales decline and competitor sales growth, with 85% confidence.",
+      "impact": "Estimated $2.4M quarterly revenue impact"
+    },
+    {
+      "finding": "Supply Chain Issues",
+      "details": "Inventory shortages at 3 distribution centers in the Northeast have led to unfilled orders.",
+      "evidence": "25% of pharmacies experienced stockouts in the last 30 days.",
+      "impact": "Estimated $1.0M quarterly revenue impact"
+    },
+    {
+      "finding": "Seasonal Factors",
+      "details": "Normal seasonal variation accounts for a portion of the observed decline.",
+      "evidence": "Historical patterns show 5-7% seasonal decline in this period.",
+      "impact": "Estimated $0.6M quarterly revenue impact"
+    }
+  ],
+  "charts": [
+    "sales_trend_chart",
+    "competitor_comparison_chart",
+    "supply_chain_impact_chart"
+  ],
+  "metrics": {
+    "total_impact": "$4.0M quarterly",
+    "market_share_loss": "8.5 percentage points",
+    "affected_prescribers": "217 out of 934 (23%)",
+    "affected_territories": "3 out of 4 Northeast territories"
+  },
+  "action_items": [
+    {
+      "action": "Launch targeted co-pay program",
+      "owner": "Marketing",
+      "timeline": "Immediate (0-15 days)",
+      "expected_impact": "Recover 30-40% of lost prescriptions",
+      "priority": "High"
+    },
+    {
+      "action": "Resolve supply chain bottlenecks",
+      "owner": "Operations",
+      "timeline": "Short-term (15-45 days)",
+      "expected_impact": "Eliminate 90% of stockouts",
+      "priority": "High"
+    },
+    {
+      "action": "Develop competitive response strategy",
+      "owner": "Commercial Strategy",
+      "timeline": "Medium-term (30-90 days)",
+      "expected_impact": "Position for market share recovery",
+      "priority": "Medium"
+    }
+  ],
+  "confidence": 0.85
+}
+```
+Adapt your insights to the target audience:
+- For executives: Focus on business impact, actions, and strategic implications
+- For analysts: Include more detailed findings and evidence
+- For data scientists: Add methodological details and statistical significance
+Be concise but comprehensive, highlighting the most important insights first.
+"""),
+            ("human", """
+Original Problem Statement: {original_problem}
+Analysis Results:
+{analysis_results}
+Validation Results:
+{validation_results}
+Target Audience: {target_audience}
+Please generate actionable insights based on these results.
+""")
+        ])
+        # Set up the insight generation chain
+        self.insight_chain = (
+            self.insight_prompt
+            | self.llm
+            | StrOutputParser()
+        )
+        # Create visualization prompt
+        self.visualization_prompt = ChatPromptTemplate.from_messages([
+            ("system", """You are an expert data visualization designer specializing in pharmaceutical analytics.
+Your task is to generate Python code to create clear, insightful visualizations based on analysis results.
+For each visualization request:
+1. Create professional, publication-quality visualizations
+2. Choose appropriate chart types for the data and insights
+3. Use a consistent color scheme and styling
+4. Add clear labels, titles, and annotations
+5. Focus on communicating the key insights effectively
+The visualizations should tell a compelling story about the data.
+Make sure to include all the necessary code for styling and formatting.
+Format your response with a code block:
+```python
+# Visualization code
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+def create_visualizations(data_sources):
+    # Your visualization code here
+    # Create multiple figures as needed
+    # Return a list of figure objects
+    return [fig1, fig2, fig3]
+```
+The code should be complete and ready to execute with the provided data sources.
+"""),
+            ("human", """
+Visualization Request: {description}
+Key Insights:
+{key_insights}
+Available data sources:
+{data_sources}
+Target audience: {target_audience}
+Please generate Python code to create visualizations for these insights.
+""")
+        ])
+        # Set up the visualization chain
+        self.visualization_chain = (
+            self.visualization_prompt
+            | self.llm
+            | StrOutputParser()
+        )
+    def extract_json_from_response(self, response: str) -> Dict:
+        """Extract JSON from text that might contain additional content"""
+        try:
+            # First, try to parse the entire text as JSON
+            return json.loads(response)
+        except json.JSONDecodeError:
+            # If that fails, look for JSON block
+            import re
+            json_pattern = r'```json\s*([\s\S]*?)\s*```'
+            match = re.search(json_pattern, response, re.DOTALL)
+            if match:
+                try:
+                    return json.loads(match.group(1))
+                except json.JSONDecodeError:
+                    pass
+            # Try a more aggressive approach to find JSON-like content
+            json_pattern = r'({[\s\S]*})'
+            match = re.search(json_pattern, response)
+            if match:
+                try:
+                    return json.loads(match.group(1))
+                except json.JSONDecodeError:
+                    pass
+            raise ValueError(f"Could not extract JSON from response: {response}")
+    def extract_python_from_response(self, response: str) -> str:
+        """Extract Python code from LLM response"""
+        # Extract Python between ```python and ``` markers
+        python_match = re.search(r'```python\s*(.*?)\s*```', response, re.DOTALL)
+        if python_match:
+            return python_match.group(1).strip()
+        # If not found with python tag, try generic code block
+        python_match = re.search(r'```\s*(.*?)\s*```', response, re.DOTALL)
+        if python_match:
+            return python_match.group(1).strip()
+        # If all else fails, return empty string
+        return ""
+    def generate_insights(self, request: InsightRequest) -> InsightCard:
+        """Generate insights based on analysis and validation results"""
+        print(f"Insights Agent: Generating insights for problem: {request.original_problem}")
+        # Format analysis results for the prompt
+        analysis_results_str = json.dumps(request.analysis_results, indent=2)
+        # Format validation results for the prompt
+        validation_results_str = json.dumps(request.validation_results, indent=2)
+        # Format the request for the prompt
+        request_data = {
+            "original_problem": request.original_problem,
+            "analysis_results": analysis_results_str,
+            "validation_results": validation_results_str,
+            "target_audience": request.target_audience
+        }
+        # Generate insights
+        response = self.insight_chain.invoke(request_data)
+        # Extract and parse insights JSON
+        insights_dict = self.extract_json_from_response(response)
+        # Add missing fields
+        insights_dict["card_id"] = f"insight_{request.request_id}"
+        insights_dict["timestamp"] = datetime.now().isoformat()
+        # Ensure confidence exists
+        if "confidence" not in insights_dict:
+            # Use validation score if available, otherwise default to 0.7
+            insights_dict["confidence"] = request.validation_results.get("validation_score", 0.7)
+        return InsightCard(**insights_dict)
+    def generate_visualizations(self, insight_card: InsightCard, data_sources: Dict[str, Any]) -> List[str]:
+        """Generate visualizations based on insights"""
+        print(f"Insights Agent: Generating visualizations for insight card: {insight_card.title}")
+        # Extract key insights for visualization context
+        key_insights_str = json.dumps(insight_card.key_findings, indent=2)
+        # Format data sources description for the prompt
+        data_sources_desc = ""
+        for source_id, source in data_sources.items():
+            df = source.content
+            data_sources_desc += f"Data source '{source_id}' ({source.name}):\n"
+            data_sources_desc += f"- Shape: {df.shape[0]} rows, {df.shape[1]} columns\n"
+            data_sources_desc += f"- Columns: {', '.join(df.columns)}\n"
+            data_sources_desc += f"- Sample data:\n{df.head(3).to_string()}\n\n"
+        # Format the request for the prompt
+        request_data = {
+            "description": insight_card.title,
+            "key_insights": key_insights_str,
+            "data_sources": data_sources_desc,
+            "target_audience": "executive"  # Default to executive-level visualizations
+        }
+        # Generate visualization code
+        response = self.visualization_chain.invoke(request_data)
+        # Extract Python code
+        python_code = self.extract_python_from_response(response)
+        # Execute visualization code (with safety checks)
+        visualization_files = []
+        if not python_code:
+            print("Warning: No visualization code generated.")
+        else:
+            try:
+                # Prepare data sources for the visualizations
+                viz_data_sources = {src_id: src.content for src_id, src in data_sources.items()}
+                # Create a local namespace with access to pandas, numpy, etc.
+                local_namespace = {
+                    "pd": pd,
+                    "np": np,
+                    "plt": plt,
+                    "sns": sns,
+                    "data_sources": viz_data_sources
+                }
+                # Execute the code
+                exec(python_code, local_namespace)
+                # Look for a create_visualizations function and execute it
+                if "create_visualizations" in local_namespace:
+                    figures = local_namespace["create_visualizations"](viz_data_sources)
+                    # Save figures to files
+                    for i, fig in enumerate(figures):
+                        if hasattr(fig, 'savefig'):
+                            fig_filename = f"viz_{insight_card.card_id}_{i}.png"
+                            fig.savefig(fig_filename, dpi=300, bbox_inches='tight')
+                            visualization_files.append(fig_filename)
+            except Exception as e:
+                print(f"Visualization execution error: {e}")
+        return visualization_files
+# For testing
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+    import seaborn as sns
+    # Set API key for testing
+    os.environ["ANTHROPIC_API_KEY"] = "your_api_key_here"
+    # Create mock insight request
+    class MockInsightRequest:
+        def __init__(self):
+            self.request_id = "test"
+            self.original_problem = "Sales of DrugX down 15% in Northeast region over past 30 days"
+            self.analysis_results = {
+                "insights": [
+                    {"finding": "Competitor launch impact", "details": "New competing drug launched", "impact": "Estimated 60% of decline"},
+                    {"finding": "Supply chain issues", "details": "Inventory shortages in key distribution centers", "impact": "Estimated 25% of decline"}
+                ],
+                "attribution": {
+                    "competitor_launch": 0.60,
+                    "supply_issues": 0.25,
+                    "seasonal_factors": 0.15
+                },
+                "confidence": 0.85
+            }
+            self.validation_results = {
+                "validation_score": 0.82,
+                "critical_issues": [],
+                "recommendations": ["Consider analyzing prescriber-level data"]
+            }
+            self.target_audience = "executive"
+    # Create mock data sources
+    from dataclasses import dataclass
+    @dataclass
+    class MockDataSource:
+        content: pd.DataFrame
+        name: str
+    sales_df = pd.DataFrame({
+        'date': pd.date_range(start='2023-01-01', periods=12, freq='M'),
+        'region': ['Northeast'] * 12,
+        'sales': [100, 110, 105, 115, 120, 115, 110, 105, 95, 85, 80, 70],
+        'target': [100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150, 155]
+    })
+    competitor_df = pd.DataFrame({
+        'date': pd.date_range(start='2023-10-01', periods=3, freq='M'),
+        'competitor': ['CompDrug2'] * 3,
+        'launch_region': ['Northeast'] * 3,
+        'estimated_sales': [0, 50, 70]
+    })
+    data_sources = {
+        "sales_data": MockDataSource(content=sales_df, name="Monthly sales data"),
+        "competitor_data": MockDataSource(content=competitor_df, name="Competitor launch data")
+    }
+    agent = InsightsAgent()
+    insight_card = agent.generate_insights(MockInsightRequest())
+    print(f"Insight card title: {insight_card.title}")
+    print(f"Key findings: {json.dumps(insight_card.key_findings, indent=2)}")
+    print(f"Action items: {json.dumps(insight_card.action_items, indent=2)}")
+    visualizations = agent.generate_visualizations(insight_card, data_sources)
+    print(f"Generated visualizations: {visualizations}")