Spaces:

cryogenic22
/

sales_analytics

Runtime error

App Files Files Community

cryogenic22 committed on Mar 15, 2025

Commit

9524b20

verified ·

1 Parent(s): bbe7455

Create agents/planning_agent.py

Browse files

Files changed (1) hide show

agents/planning_agent.py +215 -0

agents/planning_agent.py ADDED Viewed

	@@ -0,0 +1,215 @@

+import os
+from typing import Dict, List, Optional, Any, Tuple
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain_anthropic import ChatAnthropic
+from pydantic import BaseModel, Field
+import json
+# Define task types and output schema
class AnalysisPlan(BaseModel):
    """Planning agent output with analysis plan details"""

    # Free-text restatement of the problem raised by the alert.
    problem_statement: str = Field(
        description="Refined problem statement based on the alert",
    )

    # One string-to-string mapping per data source.
    required_data_sources: List[Dict[str, str]] = Field(
        description="List of data sources needed with table name and purpose",
    )

    # One string-to-string mapping per analytical technique.
    analysis_approaches: List[Dict[str, str]] = Field(
        description="List of analytical approaches to be used with type and purpose",
    )

    # Task records are loosely typed (values may be ints, lists, strings).
    tasks: List[Dict[str, Any]] = Field(
        description="Ordered list of tasks to execute with dependencies",
    )

    # Plain-text statements of what the analysis is expected to reveal.
    expected_insights: List[str] = Field(
        description="List of expected insights that would answer the problem",
    )
class PlanningAgent:
    """Agent responsible for planning the analysis workflow.

    Builds a prompt -> Claude -> string chain, parses the model's reply into
    a plan dictionary, validates it as an ``AnalysisPlan`` and can turn the
    plan into node/edge data for graph rendering.
    """

    # System prompt for the planning chain. The JSON example uses doubled
    # braces ({{ and }}) because ChatPromptTemplate treats single braces as
    # template variables; doubled braces render as literal { and }.
    _SYSTEM_PROMPT = """You are an expert pharmaceutical analytics planning agent.
Your task is to create a detailed analysis plan to investigate sales anomalies.
For pharmaceutical sales analysis:
- Consider product performance, competitor activities, prescriber behavior
- Include geographic, temporal, and demographic dimensions in your analysis
- Consider both internal factors (supply, marketing) and external factors (market events, seasonality)
Your output should be a complete JSON-formatted analysis plan following this structure:
{{
  "problem_statement": "Clear definition of the problem to solve",
  "required_data_sources": [
    {{"table": "sales", "purpose": "Core sales metrics analysis"}},
    ...
  ],
  "analysis_approaches": [
    {{"type": "time_series_decomposition", "purpose": "Separate trend from seasonality"}},
    ...
  ],
  "tasks": [
    {{
      "id": 1,
      "name": "Data acquisition",
      "description": "Pull relevant data from sources",
      "agent": "data_agent",
      "dependencies": [],
      "expected_output": "Cleaned datasets for analysis"
    }},
    ...
  ],
  "expected_insights": [
    "Primary factors contributing to sales decline",
    ...
  ]
}}
Be thorough in your planning but focus on creating a practical analysis workflow.
Tasks should follow a logical sequence with proper dependencies.
"""

    def __init__(self, model: str = "claude-3-haiku-20240307", temperature: float = 0.1):
        """Initialize the planning agent with Claude API.

        Args:
            model: Anthropic model identifier passed to ``ChatAnthropic``.
            temperature: Sampling temperature passed to ``ChatAnthropic``.

        Raises:
            ValueError: If ``ANTHROPIC_API_KEY`` is not set in the environment.
        """
        # Set up Claude API client
        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            raise ValueError("ANTHROPIC_API_KEY not found in environment variables")
        self.llm = ChatAnthropic(
            model=model,
            anthropic_api_key=api_key,
            temperature=temperature,
        )
        # Create planning prompt; "{input}" is the only template variable.
        self.planning_prompt = ChatPromptTemplate.from_messages([
            ("system", self._SYSTEM_PROMPT),
            ("human", "{input}"),
        ])
        # Set up the planning chain: raw text in, model reply as a string out.
        self.planning_chain = (
            {"input": RunnablePassthrough()}
            | self.planning_prompt
            | self.llm
            | StrOutputParser()
        )

    def extract_json_from_text(self, text: str) -> Dict:
        """Extract a JSON object from text that might contain additional content.

        Candidates are tried in order: the whole text, the contents of the
        first Markdown code fence (labelled ``json`` or unlabelled), and the
        span from the first ``{`` to the last ``}``. The first candidate that
        parses to a JSON object is returned.

        Args:
            text: Raw model reply.

        Returns:
            The parsed JSON object as a dictionary.

        Raises:
            ValueError: If no candidate parses to a JSON object.
        """
        import re  # kept function-local, as in the original implementation

        candidates = [text]
        fenced = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text, re.IGNORECASE)
        if fenced:
            candidates.append(fenced.group(1))
        # Greedy on purpose: grabs the outermost {...} span in the reply.
        braced = re.search(r"\{[\s\S]*\}", text)
        if braced:
            candidates.append(braced.group(0))

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            # A bare list/number/string is valid JSON but not a usable plan.
            if isinstance(parsed, dict):
                return parsed
        raise ValueError(f"Could not extract JSON from response: {text}")

    def create_analysis_plan(self, alert_description: str) -> Tuple["AnalysisPlan", Dict]:
        """Generate an analysis plan based on the alert description.

        Args:
            alert_description: Text of the alert to investigate.

        Returns:
            A tuple of the validated ``AnalysisPlan`` and the raw plan
            dictionary it was built from.

        Raises:
            ValueError: If the model reply contains no JSON object. Schema
                validation errors from ``AnalysisPlan.model_validate`` also
                propagate.
        """
        print("Planning Agent: Creating analysis plan...")
        # Format the input for the planning prompt
        input_text = f"""
Alert: {alert_description}
Create a detailed analysis plan to investigate this issue. Include:
1. A clear problem statement
2. Required data sources from our pharma database
3. Analytical approaches to identify root causes
4. A sequence of tasks with dependencies
5. Expected insights that would solve the problem
Available data tables:
- sales: Daily sales data (sale_date, product_id, region_id, territory_id, prescriber_id, pharmacy_id, units_sold, revenue, cost, margin)
- products: Product information (product_id, product_name, therapeutic_area, molecule, launch_date, status, list_price)
- regions: Geographic regions (region_id, region_name, country, division, population)
- territories: Sales territories (territory_id, territory_name, region_id, sales_rep_id)
- prescribers: Physician information (prescriber_id, name, specialty, practice_type, territory_id, decile)
- pharmacies: Pharmacy information (pharmacy_id, name, address, territory_id, pharmacy_type, monthly_rx_volume)
- competitor_products: Competitor information (competitor_product_id, product_name, manufacturer, therapeutic_area, molecule, launch_date, list_price, competing_with_product_id)
- marketing_campaigns: Marketing activities (campaign_id, campaign_name, start_date, end_date, product_id, campaign_type, target_audience, channels, budget, spend)
- market_events: Industry events (event_id, event_date, event_type, description, affected_products, affected_regions, impact_score)
- sales_targets: Performance targets (target_id, product_id, region_id, period, target_units, target_revenue)
- distribution_centers: Supply chain (dc_id, dc_name, region_id, inventory_capacity)
- inventory: Stock levels (inventory_id, product_id, dc_id, date, units_available, units_allocated, units_in_transit, days_of_supply)
- external_factors: External influences (factor_id, date, region_id, factor_type, factor_value, description)
"""
        # Execute the planning chain
        response = self.planning_chain.invoke(input_text)
        # Extract and parse the response as JSON
        plan_dict = self.extract_json_from_text(response)
        # Convert to Pydantic model for validation and structure
        analysis_plan = AnalysisPlan.model_validate(plan_dict)
        return analysis_plan, plan_dict

    def visualize_plan(self, plan: "AnalysisPlan") -> Dict:
        """Generate visualization data for the analysis plan.

        Args:
            plan: Plan exposing ``tasks``, ``required_data_sources``,
                ``problem_statement`` and ``expected_insights``.

        Returns:
            A dictionary with ``nodes`` (task nodes first, then data-source
            nodes), ``edges`` (dependency edges, then data-source input
            edges), ``problem_statement`` and ``expected_insights``.

        Raises:
            KeyError: If a task has no ``"id"``; without it no node id can
                be built.
        """
        nodes = []
        edges = []

        # Create nodes representing tasks and edges for their dependencies.
        for task in plan.tasks:
            task_node_id = f"task_{task['id']}"
            nodes.append({
                "id": task_node_id,
                # Model output may omit optional keys; degrade instead of raising.
                "label": task.get("name", task_node_id),
                "type": "task",
                "agent": task.get("agent"),
            })
            # "dependencies" may be missing or null in model output.
            for dep in task.get("dependencies") or []:
                edges.append({
                    "source": f"task_{dep}",
                    "target": task_node_id,
                    "label": "depends on",
                })

        # The first task handled by the data agent receives every data source;
        # looked up once because it does not depend on the source being added.
        data_task = next((t for t in plan.tasks if t.get("agent") == "data_agent"), None)

        # Add data source nodes
        for i, src in enumerate(plan.required_data_sources):
            src_id = f"data_{i}"
            nodes.append({
                "id": src_id,
                "label": src.get("table", src_id),
                "type": "data_source",
            })
            if data_task:
                edges.append({
                    "source": src_id,
                    "target": f"task_{data_task['id']}",
                    "label": "input",
                })

        return {
            "nodes": nodes,
            "edges": edges,
            "problem_statement": plan.problem_statement,
            "expected_insights": plan.expected_insights,
        }
# For testing
if __name__ == "__main__":
    # Fall back to the placeholder only when no key is configured, so a real
    # key already exported in the environment is not overwritten.
    os.environ.setdefault("ANTHROPIC_API_KEY", "your_api_key_here")
    agent = PlanningAgent()
    alert = "Sales of DrugX down 15% in Northeast region over past 30 days compared to forecast."
    # create_analysis_plan returns (validated plan, raw dict); only the
    # validated plan is printed here.
    plan, _ = agent.create_analysis_plan(alert)
    print(json.dumps(plan.model_dump(), indent=2))