"""
Task definitions for NBA data analysis workflow.

Each factory in this module builds a ``crewai.Task`` whose description is a
natural-language prompt for the agent it is assigned to.
"""

from typing import Optional

from crewai import Task

from config import NBA_DATA_PATH


def create_data_engineering_task(engineer_agent, csv_path: Optional[str] = None) -> Task:
    """
    Create the data engineering task for processing and cleaning data.

    The prompt asks the agent for a short readiness check of the dataset
    rather than a full cleaning pass.

    Args:
        engineer_agent: The Engineer Agent to assign this task to
        csv_path: Path to CSV file (defaults to NBA_DATA_PATH from config)

    Returns:
        Task: Configured data engineering task
    """
    # Any falsy csv_path (None or "") falls back to the configured default.
    data_path = csv_path or NBA_DATA_PATH

    return Task(
        description=f"""
        Quickly examine the dataset located at {data_path}.

        Your tasks (BE EFFICIENT - use tools only once):
        1. Get a brief summary of the dataset structure (use get_nba_data_summary ONCE)
        2. Note the key columns available
        3. Verify the data is ready for analysis

        IMPORTANT:
        - Use get_nba_data_summary ONCE only - it provides all needed info
        - Do NOT call read_nba_data or analyze_nba_data multiple times
        - Keep your report concise (2-3 sentences)
        - The data is already clean and ready for analysis

        Provide a brief confirmation that the dataset is loaded and ready for analysis.
        """,
        agent=engineer_agent,
        expected_output=(
            "A brief confirmation (2-3 sentences) that the dataset is loaded "
            "and ready for analysis, including key column names."
        ),
    )


def create_data_analysis_task(analyst_agent, data_engineering_task: Task) -> Task:
    """
    Create the data analysis task for extracting insights from NBA data.

    Args:
        analyst_agent: The Analyst Agent to assign this task to
        data_engineering_task: The data engineering task for context

    Returns:
        Task: Configured data analysis task
    """
    # Plain string on purpose: the prompt has no placeholders to interpolate.
    return Task(
        description="""
        Using the cleaned NBA 2024-25 dataset, perform comprehensive analysis:

        Your tasks:
        1. Analyze player performance metrics:
           - Top performers by points, assists, rebounds
           - Shooting efficiency analysis (FG%, 3P%, FT%)
           - Player efficiency ratings

        2. Team performance analysis:
           - Win/loss records by team
           - Team offensive and defensive statistics
           - Team performance trends

        3. Game insights:
           - High-scoring games
           - Close games vs blowouts
           - Performance by date/period

        4. Identify key patterns and trends:
           - Best performing players
           - Most efficient teams
           - Statistical outliers

        5. Provide actionable insights and recommendations

        Create a comprehensive analysis report with key findings and insights.
        """,
        agent=analyst_agent,
        expected_output=(
            "A detailed analysis report with key insights, statistical findings, "
            "top performers, team analysis, and actionable recommendations "
            "based on the NBA 2024-25 data."
        ),
        context=[data_engineering_task],
    )


def create_custom_analysis_task(
    analyst_agent,
    user_query: str,
    data_engineering_task: Optional[Task] = None,
    csv_path: Optional[str] = None,
) -> Task:
    """
    Create a custom data analysis task based on user input.

    Args:
        analyst_agent: The Analyst Agent to assign this task to
        user_query: The user's custom analysis query/task
        data_engineering_task: The data engineering task for context
            (optional for parallel execution)
        csv_path: Path to CSV file (for reference in description)

    Returns:
        Task: Configured custom analysis task
    """
    # Any falsy csv_path (None or "") falls back to the configured default.
    data_path = csv_path or NBA_DATA_PATH

    # Without an upstream engineering task the analysis task gets an empty
    # context list instead of a list containing None.
    context = [data_engineering_task] if data_engineering_task else []

    return Task(
        description=f"""
        Using the dataset located at {data_path}, perform the following analysis as requested by the user:

        {user_query}

        IMPORTANT INSTRUCTIONS:
        1. For queries requiring aggregations (sum, count, average, top N, etc.), you MUST use the 'analyze_nba_data' tool.
        2. The 'analyze_nba_data' tool allows you to execute pandas code for grouping, aggregating, sorting, and filtering.
        3. Examples of when to use 'analyze_nba_data':
           - Finding top players by statistics (e.g., "top 5 three-point shooters")
           - Calculating totals or averages per player/team
           - Grouping and aggregating data
           - Statistical analysis requiring groupby operations
        4. Use 'semantic_search_nba_data' only for finding specific game records or examples, NOT for aggregations.
        5. Plan your analysis: First understand what data you need, then use the appropriate tool to get aggregated results.

        Steps to follow:
        1. If the query asks for "top N" or aggregations, use analyze_nba_data with pandas groupby operations
        2. For "top 5 three-point shooters": group by Player, sum the '3P' column, sort descending, take top 5
        3. Present the results clearly with player names and their statistics

        Provide a clear, comprehensive answer with relevant statistics, insights, and any supporting data from the dataset.
        """,
        agent=analyst_agent,
        expected_output=(
            "A detailed analysis report addressing the user's query with "
            "relevant insights, statistics, and findings from the data."
        ),
        context=context,
    )


def create_storyteller_task(storyteller_agent, analysis_task: Task) -> Task:
    """
    Create a storyteller task that creates headlines and storylines from the
    analysis results.

    Args:
        storyteller_agent: The Storyteller Agent to assign this task to
        analysis_task: The analysis task whose output will be used to create
            headlines and content

    Returns:
        Task: Configured storyteller task
    """
    return Task(
        description="""
        Review the data analysis results and create engaging headlines and compelling storylines that bring the data to life.

        Your tasks:
        1. Read and understand the analysis results thoroughly
        2. Identify the most important and interesting findings
        3. Create 3-5 compelling headlines that:
           - Are catchy and attention-grabbing
           - Accurately reflect the key insights
           - Use engaging sports journalism language
           - Are suitable for display to users

        4. Write engaging storylines/content for each headline that:
           - Tells a story about the findings
           - Provides context and narrative around the statistics
           - Makes the data come alive with compelling prose
           - Explains why these insights matter
           - Uses vivid language and storytelling techniques
           - Is 2-3 paragraphs per storyline (enough to be engaging but concise)

        5. Format your output as follows:

        HEADLINES:
        [List of 3-5 headlines, one per line]

        STORYLINES:
        [For each headline, write 2-3 paragraphs of engaging content that tells the story behind the data]

        Make both the headlines and storylines exciting, memorable, and true to the data insights.
        Write like a sports journalist who knows how to make statistics compelling and human.
        """,
        agent=storyteller_agent,
        expected_output=(
            "A formatted output with 3-5 engaging headlines followed by detailed "
            "storylines (2-3 paragraphs each) that bring the data analysis to "
            "life with compelling narrative and context."
        ),
        context=[analysis_task],
    )