File size: 5,178 Bytes
5a3fcad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""
DataFrame Analysis Agent

This module implements the pandas DataFrame analysis agent that processes
CSV files and answers natural language questions about the data.

The agent uses LangChain's create_pandas_dataframe_agent to enable
natural language interaction with pandas DataFrames.

Example:
    >>> from src.agents import ask_agent
    >>> result = ask_agent(files, "What is the average revenue by region?")
"""

import io
import contextlib
from typing import List, Optional, Any

import pandas as pd
from langchain.agents import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

from ..prompts import get_analysis_prompt


class DataFrameAgent:
    """
    Agent for analyzing pandas DataFrames using natural language queries.
    
    This agent wraps LangChain's pandas DataFrame agent and provides
    a simplified interface for data analysis tasks.
    
    Attributes:
        model: The LLM model to use for inference (e.g., AzureChatOpenAI).
        verbose: Whether to enable verbose logging for debugging.
        
    Example:
        >>> agent = DataFrameAgent(model=azure_llm)
        >>> df = pd.read_csv("sales.csv")
        >>> result = agent.analyze(df, "What are the top 5 products by revenue?")
    """
    
    def __init__(self, model: Any, verbose: bool = True):
        """
        Initialize the DataFrame agent.
        
        Args:
            model: The LLM model instance to use for inference.
                   Must be a LangChain-compatible chat model.
            verbose: Enable verbose output for debugging (default: True).
        """
        self.model = model
        self.verbose = verbose
    
    def analyze(self, df: pd.DataFrame, question: str) -> str:
        """
        Analyze a DataFrame and answer a natural language question.
        
        This method creates a LangChain pandas agent, constructs the full
        prompt, and invokes the agent to generate insights.
        
        Args:
            df: The pandas DataFrame to analyze.
            question: The natural language question about the data.
            
        Returns:
            str: The agent's analysis and answer in Markdown format.
            
        Raises:
            Exception: If the agent encounters an error during analysis.
            
        Example:
            >>> result = agent.analyze(sales_df, "Show monthly revenue trends")
            >>> print(result)  # Markdown formatted analysis
        """
        try:
            # Create the pandas DataFrame agent with ZERO_SHOT_REACT approach
            # This agent type can handle tasks without needing few-shot examples
            pandas_agent = create_pandas_dataframe_agent(
                llm=self.model,
                df=df,
                verbose=self.verbose,
                agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                allow_dangerous_code=True,  # Required for code execution
                handle_parsing_errors=True,  # Gracefully handle LLM parsing issues
            )
            
            # Construct the full prompt with prefix and suffix
            full_prompt = get_analysis_prompt(question)
            
            # Capture stdout to get the agent's reasoning trace
            buffer = io.StringIO()
            with contextlib.redirect_stdout(buffer):
                result = pandas_agent.invoke(full_prompt)
            
            # Extract the final output from the agent response
            return result.get("output", str(result))
            
        except Exception as e:
            return f"Analysis error: {e}"


def ask_agent(
    files: List[Any],
    question: str,
    model: Optional[Any] = None
) -> str:
    """
    Analyze uploaded CSV files and answer a question about the data.
    
    This is a convenience function that handles file loading, DataFrame
    concatenation, and agent invocation in one call.
    
    Args:
        files: List of file objects with a .name attribute pointing to CSV paths.
               Typically comes from Gradio's file upload component.
        question: The natural language question to answer about the data.
        model: Optional LLM model to use. If None, uses the global model.
        
    Returns:
        str: The analysis result in Markdown format, or an error message.
        
    Note:
        Multiple CSV files are concatenated into a single DataFrame before
        analysis. Ensure files have compatible schemas for meaningful results.
        
    Example:
        >>> # With Gradio file input
        >>> result = ask_agent(uploaded_files, "What is the total revenue?")
    """
    # Step 1: Load and concatenate all uploaded CSV files
    try:
        dataframes = [pd.read_csv(f.name) for f in files]
        combined_df = pd.concat(dataframes, ignore_index=True)
    except Exception as e:
        return f"Could not read CSV files: {e}"
    
    # Step 2: Create agent and perform analysis
    if model is None:
        return "Error: No LLM model provided. Please configure the model first."
    
    agent = DataFrameAgent(model=model)
    return agent.analyze(combined_df, question)