File size: 2,929 Bytes
06ba83e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from agency_swarm.tools import BaseTool
from pydantic import Field
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import io

class DataAnalysisTool(BaseTool):
    """
    This tool utilizes data analysis libraries such as Pandas and NumPy to process and analyze structured data.
    It is capable of identifying trends, patterns, and anomalies in the data.
    The tool also generates reports and visualizations to communicate findings.
    """

    # NOTE(review): the class docstring and the Field description are kept
    # verbatim on purpose -- presumably agency_swarm surfaces them as the
    # tool's schema/description; confirm before rewording either.

    data: str = Field(
        ..., description="The structured data in CSV format to be analyzed."
    )

    def run(self):
        """
        Parse ``self.data`` as CSV and assemble the analysis report.

        Returns:
            dict: four entries --
                "summary": output of ``_generate_summary``,
                "trends": output of ``_identify_trends``,
                "anomalies": output of ``_detect_anomalies``,
                "visualizations": output of ``_generate_visualizations``.

        Raises:
            Any exception raised by ``pd.read_csv`` or by the helpers
            propagates unchanged; nothing is caught here.
        """
        df = pd.read_csv(io.StringIO(self.data))

        # Dict literal entries are evaluated top to bottom, so the helpers
        # run in the order summary -> trends -> anomalies -> visualizations.
        return {
            "summary": self._generate_summary(df),
            "trends": self._identify_trends(df),
            "anomalies": self._detect_anomalies(df),
            "visualizations": self._generate_visualizations(df),
        }

    def _generate_summary(self, df):
        """
        Return ``df.describe()`` converted to a nested dict.

        The outer keys are column names and the inner keys are the
        statistic labels produced by ``DataFrame.describe``.
        """
        return df.describe().to_dict()

    def _identify_trends(self, df, window=5):
        """
        Compute a rolling mean for every numeric column.

        Args:
            df: DataFrame to analyze.
            window: Number of consecutive rows averaged per point
                (default 5, the value previously hard-coded).

        Returns:
            dict: column name -> list of rolling-mean values with NaN
            entries dropped.
        """
        numeric = df.select_dtypes(include=[np.number])
        return {
            column: numeric[column].rolling(window=window).mean().dropna().tolist()
            for column in numeric.columns
        }

    def _detect_anomalies(self, df, threshold=3):
        """
        Flag values whose absolute z-score exceeds ``threshold``.

        Args:
            df: DataFrame to analyze.
            threshold: Z-score cutoff (default 3, the value previously
                hard-coded).

        Returns:
            dict: column name -> list of the flagged values from that
            column (empty when nothing is flagged).
        """
        anomalies = {}
        numeric = df.select_dtypes(include=[np.number])
        for column in numeric.columns:
            series = numeric[column]
            spread = series.std()
            # A zero or undefined standard deviation makes the z-score
            # meaningless; report no anomalies instead of relying on
            # NaN comparisons after a division by zero.
            if pd.isna(spread) or spread == 0:
                anomalies[column] = []
                continue
            z_scores = ((series - series.mean()) / spread).abs()
            anomalies[column] = series[z_scores > threshold].tolist()
        return anomalies

    def _generate_visualizations(self, df):
        """
        Render one line plot per numeric column as PNG bytes.

        Returns:
            dict: column name -> ``bytes`` holding the PNG image of that
            column plotted against the DataFrame index.
        """
        visualizations = {}
        for column in df.select_dtypes(include=[np.number]).columns:
            # Work on an explicit figure/axes pair rather than pyplot's
            # implicit "current figure".
            fig, ax = plt.subplots(figsize=(10, 6))
            try:
                sns.lineplot(data=df, x=df.index, y=column, ax=ax)
                ax.set_title(f'Trend for {column}')
                ax.set_xlabel('Index')
                ax.set_ylabel(column)
                buf = io.BytesIO()
                fig.savefig(buf, format='png')
                visualizations[column] = buf.getvalue()
            finally:
                # Close even when plotting or saving fails, so the figure
                # does not stay registered in pyplot.
                plt.close(fig)
        return visualizations