agent1 / DataAnalystAgent /tools /DataAnalysisTool.py
varun324242's picture
Upload folder using huggingface_hub
06ba83e verified
from agency_swarm.tools import BaseTool
from pydantic import Field
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import io
class DataAnalysisTool(BaseTool):
"""
This tool utilizes data analysis libraries such as Pandas and NumPy to process and analyze structured data.
It is capable of identifying trends, patterns, and anomalies in the data.
The tool also generates reports and visualizations to communicate findings.
"""
data: str = Field(
..., description="The structured data in CSV format to be analyzed."
)
def run(self):
"""
Processes and analyzes the provided structured data.
Identifies trends, patterns, and anomalies, and generates reports and visualizations.
"""
# Load data into a Pandas DataFrame
data_io = io.StringIO(self.data)
df = pd.read_csv(data_io)
# Perform basic data analysis
summary = self._generate_summary(df)
trends = self._identify_trends(df)
anomalies = self._detect_anomalies(df)
# Generate visualizations
visualizations = self._generate_visualizations(df)
# Compile the report
report = {
"summary": summary,
"trends": trends,
"anomalies": anomalies,
"visualizations": visualizations
}
return report
def _generate_summary(self, df):
"""
Generates a summary of the data including basic statistics.
"""
summary = df.describe().to_dict()
return summary
def _identify_trends(self, df):
"""
Identifies trends in the data using rolling averages.
"""
trends = {}
for column in df.select_dtypes(include=[np.number]).columns:
trends[column] = df[column].rolling(window=5).mean().dropna().tolist()
return trends
def _detect_anomalies(self, df):
"""
Detects anomalies in the data using z-score method.
"""
anomalies = {}
for column in df.select_dtypes(include=[np.number]).columns:
z_scores = np.abs((df[column] - df[column].mean()) / df[column].std())
anomalies[column] = df[column][z_scores > 3].tolist()
return anomalies
def _generate_visualizations(self, df):
"""
Generates visualizations for the data.
"""
visualizations = {}
for column in df.select_dtypes(include=[np.number]).columns:
plt.figure(figsize=(10, 6))
sns.lineplot(data=df, x=df.index, y=column)
plt.title(f'Trend for {column}')
plt.xlabel('Index')
plt.ylabel(column)
buf = io.BytesIO()
plt.savefig(buf, format='png')
buf.seek(0)
visualizations[column] = buf.getvalue()
plt.close()
return visualizations