jzou19950715's picture
Update components/analysis.py
0a37365 verified
raw
history blame
3.45 kB
# components/analysis.py
import html
from typing import Dict, Optional

import pandas as pd

from .statistical import StatisticalAnalyzer
from .visualization import D3Visualizer
class DataAnalyzer:
    """Run a named analysis on a DataFrame and pair it with a visualization.

    The numeric work is delegated to ``StatisticalAnalyzer`` and rendering to
    ``D3Visualizer``; this class only picks the input data, calls the two
    collaborators, and assembles the result dictionary.
    """

    def __init__(self):
        self.statistical = StatisticalAnalyzer()
        self.visualizer = D3Visualizer()

    def analyze_data(
        self,
        df: pd.DataFrame,
        analysis_type: str,
        params: Optional[Dict] = None
    ) -> Dict:
        """Analyze ``df`` according to ``analysis_type``.

        Args:
            df: Input data.
            analysis_type: One of ``"distribution"``, ``"forecast"`` or
                ``"correlation"``.
            params: Optional settings. ``"column"`` names the column to use
                for distribution/forecast (falls back to the first numeric
                column when missing or not present in ``df``); ``"steps"``
                is the forecast horizon (default 10).

        Returns:
            On success, a dict with ``"visualization"`` plus one of
            ``"statistics"``, ``"forecast"`` or ``"correlations"``.
            For an unknown ``analysis_type``, ``{"error": ...}`` only.
            If anything raises, a dict with the raw message under ``"error"``
            and an HTML error snippet under ``"visualization"``.

        This method never raises: every ``Exception`` is converted into the
        error dict so callers can always render the result.
        """
        params = params or {}
        try:
            if analysis_type == "distribution":
                return self._analyze_distribution(df, params)
            if analysis_type == "forecast":
                return self._analyze_forecast(df, params)
            if analysis_type == "correlation":
                return self._analyze_correlation(df)
            return {"error": "Unsupported analysis type"}
        except Exception as e:
            message = str(e)
            # The message may contain data-derived text (column names, values),
            # so escape it before embedding it in markup. quote=False is enough
            # for element content and leaves quotes in messages untouched.
            safe_message = html.escape(message, quote=False)
            return {
                "error": message,
                "visualization": f"<div class='error'>Error in analysis: {safe_message}</div>",
            }

    @staticmethod
    def _column_values(df: pd.DataFrame, params: Dict, purpose: str):
        """Return the non-null values of the requested or first numeric column.

        ``purpose`` only completes the error message raised when ``df`` has no
        numeric column to fall back to.
        """
        column = params.get("column")
        if not column or column not in df.columns:
            numeric_cols = df.select_dtypes(include=['number']).columns
            if len(numeric_cols) == 0:
                raise ValueError(f"No numeric columns found for {purpose}")
            column = numeric_cols[0]
        return df[column].dropna().values

    def _analyze_distribution(self, df: pd.DataFrame, params: Dict) -> Dict:
        """Compute distribution statistics and the matching plot."""
        values = self._column_values(df, params, "distribution analysis")
        stats_result = self.statistical.analyze_distribution(values)
        viz_result = self.visualizer.create_interactive_plot(
            "distribution",
            {"values": values.tolist()}
        )
        return {
            "statistics": stats_result,
            "visualization": viz_result
        }

    def _analyze_forecast(self, df: pd.DataFrame, params: Dict) -> Dict:
        """Compute a probability-cone forecast and the matching plot."""
        values = self._column_values(df, params, "forecasting")
        forecast_result = self.statistical.forecast_probability_cone(
            values,
            steps=params.get("steps", 10)
        )
        viz_result = self.visualizer.create_interactive_plot(
            "forecast",
            forecast_result
        )
        return {
            "forecast": forecast_result,
            "visualization": viz_result
        }

    def _analyze_correlation(self, df: pd.DataFrame) -> Dict:
        """Compute correlations over the whole frame and the matching plot."""
        # NOTE(review): the original comment says datetime columns are handled
        # inside StatisticalAnalyzer; that cannot be confirmed from this file.
        corr_result = self.statistical.analyze_correlations(df)
        viz_result = self.visualizer.create_interactive_plot(
            "correlation",
            {"matrix": corr_result["correlation_matrix"]}
        )
        return {
            "correlations": corr_result,
            "visualization": viz_result
        }