Upload 2 files
Browse files
plugins/analyzers/statistical_analyzer.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Statistical Analysis Plugin"""
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from typing import Dict, Any
|
| 5 |
+
|
| 6 |
+
class StatisticalAnalyzer:
    """Perform statistical analysis on data."""

    @staticmethod
    def _safe_float(value):
        """Convert a pandas scalar to ``float``, mapping NaN to ``None``.

        Pandas reductions (``mean``/``std``/``min``/...) return NaN for empty
        or all-missing columns; bare ``float(nan)`` would leak NaN into the
        result dict, which is not JSON-serializable and breaks ``==`` checks.
        """
        try:
            result = float(value)
        except (TypeError, ValueError):
            return None
        # NaN is the only float that is unequal to itself.
        return result if result == result else None

    def analyze(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Generate comprehensive statistical summary.

        Args:
            df: Input frame; may be empty or contain all-NaN columns.

        Returns:
            Dict with overall ``shape`` plus a per-column entry holding the
            dtype, missing-value percentage, and type-appropriate statistics:
            numeric stats, datetime min/max, or categorical cardinality.
            Undefined statistics (e.g. std of a single row) are ``None``.
        """
        analysis: Dict[str, Any] = {
            "shape": {"rows": len(df), "columns": len(df.columns)},
            "columns": {},
        }
        for col in df.columns:
            series = df[col]
            col_analysis: Dict[str, Any] = {"name": col, "dtype": str(series.dtype)}
            col_analysis["missing_percent"] = self._safe_float(series.isna().mean() * 100)
            if pd.api.types.is_numeric_dtype(series):
                col_analysis.update({
                    "mean": self._safe_float(series.mean()),
                    "std": self._safe_float(series.std()),
                    "min": self._safe_float(series.min()),
                    "max": self._safe_float(series.max()),
                    "median": self._safe_float(series.median()),
                })
            elif pd.api.types.is_datetime64_any_dtype(series):
                col_analysis.update({
                    "min_date": str(series.min()),
                    "max_date": str(series.max()),
                })
            else:
                # Non-numeric, non-datetime: report cardinality only.
                col_analysis.update({"unique_values": int(series.nunique())})
            analysis["columns"][col] = col_analysis
        return analysis
|
plugins/analyzers/time_series_analyzer.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Time Series Analysis Plugin"""
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
from typing import Dict, Any, Tuple, Optional
|
| 6 |
+
|
| 7 |
+
class TimeSeriesAnalyzer:
    """Universal data science engine for any analytics task."""

    def analyze_query(self, df: pd.DataFrame, query: str) -> Tuple[str, Optional[pd.DataFrame]]:
        """
        Process any data science query: summarize, aggregate, group, segment, trends.

        The query text is matched case-insensitively against keyword lists;
        the first matching branch wins.

        Args:
            df: Input frame.
            query: Free-text request used only for keyword matching.

        Returns:
            Tuple of (human-readable headline, result frame) — the frame is
            ``None`` when the requested analysis cannot be performed.
        """
        query_lower = query.lower()

        if df.empty:
            return "β No data available.", None

        # SUMMARIZATION
        if any(kw in query_lower for kw in ['summarize', 'summary', 'overview', 'describe']):
            summary = df.describe(include='all').transpose()
            return f"π Data Summary: {df.shape[0]} rows Γ {df.shape[1]} columns", summary

        # AGGREGATION
        if any(kw in query_lower for kw in ['aggregate', 'sum', 'total', 'average', 'mean']):
            numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
            if numeric_cols:
                agg_result = df[numeric_cols].agg(['sum', 'mean', 'count', 'min', 'max']).transpose()
                return f"π Aggregated Statistics for {len(numeric_cols)} numeric columns", agg_result
            return "β οΈ No numeric columns found for aggregation.", None

        # GROUPING
        if 'group' in query_lower:
            cat_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
            numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
            if cat_cols and numeric_cols:
                group_col = cat_cols[0]
                grouped = df.groupby(group_col)[numeric_cols].agg(['sum', 'mean', 'count']).head(20)
                return f"π Grouped by `{group_col}` (top 20)", grouped
            return "β οΈ Need categorical and numeric columns for grouping.", None

        # SEGMENTATION
        if any(kw in query_lower for kw in ['segment', 'cluster', 'categorize']):
            numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()[:3]
            if numeric_cols:
                segment_col = numeric_cols[0]
                df_copy = df.copy()
                # BUG FIX: the original passed a fixed 4-label list together
                # with duplicates='drop'; pd.qcut raises ValueError whenever
                # duplicate quantile edges are dropped (skewed data) because
                # the label count no longer matches the surviving bins.
                # Bin unlabeled first, then rename only the bins that exist.
                binned = pd.qcut(df_copy[segment_col], q=4, duplicates='drop')
                labels = ['Low', 'Medium', 'High', 'Very High'][:len(binned.cat.categories)]
                df_copy['segment'] = binned.cat.rename_categories(labels)
                # observed=False is the current pandas default for categorical
                # keys; stated explicitly to keep behavior stable.
                segment_summary = df_copy.groupby('segment', observed=False).agg(
                    {segment_col: ['count', 'mean', 'min', 'max']})
                return f"π― Segmentation based on `{segment_col}`", segment_summary
            return "β οΈ Need numeric columns for segmentation.", None

        # TREND ANALYSIS
        if any(kw in query_lower for kw in ['trend', 'over time', 'pattern']):
            # 'datetimetz' added so tz-aware datetime columns also qualify.
            date_cols = df.select_dtypes(include=['datetime64', 'datetimetz']).columns.tolist()
            numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
            if date_cols and numeric_cols:
                df_sorted = df.sort_values(date_cols[0])
                trend_data = df_sorted[[date_cols[0]] + numeric_cols[:2]].head(50)
                return "π Trend Analysis (first 50 records)", trend_data
            return "β οΈ Need date and numeric columns for trends.", None

        # CORRELATION
        if 'correlat' in query_lower:
            numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
            if len(numeric_cols) >= 2:
                corr_matrix = df[numeric_cols].corr()
                return f"π Correlation Matrix for {len(numeric_cols)} columns", corr_matrix
            return "β οΈ Need at least 2 numeric columns.", None

        # DEFAULT: no keyword matched — fall back to basic numeric stats.
        basic_stats = df.describe().transpose()
        return "π Basic Statistics (try: 'summarize', 'group', 'segment', 'trend')", basic_stats
|