SamadhiDBS commited on
Commit
d18f851
·
verified ·
1 Parent(s): 2d9ee72

Upload 24 files

Browse files
app/__init__.py ADDED
File without changes
app/__pycache__/analyzer.cpython-311.pyc ADDED
Binary file (7.69 kB). View file
 
app/__pycache__/chart_customizer.cpython-311.pyc ADDED
Binary file (6.8 kB). View file
 
app/__pycache__/dashboard.cpython-311.pyc ADDED
Binary file (7.51 kB). View file
 
app/__pycache__/data_processor.cpython-311.pyc ADDED
Binary file (7.66 kB). View file
 
app/__pycache__/export_utils.cpython-311.pyc ADDED
Binary file (12.3 kB). View file
 
app/__pycache__/insight_generator.cpython-311.pyc ADDED
Binary file (10.7 kB). View file
 
app/__pycache__/query_engine.cpython-311.pyc ADDED
Binary file (23.3 kB). View file
 
app/__pycache__/session_manager.cpython-311.pyc ADDED
Binary file (5.02 kB). View file
 
app/analyzer.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##________automated analysis________##
2
+
3
+ import pandas as pd
4
+ import numpy as np
5
+ from scipy import stats
6
+
7
class Analyzer:
    """Run a fixed set of exploratory analyses over a DataFrame.

    ``schema`` is read as a mapping with the keys ``'numeric'``,
    ``'categorical'`` and ``'datetime'``, each a list of column names of
    ``df``.
    """

    def __init__(self, df, schema):
        self.df = df
        self.schema = schema
        self.insights = []  # kept for callers; no method of this class fills it

    def run_full_analysis(self):
        """Run every analysis and return the results keyed by analysis name."""
        print("Running automated analysis....")

        return {
            'descriptive_stats': self.descriptive_statistics(),
            'correlations': self.correlation_analysis(),
            'trends': self.trend_detection(),
            'group_analysis': self.group_by_analysis(),
            'outliers': self.detect_outliers(),
            'distributions': self.get_distributions(),
        }

    def descriptive_statistics(self):
        """Return mean/median/std/min/max/q1/q3 for each numeric column."""
        # Deliberately not named `stats`: this module imports scipy.stats.
        summary = {}
        for col in self.schema['numeric']:
            series = self.df[col]
            summary[col] = {
                'mean': series.mean(),
                'median': series.median(),
                'std': series.std(),
                'min': series.min(),
                'max': series.max(),
                'q1': series.quantile(0.25),
                'q3': series.quantile(0.75),
            }
        return summary

    def correlation_analysis(self):
        """Return the numeric column pairs whose |correlation| exceeds 0.5.

        Each entry has ``col1``, ``col2``, ``correlation`` and ``strength``
        ('positive' or 'negative'). Returns ``[]`` with fewer than two
        numeric columns.
        """
        numeric_cols = self.schema['numeric']
        if len(numeric_cols) < 2:
            return []

        corr_matrix = self.df[numeric_cols].corr()
        names = list(corr_matrix.columns)

        strong_corrs = []
        for i, first in enumerate(names):
            # Upper triangle only, so each pair is reported once.
            for j in range(i + 1, len(names)):
                corr_value = corr_matrix.iloc[i, j]
                # NaN correlations compare False here and are skipped.
                if abs(corr_value) > 0.5:
                    strong_corrs.append({
                        'col1': first,
                        'col2': names[j],
                        'correlation': corr_value,
                        'strength': 'positive' if corr_value > 0 else 'negative',
                    })
        return strong_corrs

    @staticmethod
    def _month_end_alias():
        """Return the month-end frequency alias the installed pandas accepts."""
        try:
            pd.tseries.frequencies.to_offset('ME')
        except ValueError:
            # pandas < 2.2 only knows the older 'M' spelling.
            return 'M'
        return 'ME'

    def trend_detection(self):
        """Compare the first and last monthly mean of every numeric column.

        For each (datetime column, numeric column) pair with at least two
        non-empty monthly bins, returns a dict with ``column``,
        ``time_column``, ``percent_change``, ``direction``, ``first_value``
        and ``last_value``.
        """
        trends = []
        freq = self._month_end_alias()

        for date_col in self.schema['datetime']:
            for num_col in self.schema['numeric']:
                monthly = (
                    self.df.groupby(pd.Grouper(key=date_col, freq=freq))[num_col]
                    .mean()
                    # Months without data yield NaN means; an NaN endpoint
                    # would make the whole comparison NaN.
                    .dropna()
                )
                if len(monthly) < 2:
                    continue

                first_val = monthly.iloc[0]
                last_val = monthly.iloc[-1]
                if first_val != 0:
                    # abs() keeps the sign of the change correct when the
                    # starting value is negative.
                    percent_change = (last_val - first_val) / abs(first_val) * 100
                else:
                    percent_change = 0

                trends.append({
                    'column': num_col,
                    'time_column': date_col,
                    'percent_change': percent_change,
                    # Only two labels exist; an unchanged value reports
                    # 'decreasing', as before.
                    'direction': 'increasing' if last_val > first_val else 'decreasing',
                    'first_value': first_val,
                    'last_value': last_val,
                })
        return trends

    def group_by_analysis(self):
        """Aggregate each numeric column by each categorical column.

        Returns ``{cat_col: {num_col: {...}}}`` where the inner dict holds
        ``grouped_data`` (mean/sum/count per category), ``top_category`` and
        ``top_value`` (category with the highest mean) and
        ``total_categories``. With no usable mean, ``top_category`` is
        ``None`` and ``top_value`` is ``0``.
        """
        group_analysis = {}

        for cat_col in self.schema['categorical']:
            group_analysis[cat_col] = {}
            for num_col in self.schema['numeric']:
                grouped = self.df.groupby(cat_col)[num_col].agg(['mean', 'sum', 'count'])

                # idxmax on an all-NaN column is an error in recent pandas.
                means = grouped['mean'].dropna()
                if means.empty:
                    top_category, top_value = None, 0
                else:
                    top_category, top_value = means.idxmax(), means.max()

                group_analysis[cat_col][num_col] = {
                    'grouped_data': grouped.to_dict(),
                    'top_category': top_category,
                    'top_value': top_value,
                    'total_categories': len(grouped),
                }

        return group_analysis

    def detect_outliers(self):
        """Count values outside 1.5 * IQR of the quartiles, per numeric column.

        Only columns with at least one outlier appear in the result; each
        entry has ``count``, ``percentage``, ``lower_bound`` and
        ``upper_bound``.
        """
        outliers = {}

        for col in self.schema['numeric']:
            series = self.df[col]
            q1 = series.quantile(0.25)
            q3 = series.quantile(0.75)
            iqr = q3 - q1
            lower_bound = q1 - 1.5 * iqr
            upper_bound = q3 + 1.5 * iqr

            outlier_count = int(((series < lower_bound) | (series > upper_bound)).sum())
            if outlier_count > 0:
                outliers[col] = {
                    'count': outlier_count,
                    'percentage': (outlier_count / len(self.df)) * 100,
                    'lower_bound': lower_bound,
                    'upper_bound': upper_bound,
                }

        return outliers

    def get_distributions(self):
        """Return skewness, kurtosis, unique count and a shape label per numeric column."""
        distributions = {}

        for col in self.schema['numeric']:
            series = self.df[col]
            skew = series.skew()

            if skew > 1:
                shape = 'right-skewed'
            elif skew < -1:
                shape = 'left-skewed'
            else:
                # Also the label when skew is NaN (both comparisons are False).
                shape = 'approximately normal'

            distributions[col] = {
                'skewness': skew,
                'kurtosis': series.kurtosis(),
                'unique_values': series.nunique(),
                'shape': shape,
            }

        return distributions
159
+
app/chart_customizer.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Chart Customizer - Let users choose chart types
3
+ """
4
+
5
+ import plotly.express as px
6
+ import plotly.graph_objects as go
7
+ import pandas as pd
8
+
9
class ChartCustomizer:
    """Build Plotly Express figures for ``df`` from a user-chosen chart type."""

    def __init__(self, df):
        self.df = df

    def get_available_charts(self):
        """Return the chart-type labels that fit the column dtypes of ``self.df``.

        The emoji prefix is decorative: ``create_chart`` matches on the text
        part of the label only.
        """
        charts = []

        if len(self.df.select_dtypes(include=['number']).columns) > 0:
            charts.extend([
                '📊 Histogram',
                '📈 Line Chart',
                '📉 Scatter Plot',
                '📦 Box Plot',
            ])

        if len(self.df.select_dtypes(include=['object']).columns) > 0:
            charts.extend(['🥧 Bar Chart', '🍩 Pie Chart'])

        if len(self.df.select_dtypes(include=['datetime64']).columns) > 0:
            charts.append('📅 Time Series')

        charts.append('🔥 Heatmap')

        return charts

    def create_chart(self, chart_type, x_col, y_col=None, color_col=None, title=None):
        """Create a styled figure of the requested type.

        Args:
            chart_type: Label containing one of 'Histogram', 'Bar Chart',
                'Line Chart', 'Scatter', 'Box', 'Pie', 'Heatmap' or
                'Time Series' (checked in that order).
            x_col: Column for the x axis (or the only column, for
                single-column charts).
            y_col: Optional y column; ignored when it is not a column of
                ``self.df``.
            color_col: Optional column used to colour the marks.
            title: Figure title; defaults to "<chart_type>: <x_col>[ vs <y_col>]".

        Returns:
            The figure, or ``None`` when the chart type is unknown or the
            data cannot support it (heatmap with fewer than two numeric
            columns, time series without a datetime column or y column).
        """
        if title is None:
            title = f"{chart_type}: {x_col}"
            if y_col:
                title += f" vs {y_col}"

        has_y = bool(y_col) and y_col in self.df.columns
        color = color_col if color_col else None
        fig = None

        if 'Histogram' in chart_type:
            fig = px.histogram(
                self.df, x=x_col,
                title=title,
                color=color,
                nbins=30,
                color_discrete_sequence=px.colors.sequential.Plasma
            )

        elif 'Bar Chart' in chart_type:
            if has_y:
                # The colour column must survive the aggregation, otherwise
                # px.bar cannot find it in the aggregated frame.
                group_keys = [x_col]
                if color is not None and color not in (x_col, y_col):
                    group_keys.append(color)
                agg_data = self.df.groupby(group_keys)[y_col].mean().reset_index()

                bar_kwargs = {}
                if len(group_keys) > 1:
                    # Side-by-side bars: stacking per-group means would
                    # suggest a total that does not exist.
                    bar_kwargs['barmode'] = 'group'
                fig = px.bar(
                    agg_data, x=x_col, y=y_col,
                    title=title,
                    color=color,
                    color_discrete_sequence=px.colors.qualitative.Set2,
                    **bar_kwargs
                )
            else:
                # No y column: plot the 20 most frequent x values.
                counts = self.df[x_col].value_counts().head(20).reset_index()
                counts.columns = [x_col, 'count']
                fig = px.bar(
                    counts, x=x_col, y='count',
                    title=f"Count of {x_col}",
                    color_discrete_sequence=['#2E86AB']
                )

        elif 'Line Chart' in chart_type:
            if has_y:
                fig = px.line(
                    self.df, x=x_col, y=y_col,
                    title=title,
                    color=color,
                    markers=True
                )
            else:
                fig = px.line(
                    self.df, x=x_col,
                    title=title,
                    markers=True
                )

        # Scatter plot (no trendline, which would require statsmodels)
        elif 'Scatter' in chart_type:
            if has_y:
                y_values = self.df[y_col]
                # Marker sizes must be non-negative numbers; fall back to
                # uniform markers when y cannot serve as a size.
                size_ok = (
                    pd.api.types.is_numeric_dtype(y_values)
                    and bool(y_values.notna().all())
                    and bool((y_values >= 0).all())
                )
                fig = px.scatter(
                    self.df, x=x_col, y=y_col,
                    title=title,
                    color=color,
                    size=y_col if size_ok else None,
                    hover_data=[x_col, y_col]
                )
            else:
                fig = px.scatter(
                    self.df, x=x_col, y=x_col,
                    title=title,
                    color=color
                )

        elif 'Box' in chart_type:
            if has_y:
                fig = px.box(
                    self.df, x=x_col, y=y_col,
                    title=title,
                    color=color,
                    points="all"
                )
            else:
                fig = px.box(
                    self.df, y=x_col,
                    title=f"Box Plot of {x_col}",
                    points="all"
                )

        elif 'Pie' in chart_type:
            # Ten most frequent values only, to keep the chart readable.
            counts = self.df[x_col].value_counts().head(10).reset_index()
            counts.columns = [x_col, 'count']
            fig = px.pie(
                counts, values='count', names=x_col,
                title=f"Distribution of {x_col}",
                hole=0.3
            )

        elif 'Heatmap' in chart_type:
            numeric_cols = self.df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 1:
                fig = px.imshow(
                    self.df[numeric_cols].corr(),
                    text_auto='.2f',
                    aspect='auto',
                    color_continuous_scale='RdBu',
                    title="Correlation Heatmap"
                )

        elif 'Time Series' in chart_type:
            date_cols = self.df.select_dtypes(include=['datetime64']).columns
            if len(date_cols) > 0 and has_y:
                # Uses the first datetime column; x_col is not consulted here.
                date_col = date_cols[0]
                time_data = self.df.groupby(date_col)[y_col].mean().reset_index()
                fig = px.line(
                    time_data, x=date_col, y=y_col,
                    title=f"{y_col} Over Time",
                    markers=True
                )

        if fig is not None:
            # Common styling for every chart type.
            fig.update_layout(
                template='plotly_white',
                height=500,
                title_font_size=16,
                title_x=0.5
            )

        return fig
app/dashboard.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ###____________ Chart selection logic and dashboard generation___________
2
+
3
+
4
+ import plotly.express as px
5
+ import plotly.graph_objects as go
6
+ from plotly.subplots import make_subplots
7
+ import pandas as pd
8
+
9
class DashboardGenerator:
    """Generate a default set of charts, KPI cards and a summary table.

    ``schema`` is read as a mapping with ``'numeric'``, ``'categorical'``
    and ``'datetime'`` lists of column names of ``df``.
    """

    def __init__(self, df, schema):
        self.df = df
        self.schema = schema
        self.charts = []

    def generate_all_charts(self):
        """Build the default charts and return them as a list of dicts.

        Each entry has ``title``, ``figure`` and ``type``. The list is
        rebuilt on every call (and stored on ``self.charts``), so calling
        the method again does not duplicate charts.
        """
        print(" Generating charts...")

        charts = []

        # Histograms for at most five numeric columns.
        for col in self.schema['numeric'][:5]:
            charts.append({
                'title': f'Distribution of {col}',
                'figure': self.create_histogram(col),
                'type': 'histogram'
            })

        # Top-value bar charts for at most three categorical columns.
        for col in self.schema['categorical'][:3]:
            charts.append({
                'title': f'Top values in {col}',
                'figure': self.create_bar_chart(col),
                'type': 'bar'
            })

        # Line charts: every datetime column against the first two numeric ones.
        for date_col in self.schema['datetime']:
            for num_col in self.schema['numeric'][:2]:
                charts.append({
                    'title': f'{num_col} over time',
                    'figure': self.create_time_series(date_col, num_col),
                    'type': 'line'
                })

        # A correlation matrix needs at least two numeric columns.
        if len(self.schema['numeric']) >= 2:
            charts.append({
                'title': 'Correlation Heatmap',
                'figure': self.create_correlation_heatmap(),
                'type': 'heatmap'
            })

        self.charts = charts
        return self.charts

    def create_histogram(self, column):
        """Return a 30-bin histogram of ``column``."""
        fig = px.histogram(
            self.df,
            x=column,
            title=f'Distribution of {column}',
            color_discrete_sequence=['#2E86AB'],
            nbins=30
        )
        fig.update_layout(
            showlegend=False,
            height=400,
            template='plotly_white'
        )
        return fig

    def create_bar_chart(self, column):
        """Return a horizontal bar chart of the ten most frequent values of ``column``."""
        value_counts = self.df[column].value_counts().head(10)

        fig = px.bar(
            x=value_counts.values,
            y=value_counts.index,
            orientation='h',
            title=f'Top 10 values in {column}',
            color=value_counts.values,
            color_continuous_scale='Blues'
        )
        fig.update_layout(
            xaxis_title='Count',
            yaxis_title=column,
            height=400,
            template='plotly_white'
        )
        return fig

    def create_time_series(self, date_col, value_col):
        """Return a line chart of the daily mean of ``value_col`` over ``date_col``."""
        time_data = (
            self.df.groupby(pd.Grouper(key=date_col, freq='D'))[value_col]
            .mean()
            .reset_index()
        )

        fig = px.line(
            time_data,
            x=date_col,
            y=value_col,
            title=f'{value_col} over time',
            markers=True
        )
        fig.update_layout(
            xaxis_title='Date',
            yaxis_title=value_col,
            height=400,
            template='plotly_white'
        )
        return fig

    def create_correlation_heatmap(self):
        """Return a heatmap of the correlation matrix of the numeric columns."""
        corr_matrix = self.df[self.schema['numeric']].corr()

        fig = px.imshow(
            corr_matrix,
            text_auto='.2f',
            aspect='auto',
            color_continuous_scale='RdBu',
            title='Correlation Heatmap'
        )
        fig.update_layout(
            height=500,
            template='plotly_white'
        )
        return fig

    def create_key_metrics(self):
        """Return KPI card data for the first four numeric columns.

        Each card holds pre-formatted strings: ``name`` (upper-cased column
        label), ``value`` (mean), ``change`` ("±" + standard deviation),
        ``min``, ``max`` and the constant ``type`` 'average'.
        """
        metrics = []

        for col in self.schema['numeric'][:4]:
            series = self.df[col]
            metrics.append({
                # str() so that non-string column labels do not break upper().
                'name': str(col).upper(),
                'value': f"{series.mean():,.0f}",
                'change': f"±{series.std():,.0f}",
                'min': f"{series.min():,.0f}",
                'max': f"{series.max():,.0f}",
                'type': 'average'
            })

        return metrics

    def create_summary_table(self):
        """Return a DataFrame with mean/median/std/min/max (2 decimals) per numeric column."""
        summary = [
            {
                'Column': col,
                'Mean': round(self.df[col].mean(), 2),
                'Median': round(self.df[col].median(), 2),
                'Std Dev': round(self.df[col].std(), 2),
                'Min': round(self.df[col].min(), 2),
                'Max': round(self.df[col].max(), 2)
            }
            for col in self.schema['numeric']
        ]

        return pd.DataFrame(summary)
app/data_processor.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## data ingestion & preprocessing & schema detection
2
+
3
+ import pandas as pd
4
+ import numpy as np
5
+ from pathlib import Path
6
+ import json
7
+
8
class DataProcessor:
    """Load a CSV/JSON dataset, clean it and classify its columns."""

    def __init__(self):
        self.df = None
        self.schema = {}

    def load_data(self, file_path):
        """Load a ``.csv`` or ``.json`` file from disk into ``self.df``.

        Raises:
            ValueError: If the file extension is neither CSV nor JSON.
        """
        file_ext = Path(file_path).suffix.lower()

        if file_ext == '.csv':
            self.df = pd.read_csv(file_path)
        elif file_ext == '.json':
            self.df = pd.read_json(file_path)
        else:
            raise ValueError("Unsupported file type. Use CSV or JSON file")

        return self.df

    def load_from_upload(self, uploaded_file):
        """Load an uploaded file object (needs a ``.name``) into ``self.df``.

        Raises:
            ValueError: If the name does not end in ``.csv`` or ``.json``.
        """
        # Case-insensitive, to agree with load_data ("DATA.CSV" is a CSV).
        name = uploaded_file.name.lower()

        if name.endswith('.csv'):
            self.df = pd.read_csv(uploaded_file)
        elif name.endswith('.json'):
            self.df = pd.read_json(uploaded_file)
        else:
            raise ValueError("Unsupported file type")

        return self.df

    def preprocess(self):
        """Clean ``self.df`` in place and return it.

        Steps: placeholder strings become missing values, duplicate rows are
        dropped, numeric gaps are filled with the column median, object
        gaps with the column mode (or "Unknown" when the column has no
        values), and object columns that parse as dates become datetime.
        """
        print("🔄 Preprocessing data...")

        # Placeholders must become NA first so the fills below see them.
        placeholder_values = ['?', 'None', 'null', 'NULL', 'NaN', 'nan', '', ' ', 'Unknown', 'unknown']
        self.df = self.df.replace(placeholder_values, pd.NA)

        initial_rows = len(self.df)
        self.df = self.df.drop_duplicates()
        print(f" Removed {initial_rows - len(self.df)} duplicates")

        missing_before = self.df.isnull().sum().sum()

        for col in self.df.select_dtypes(include=[np.number]).columns:
            self.df[col] = self.df[col].fillna(self.df[col].median())

        for col in self.df.select_dtypes(include=['object']).columns:
            modes = self.df[col].mode()
            # mode() is empty when every value is missing.
            fill_value = modes.iloc[0] if len(modes) > 0 else "Unknown"
            self.df[col] = self.df[col].fillna(fill_value)

        missing_after = self.df.isnull().sum().sum()
        print(f" Filled {missing_before - missing_after} missing values")

        self._convert_types()

        return self.df

    def _convert_types(self):
        """Convert object columns to datetime when every value parses as a date."""
        for col in self.df.columns:
            if self.df[col].dtype != 'object':
                continue
            try:
                converted = pd.to_datetime(self.df[col])
            except (ValueError, TypeError, OverflowError):
                # Not a date column: leave it untouched.
                continue
            self.df[col] = converted
            print(f" Converted {col} to datetime")

    def detect_schema(self):
        """Classify every column and return the schema dict.

        The result has the keys ``'numeric'``, ``'categorical'``,
        ``'datetime'`` and ``'text'``. A string-like column is categorical
        when fewer than half of its rows hold distinct values, otherwise
        text.
        """
        self.schema = {
            'numeric': [],
            'categorical': [],
            'datetime': [],
            'text': []
        }

        # max(..., 1) avoids a division by zero on an empty frame.
        row_count = max(len(self.df), 1)

        for col in self.df.columns:
            series = self.df[col]
            if pd.api.types.is_datetime64_any_dtype(series):
                self.schema['datetime'].append(col)
            elif pd.api.types.is_numeric_dtype(series):
                self.schema['numeric'].append(col)
            elif (
                pd.api.types.is_object_dtype(series)
                or pd.api.types.is_string_dtype(series)
                or isinstance(series.dtype, pd.CategoricalDtype)
            ):
                unique_ratio = series.nunique() / row_count
                # 0.5 (raised from 0.05) so columns such as product,
                # category or region still count as categorical.
                if unique_ratio < 0.5:
                    self.schema['categorical'].append(col)
                else:
                    self.schema['text'].append(col)

        print("\n📊 Schema Detected:")
        print(f" Numeric columns: {self.schema['numeric']}")
        print(f" Categorical columns: {self.schema['categorical']}")
        print(f" Date columns: {self.schema['datetime']}")

        return self.schema

    def get_summary(self):
        """Return row/column counts, column names, per-column nulls and memory use in MB."""
        return {
            'rows': len(self.df),
            'columns': len(self.df.columns),
            'column_names': list(self.df.columns),
            'missing_values': self.df.isnull().sum().to_dict(),
            'memory_usage': self.df.memory_usage(deep=True).sum() / 1024**2  # MB
        }
app/export_utils.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Export Utilities - CSV, Excel, and REAL Power BI export
3
+ """
4
+
5
+ import pandas as pd
6
+ import io
7
+ import json
8
+ from datetime import datetime
9
+
10
class ExportUtils:
    """Export a DataFrame as CSV, Excel, JSON or a Power BI resource bundle."""

    def __init__(self, df):
        self.df = df

    @staticmethod
    def _clean_column_name(col):
        """Return ``col`` as text with spaces, dashes and slashes turned into underscores."""
        return str(col).replace(' ', '_').replace('-', '_').replace('/', '_')

    def _clean_names(self, columns):
        """Return the Power BI friendly names for ``columns`` as a list."""
        return [self._clean_column_name(col) for col in columns]

    def to_csv(self):
        """Return the frame as UTF-8 encoded CSV bytes (no index column)."""
        return self.df.to_csv(index=False).encode('utf-8')

    def to_excel(self):
        """Return an .xlsx workbook as bytes.

        Sheets: 'Data' (the frame), 'Summary' (``describe()`` of the numeric
        columns, when there are any) and 'Column Info' (dtype, null count
        and unique count per column). Written with the openpyxl engine.
        """
        output = io.BytesIO()
        with pd.ExcelWriter(output, engine='openpyxl') as writer:
            self.df.to_excel(writer, sheet_name='Data', index=False)

            numeric_cols = self.df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 0:
                summary = self.df[numeric_cols].describe()
                summary.to_excel(writer, sheet_name='Summary', index=True)

            col_info = pd.DataFrame({
                'Column': self.df.columns,
                'Type': self.df.dtypes.astype(str),
                'Nulls': self.df.isnull().sum(),
                'Unique': self.df.nunique()
            })
            col_info.to_excel(writer, sheet_name='Column Info', index=False)

        return output.getvalue()

    def to_powerbi_ready(self):
        """Return CSV bytes with cleaned column names and parsed date columns.

        Columns whose cleaned name contains "date" or "time" are parsed as
        datetimes when they hold text that parses; numeric columns are left
        alone, because parsing numbers would turn them into timestamps
        near 1970.
        """
        df_powerbi = self.df.copy()
        df_powerbi.columns = self._clean_names(df_powerbi.columns)

        for col in df_powerbi.columns:
            lowered = col.lower()
            if 'date' not in lowered and 'time' not in lowered:
                continue

            values = df_powerbi[col]
            if (pd.api.types.is_numeric_dtype(values)
                    or pd.api.types.is_datetime64_any_dtype(values)):
                continue

            try:
                df_powerbi[col] = pd.to_datetime(values)
            except (ValueError, TypeError, OverflowError):
                # The name only looked like a date column; keep the text.
                pass

        return df_powerbi.to_csv(index=False).encode('utf-8')

    def to_powerbi_with_metadata(self):
        """Return ``{'data', 'metadata', 'instructions'}`` for a Power BI import.

        ``data`` is the CSV from ``to_powerbi_ready``, ``metadata`` is UTF-8
        JSON describing it, ``instructions`` is plain text. Column names in
        the metadata are the cleaned names used in the CSV.
        """
        data_csv = self.to_powerbi_ready()

        numeric_cols = self._clean_names(self.df.select_dtypes(include=['number']).columns)
        categorical_cols = self._clean_names(self.df.select_dtypes(include=['object']).columns)
        date_cols = self._clean_names(self.df.select_dtypes(include=['datetime64']).columns)

        metadata = {
            'export_date': datetime.now().isoformat(),
            'table_name': 'Cleaned_Data',
            'row_count': len(self.df),
            'column_count': len(self.df.columns),
            'columns': self._clean_names(self.df.columns),
            'numeric_columns': numeric_cols,
            'categorical_columns': categorical_cols,
            'date_columns': date_cols,
            'recommended_measures': {},
            'recommended_visuals': []
        }

        for col in numeric_cols[:10]:
            metadata['recommended_measures'][f'Total_{col}'] = f'SUM(Cleaned_Data[{col}])'
            metadata['recommended_measures'][f'Average_{col}'] = f'AVERAGE(Cleaned_Data[{col}])'

        if categorical_cols and numeric_cols:
            metadata['recommended_visuals'].append({
                'type': 'bar_chart',
                'category': categorical_cols[0],
                'value': numeric_cols[0],
                'title': f'{numeric_cols[0]} by {categorical_cols[0]}'
            })

        if date_cols and numeric_cols:
            metadata['recommended_visuals'].append({
                'type': 'line_chart',
                'date': date_cols[0],
                'value': numeric_cols[0],
                'title': f'{numeric_cols[0]} Over Time'
            })

        metadata_json = json.dumps(metadata, indent=2).encode('utf-8')

        return {
            'data': data_csv,
            'metadata': metadata_json,
            'instructions': self._get_powerbi_instructions()
        }

    def _get_powerbi_instructions(self):
        """Return step-by-step Power BI import instructions as text.

        The closing section lists a total and an average measure for up to
        ten numeric columns, using the cleaned column names.
        """
        lines = [
            "",
            "=== POWER BI IMPORT INSTRUCTIONS ===",
            "",
            "METHOD 1: Direct Import (Recommended)",
            "1. Open Power BI Desktop",
            '2. Click "Get Data" → "Text/CSV"',
            "3. Select the exported CSV file",
            '4. Click "Load"',
            "5. Power BI will auto-detect data types",
            "",
            "METHOD 2: Advanced Import",
            '1. Click "Get Data" → "More..."',
            '2. Search for "CSV" or "Text"',
            "3. Select your file",
            "4. Configure:",
            "   - First row as headers: YES",
            "   - Data type detection: Based on first 200 rows",
            '5. Click "Load"',
            "",
            "=== AFTER IMPORT ===",
            "",
            "Recommended DAX Measures to Create:",
            "",
        ]

        numeric_cols = self._clean_names(self.df.select_dtypes(include=['number']).columns)
        for col in numeric_cols[:10]:
            lines.append(f"Total_{col} = SUM(Cleaned_Data[{col}])")
            lines.append(f"Average_{col} = AVERAGE(Cleaned_Data[{col}])")
        if not numeric_cols:
            lines.append("(no numeric columns found)")

        return "\n".join(lines) + "\n"

    def to_powerbi_zip(self):
        """Return a zip (bytes) with data.csv, metadata.json, instructions.txt and measures.dax."""
        import zipfile

        # One call yields CSV, metadata and instructions; no second CSV build.
        bundle = self.to_powerbi_with_metadata()

        output = io.BytesIO()
        with zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) as zipf:
            zipf.writestr('data.csv', bundle['data'])
            zipf.writestr('metadata.json', bundle['metadata'])
            zipf.writestr('instructions.txt', bundle['instructions'])
            zipf.writestr('measures.dax', self._generate_dax_file())

        return output.getvalue()

    def _generate_dax_file(self):
        """Return DAX measure definitions for up to 15 numeric columns.

        Column references use the cleaned names, matching the CSV written
        by ``to_powerbi_ready``.
        """
        numeric_cols = self._clean_names(self.df.select_dtypes(include=['number']).columns)

        parts = [
            "// DAX Measures for Power BI\n"
            f"// Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
            "// Table Name: Cleaned_Data\n"
            "\n"
            "// ============ BASIC MEASURES ============\n"
            "\n"
            "Total Records = COUNTROWS(Cleaned_Data)\n"
            "\n"
        ]

        for col in numeric_cols[:15]:
            parts.append(
                "\n"
                f"// {col} Measures\n"
                f"Total {col} = SUM(Cleaned_Data[{col}])\n"
                f"Average {col} = AVERAGE(Cleaned_Data[{col}])\n"
                f"Min {col} = MIN(Cleaned_Data[{col}])\n"
                f"Max {col} = MAX(Cleaned_Data[{col}])\n"
                "\n"
            )

        parts.append(
            "\n"
            "// ============ HOW TO USE ============\n"
            '// 1. In Power BI, go to "Modeling" tab\n'
            '// 2. Click "New Measure"\n'
            "// 3. Copy-paste any measure above\n"
            "// 4. Press Enter to save\n"
            "\n"
            "// ============ EXAMPLE VISUALS ============\n"
            "// - Card Visual: Total Records\n"
            "// - Bar Chart: Category vs Total Sales\n"
            "// - Line Chart: Date vs Average Value\n"
        )

        return "".join(parts)

    def to_json(self):
        """Return the frame as UTF-8 JSON bytes, one object per row."""
        return self.df.to_json(orient='records', indent=2).encode('utf-8')

    def get_powerbi_template(self):
        """Return a DAX template as text (legacy; kept for compatibility).

        Unlike the zip export, this uses the frame's original column names.
        """
        numeric_cols = self.df.select_dtypes(include=['number']).columns
        categorical_cols = self.df.select_dtypes(include=['object']).columns

        parts = [
            "// Power BI DAX Template for your data\n"
            f"// Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
            "// Table name: Cleaned_Data\n"
            "\n"
            "// ============ BASIC MEASURES ============\n"
            "\n"
            "Total Records = COUNTROWS(Cleaned_Data)\n"
            "\n"
        ]

        for col in numeric_cols[:10]:
            parts.append(
                "\n"
                f"Total {col} = SUM(Cleaned_Data[{col}])\n"
                f"Average {col} = AVERAGE(Cleaned_Data[{col}])\n"
            )

        parts.append(
            "\n"
            "// ============ HOW TO USE ============\n"
            "// 1. Export your data as CSV first\n"
            "// 2. In Power BI: Get Data → CSV → Select your file\n"
            "// 3. Go to Modeling tab → New Measure\n"
            "// 4. Copy and paste any measure above\n"
            "// 5. Drag measures to visuals\n"
            "\n"
            "// ============ RECOMMENDED VISUALS ============\n"
        )

        if len(categorical_cols) > 0 and len(numeric_cols) > 0:
            parts.append(f"\n- Bar Chart: {categorical_cols[0]} vs {numeric_cols[0]}\n")

        if len(self.df.select_dtypes(include=['datetime64']).columns) > 0:
            value_name = numeric_cols[0] if len(numeric_cols) > 0 else 'Value'
            parts.append(f"\n- Line Chart: Date vs {value_name}\n")

        return "".join(parts)
app/insight_generator.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##________generate natural language insights from analysis_________##
2
+
3
+ import os
4
+ import json
5
+ from typing import Dict, Any
6
+
7
+
8
+ ##___________________________
9
+ class InsightGenerator:
10
+ def __init__(self, use_openai=False, api_key=None):
11
+ self.use_openai = use_openai
12
+ if use_openai and api_key:
13
+ import openai
14
+ openai.api_key = api_key
15
+ self.openai = openai
16
+ else:
17
+ print(" Using template-based insight generation")
18
+
19
def generate_insights(self, df, schema, analysis):
    """Assemble the full list of insight strings for a dataset.

    Starts with a dataset overview built from ``df``, then appends the
    output of each section generator in a fixed order, each fed the
    matching entry of ``analysis``, and ends with the recommendations,
    which receive the whole ``analysis`` mapping. ``schema`` is accepted
    but not read here.
    """
    report = [
        f" **Dataset Overview**: Your dataset has {len(df)} rows and {len(df.columns)} columns."
    ]

    # (section generator, key of `analysis` it consumes), in output order.
    sections = (
        (self._generate_statistical_insights, 'descriptive_stats'),
        (self._generate_correlation_insights, 'correlations'),
        (self._generate_trend_insights, 'trends'),
        (self._generate_group_insights, 'group_analysis'),
        (self._generate_outlier_insights, 'outliers'),
        (self._generate_distribution_insights, 'distributions'),
    )
    for build_section, key in sections:
        report.extend(build_section(analysis[key]))

    # Recommendations look across every analysis result.
    report.extend(self._generate_recommendations(analysis))

    return report
48
+
49
+ def _generate_statistical_insights(self, stats):
50
+ """generate insights from descriptive statistics"""
51
+
52
+ insights = []
53
+
54
+ for col, values in stats.items():
55
+ if values['mean'] > values['median'] * 1.2:
56
+ insights.append(f" **{col}** is right-skewed (mean {values['mean']:.2f} > median {values['median']:.2f}), suggesting some high values pulling the average up.")
57
+ elif values['median'] > values['mean'] * 1.2:
58
+ insights.append(f" **{col}** is left-skewed (median {values['median']:.2f} > mean {values['mean']:.2f}).")
59
+
60
+ return insights[:3] ### limit to top 3
61
+
62
+ def _generate_correlation_insights(self, correlations):
63
+ """generate insights from correlations"""
64
+ insights = []
65
+
66
+ for corr in correlations[:3]: # Top 3 correlations
67
+ strength = "strong positive" if corr['strength'] == 'positive' else "strong negative"
68
+ insights.append(f" **{corr['col1']}** and **{corr['col2']}** show a {strength} correlation ({corr['correlation']:.2f}).")
69
+
70
+ if corr['strength'] == 'positive':
71
+ insights.append(f" โ†’ When {corr['col1']} increases, {corr['col2']} tends to increase as well.")
72
+ else:
73
+ insights.append(f" โ†’ When {corr['col1']} increases, {corr['col2']} tends to decrease.")
74
+
75
+ return insights
76
+
77
+ def _generate_trend_insights(self, trends):
78
+ """generate insights from trends"""
79
+
80
+ insights =[]
81
+
82
+ for trend in trends:
83
+ direction = "increased" if trend['direction'] == 'increasing' else "decreased"
84
+ change_abs = abs(trend['percent_change'])
85
+
86
+ if change_abs > 20:
87
+ insights.append(f" **{trend['column']}** has {direction} significantly by {change_abs:.1f}% over time.")
88
+ elif change_abs > 5:
89
+ insights.append(f" **{trend['column']}** has {direction} by {change_abs:.1f}% over the period.")
90
+
91
+ return insights
92
+
93
+ def _generate_group_insights(self, group_analysis):
94
+ """generate insights from group analysis"""
95
+
96
+ insights = []
97
+
98
+ for cat_col, analyses in group_analysis.items():
99
+ for num_col, analysis in analyses.items():
100
+ if analysis['top_category']:
101
+ insights.append(f" **{analysis['top_category']}** is the top performer in {cat_col} for {num_col} with {analysis['top_value']:.2f}.")
102
+
103
+ return insights[:3]
104
+
105
+ def _generate_outlier_insights(self, outliers):
106
+ """generate insights about outliers"""
107
+
108
+ insights = []
109
+
110
+ for col, data in outliers.items():
111
+ if data['percentage'] < 5:
112
+ insights.append(f" **{col}** contains {data['count']} outliers ({data['percentage']:.1f}% of data). These might be worth investigating.")
113
+
114
+ return insights
115
+
116
+ def _generate_distribution_insights(self, distributions):
117
+ """generate insights about distributions"""
118
+
119
+ insights = []
120
+
121
+ for col, dist in distributions.items():
122
+ if dist['shape'] != 'approximately normal':
123
+ insights.append(f" **{col}** has a {dist['shape']} distribution (skewness: {dist['skewness']:.2f}).")
124
+
125
+ return insights[:2]
126
+
127
+ def _generate_recommendations(self, analysis):
128
+ """generate actionable recommendations"""
129
+ recommendations = []
130
+
131
+ # Check for opportunities
132
+ if analysis['correlations']:
133
+ strong_corr = analysis['correlations'][0]
134
+ if strong_corr['strength'] == 'positive':
135
+ recommendations.append(f" **Recommendation**: Focus on increasing {strong_corr['col1']} to potentially boost {strong_corr['col2']}.")
136
+
137
+ # Check for declining trends
138
+ for trend in analysis['trends']:
139
+ if trend['direction'] == 'decreasing' and abs(trend['percent_change']) > 10:
140
+ recommendations.append(f" **Action Required**: {trend['column']} is declining. Consider investigating causes.")
141
+ break
142
+
143
+ if not recommendations:
144
+ recommendations.append(" **Status**: No urgent issues detected. Continue monitoring key metrics.")
145
+
146
+ return recommendations
147
+
148
+ def generate_openai_insights(self, df_summary, analysis):
149
+ """use OpenAI to generate insights"""
150
+
151
+ if not self.use_openai:
152
+ return self.generate_insights(df_summary, analysis)
153
+
154
+ prompt = f"""
155
+ You are a data analyst. Analyze this dataset and provide key business insights:
156
+
157
+ Dataset: {df_summary['rows']} rows, {df_summary['columns']} columns
158
+ Columns: {df_summary['column_names']}
159
+
160
+ Key Statistics: {analysis.get('descriptive_stats', {})}
161
+ Correlations: {analysis.get('correlations', [])}
162
+ Trends: {analysis.get('trends', [])}
163
+
164
+ Provide:
165
+ 1. Top 3 key findings
166
+ 2. One actionable recommendation
167
+ 3. One question the user should explore further
168
+
169
+ Keep it concise and business-friendly.
170
+ """
171
+
172
+ try:
173
+ response = self.openai.ChatCompletion.create(
174
+ model="gpt-3.5-turbo",
175
+ messages=[{"role": "user", "content": prompt}],
176
+ max_tokens=300
177
+ )
178
+ return [response.choices[0].message.content]
179
+ except Exception as e:
180
+ print(f"OpenAI error: {e}")
181
+ return self.generate_insights(df_summary, analysis)
app/main.py ADDED
@@ -0,0 +1,646 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Smart Analytics Copilot - Complete Version
3
+ With Export, OpenAI, Save/Load, Chart Customization, Power BI Export
4
+ """
5
+
6
+ import streamlit as st
7
+ import pandas as pd
8
+ import os
9
+ from datetime import datetime
10
+ from dotenv import load_dotenv
11
+
12
+ # Load environment variables
13
+ load_dotenv()
14
+
15
+ from data_processor import DataProcessor
16
+ from analyzer import Analyzer
17
+ from insight_generator import InsightGenerator
18
+ from dashboard import DashboardGenerator
19
+ from query_engine import QueryEngine
20
+ from export_utils import ExportUtils
21
+ from session_manager import SessionManager
22
+ from chart_customizer import ChartCustomizer
23
+
24
# Page config (must be applied before any other Streamlit output).
_PAGE_SETTINGS = {
    "page_title": "Smart Analytics Copilot",
    "page_icon": "๐Ÿš€",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)

# ============ DARK THEME CSS ============
# Kept in a named constant so the stylesheet is separate from the call that
# injects it into the page.
_DARK_THEME_CSS = """
<style>
/* Main background */
.stApp {
background-color: #0a0e17 !important;
}

/* All text - light color */
.stMarkdown, .stMarkdown p, .stMarkdown div, .stMarkdown span,
.stText, p, div, span, label {
color: #e8e8e8 !important;
}

/* Headers */
h1, h2, h3, h4, h5, h6 {
color: #00ff9d !important;
font-weight: 600 !important;
}

/* Main header */
.main-header {
font-size: 2.8rem;
font-weight: bold;
background: linear-gradient(135deg, #00ff9d 0%, #00d4ff 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin-bottom: 1rem;
text-align: center;
}

/* Sidebar */
.css-1d391kg, .stSidebar, .sidebar-content {
background-color: #111827 !important;
}

/* Metrics */
div[data-testid="stMetricValue"] {
color: #00ff9d !important;
font-size: 2rem !important;
font-weight: bold !important;
}

div[data-testid="stMetricLabel"] {
color: #a0aec0 !important;
font-size: 0.9rem !important;
}

/* Tabs */
.stTabs [data-baseweb="tab-list"] {
gap: 4px;
background-color: #111827;
border-radius: 10px;
padding: 6px;
}

.stTabs [data-baseweb="tab"] {
background-color: #1f2937;
border-radius: 8px;
padding: 8px 24px;
color: #e8e8e8 !important;
}

.stTabs [aria-selected="true"] {
background: linear-gradient(135deg, #00ff9d 0%, #00d4ff 100%) !important;
color: #0a0e17 !important;
font-weight: bold;
}

/* Buttons */
.stButton button {
background: linear-gradient(135deg, #00ff9d 0%, #00d4ff 100%) !important;
color: #0a0e17 !important;
font-weight: bold !important;
border: none !important;
border-radius: 8px !important;
}

/* File uploader */
.stFileUploader {
background-color: #1f2937 !important;
border: 2px dashed #374151 !important;
border-radius: 12px !important;
}

/* Expander */
.streamlit-expanderHeader {
background-color: #1f2937 !important;
color: #00ff9d !important;
border-radius: 8px;
}

/* Success/Info/Warning boxes */
.stAlert {
background-color: #1f2937 !important;
border: 1px solid #374151 !important;
border-radius: 10px !important;
}

.stAlert p, .stAlert div {
color: #e8e8e8 !important;
}

/* Dataframe */
.stDataFrame {
background-color: #111827 !important;
}

.stDataFrame thead th {
background-color: #1f2937 !important;
color: #00ff9d !important;
}

/* Text input */
.stTextInput input {
background-color: #1f2937 !important;
color: #e8e8e8 !important;
border: 1px solid #374151 !important;
border-radius: 8px !important;
}

/* Select box */
.stSelectbox div[data-baseweb="select"] {
background-color: #1f2937 !important;
border-color: #374151 !important;
}

/* Download button */
.stDownloadButton button {
background: linear-gradient(135deg, #00ff9d 0%, #00d4ff 100%) !important;
color: #0a0e17 !important;
}
</style>
"""
st.markdown(_DARK_THEME_CSS, unsafe_allow_html=True)
166
+
167
# Initialize session state: every key the app reads gets a default exactly
# once; values already present (from an earlier rerun) are left untouched.
_SESSION_DEFAULTS = {
    'data_loaded': False,
    'df': None,
    'schema': None,
    'analysis': None,
    'insights': None,
    'charts': None,
    'use_openai': False,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Initialize managers
session_mgr = SessionManager()
185
+
186
+
187
def main():
    """Render the page: header, sidebar controls, then tabs or the welcome screen.

    The sidebar offers the data source (upload or saved session), the
    OpenAI settings and, once data is loaded, the export and save-session
    controls. The main area shows the five workspace tabs when data is
    loaded and the welcome screen otherwise.
    """
    st.markdown('<div class="main-header">๐Ÿš€ Smart Analytics Copilot</div>', unsafe_allow_html=True)
    st.caption("โœจ Upload any CSV/JSON - AI analyzes, visualizes, and answers questions")
    st.markdown("---")

    upload_option = "๐Ÿ“ค Upload File"
    zip_option = "Power BI ZIP (Complete)"
    # Export label -> (ExportUtils method name, MIME type, file extension).
    export_targets = {
        "CSV": ("to_csv", "text/csv", "csv"),
        "Excel": ("to_excel", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"),
        "JSON": ("to_json", "application/json", "json"),
        "Power BI CSV": ("to_powerbi_ready", "text/csv", "csv"),
        zip_option: ("to_powerbi_zip", "application/zip", "zip"),
    }

    # Sidebar
    with st.sidebar:
        st.markdown("### ๐Ÿ“ Data Source")

        # Data source selection
        source = st.radio("Choose data source:", [upload_option, "๐Ÿ’พ Load Saved Session"])

        if source == upload_option:
            uploaded_file = st.file_uploader("Choose CSV or JSON", type=['csv', 'json'])
            if uploaded_file and not st.session_state.data_loaded:
                with st.spinner("๐Ÿ”„ Processing your data..."):
                    process_data(uploaded_file)
        else:
            # Load saved sessions
            sessions = session_mgr.list_sessions()
            if not sessions:
                st.info("No saved sessions found")
            else:
                session_names = [entry['name'] for entry in sessions]
                selected_session = st.selectbox("Select saved session:", session_names)
                if st.button("๐Ÿ“‚ Load Session"):
                    with st.spinner("Loading..."):
                        load_session(selected_session)

        st.markdown("---")

        # Settings
        with st.expander("โš™๏ธ Settings"):
            st.session_state.use_openai = st.checkbox(
                "Use OpenAI (better insights)",
                value=st.session_state.use_openai,
            )
            if st.session_state.use_openai:
                api_key = st.text_input("OpenAI API Key:", type="password")
                if api_key:
                    os.environ['OPENAI_API_KEY'] = api_key
                    st.success("API Key set!")

        st.markdown("---")

        # Export section (only if data loaded)
        if st.session_state.data_loaded:
            st.markdown("### ๐Ÿ’พ Export Options")
            export_utils = ExportUtils(st.session_state.df)

            export_format = st.selectbox("Export format:", list(export_targets))

            if st.button("๐Ÿ“ฅ Download"):
                # Any label outside the table is treated like the ZIP export.
                method_name, mime, ext = export_targets.get(export_format, export_targets[zip_option])
                data = getattr(export_utils, method_name)()
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

                st.download_button(
                    label="โœ… Click to Download",
                    data=data,
                    file_name=f"export_{timestamp}.{ext}",
                    mime=mime
                )

            # Save session button
            st.markdown("---")
            if st.button("๐Ÿ’พ Save Current Session"):
                name, _saved_path = session_mgr.save_session(st.session_state.df, st.session_state.schema)
                st.success(f"โœ… Session saved as: {name}")

    # Main content
    if not st.session_state.data_loaded:
        show_welcome()
        return

    tab_labels = [
        "๐Ÿ“Š Dashboard", "๐Ÿ’ก AI Insights", "๐ŸŽจ Custom Charts", "๐Ÿ” Query", "๐Ÿ“‹ Data"
    ]
    tab_renderers = (
        show_dashboard,
        show_insights,
        show_chart_customizer,
        show_query_interface,
        show_data_preview,
    )
    for tab, render in zip(st.tabs(tab_labels), tab_renderers):
        with tab:
            render()
296
+
297
+
298
def _run_analysis_pipeline():
    """Recompute analysis, insights and charts for the data in session state.

    Reads ``st.session_state.df`` and ``st.session_state.schema`` and stores
    the results in ``st.session_state.analysis``, ``.insights`` and
    ``.charts``. Shared by the upload path and the saved-session path so the
    two cannot drift apart.
    """
    df = st.session_state.df
    schema = st.session_state.schema

    st.session_state.analysis = Analyzer(df, schema).run_full_analysis()

    # Use OpenAI if enabled; the key is taken from the process environment.
    insight_gen = InsightGenerator(
        use_openai=st.session_state.use_openai,
        api_key=os.environ.get('OPENAI_API_KEY'),
    )
    st.session_state.insights = insight_gen.generate_insights(
        df,
        schema,
        st.session_state.analysis
    )

    st.session_state.charts = DashboardGenerator(df, schema).generate_all_charts()


def process_data(uploaded_file):
    """Load, preprocess and analyse an uploaded file, then rerun the app.

    Args:
        uploaded_file: The object returned by the sidebar file uploader.

    Any exception raised while loading or analysing is shown with
    ``st.error`` and leaves ``data_loaded`` unchanged.
    """
    try:
        processor = DataProcessor()
        st.session_state.df = processor.load_from_upload(uploaded_file)
        st.session_state.df = processor.preprocess()
        st.session_state.schema = processor.detect_schema()
        _run_analysis_pipeline()
    except Exception as e:
        st.error(f"Error: {e}")
        return

    # The rerun is requested outside the ``try`` so that Streamlit's
    # control-flow exception can never be mistaken for a processing error.
    st.session_state.data_loaded = True
    st.success(f"โœ… Successfully loaded {len(st.session_state.df):,} rows with {len(st.session_state.df.columns)} columns")
    st.balloons()
    st.rerun()


def load_session(session_name):
    """Load a saved session and regenerate analysis, insights and charts.

    Args:
        session_name: Name of the session as listed by the session manager.

    A session that cannot be loaded, or whose analysis fails, is reported
    with ``st.error`` instead of crashing the page.
    """
    session = session_mgr.load_session(session_name)
    if not session:
        st.error("Failed to load session")
        return

    st.session_state.df = session['df']
    st.session_state.schema = session['schema']

    # Regenerate analysis, insights and charts for the loaded session, with
    # the same error handling the upload path uses.
    try:
        with st.spinner("๐Ÿ”„ Regenerating analysis..."):
            _run_analysis_pipeline()
    except Exception as e:
        st.error(f"Error: {e}")
        return

    st.session_state.data_loaded = True
    st.success(f"โœ… Loaded session: {session_name}")
    st.rerun()
359
+
360
+
361
def show_dashboard():
    """Render the dashboard tab: headline metrics, charts and summary table.

    Up to four numeric columns are shown as metrics (sum as value, mean as
    delta text), followed by at most four pre-generated charts and the
    ``describe()`` table of all numeric columns.
    """
    st.markdown("### ๐Ÿ“ˆ Key Metrics")
    st.markdown("---")

    # Check if data exists
    df = st.session_state.df
    if df is None:
        st.warning("No data loaded. Please upload a file first.")
        return

    # Display metrics
    numeric_cols = st.session_state.schema['numeric']
    if numeric_cols:
        slots = st.columns(min(4, len(numeric_cols)))
        for slot, col in zip(slots, numeric_cols[:4]):
            with slot:
                total = df[col].sum()
                avg = df[col].mean()
                st.metric(
                    label=f"๐Ÿ’ฐ {col.upper()}",
                    value=f"{total:,.0f}",
                    delta=f"Avg: {avg:,.0f}"
                )

    st.markdown("---")
    st.markdown("### ๐Ÿ“Š Visualizations")

    charts = st.session_state.charts
    if not charts:
        st.info("No charts available. Try uploading data first.")
    else:
        for chart in charts[:4]:
            st.plotly_chart(chart['figure'], use_container_width=True)

    st.markdown("---")
    st.markdown("### ๐Ÿ“‹ Summary Statistics")
    numeric_cols = st.session_state.schema['numeric']
    if numeric_cols:
        st.dataframe(df[numeric_cols].describe(), use_container_width=True)
398
+
399
+
400
def show_insights():
    """Render the insights tab and the Power BI resources expander.

    Each insight is routed to a Streamlit message style by the first keyword
    rule it matches; insights matching no rule are rendered as bullets.
    """
    st.markdown("### ๐Ÿง  AI-Powered Insights")
    st.markdown("Here's what we discovered in your data:")
    st.markdown("---")

    # Check if insights exist
    insights = st.session_state.insights
    if insights is None:
        st.info("๐Ÿ’ก Insights will appear after data is analyzed.")
        return

    # (predicate, renderer, prefix) rules, evaluated in order.
    rules = (
        (lambda text: "Dataset" in text, st.info, "๐Ÿ“Š "),
        (lambda text: "correlation" in text.lower(), st.success, "โœ… "),
        (lambda text: "skewed" in text.lower(), st.warning, "๐Ÿ“ˆ "),
        (lambda text: "Recommendation" in text, st.info, "๐Ÿ’ก "),
    )
    for insight in insights:
        for matches, render, prefix in rules:
            if matches(insight):
                render(f"{prefix}{insight}")
                break
        else:
            st.markdown(f"โ€ข {insight}")

    # Power BI template section
    st.markdown("---")
    with st.expander("๐Ÿ“Š Power BI Resources"):
        export_utils = ExportUtils(st.session_state.df)

        dax_column, steps_column = st.columns(2)

        with dax_column:
            # Show DAX template
            template = export_utils.get_powerbi_template()
            st.code(template, language="dax")

            st.download_button(
                label="๐Ÿ“ฅ Download DAX Template",
                data=template,
                file_name="powerbi_measures.dax",
                mime="text/plain"
            )

        with steps_column:
            # Show instructions
            instructions = """
**Power BI Import Steps:**

1. **Export Data**: Use sidebar to export as "Power BI CSV"
2. **Open Power BI Desktop**
3. **Get Data** โ†’ **Text/CSV**
4. **Select your exported CSV**
5. **Click Load**
6. **Copy DAX measures** from above
7. **Create visuals** using the measures
"""
            st.info(instructions)
456
+
457
+
458
def show_chart_customizer():
    """Render the custom chart builder tab.

    The user picks a chart type, an X column (restricted to numeric columns
    for histogram/box charts and to categorical columns for pie/bar charts),
    an optional Y column for line/scatter/bar charts, an optional colour
    column and a title. The chart is built on demand and offered as a PNG
    download when image export works.
    """
    st.markdown("### ๐ŸŽจ Custom Chart Builder")
    st.markdown("Create your own custom visualizations")
    st.markdown("---")

    customizer = ChartCustomizer(st.session_state.df)
    available_charts = customizer.get_available_charts()

    col1, col2, col3 = st.columns([1, 1, 1])

    with col1:
        chart_type = st.selectbox("Chart Type:", available_charts)

    with col2:
        # Get appropriate columns; fall back to dtype detection when the
        # schema lists none of the required kind.
        if 'Histogram' in chart_type or 'Box' in chart_type:
            columns = st.session_state.schema['numeric']
            if not columns:
                columns = list(st.session_state.df.select_dtypes(include=['number']).columns)
        elif 'Pie' in chart_type or 'Bar' in chart_type:
            columns = st.session_state.schema['categorical']
            if not columns:
                columns = list(st.session_state.df.select_dtypes(include=['object']).columns)
        else:
            columns = list(st.session_state.df.columns)

        if columns:
            x_col = st.selectbox("X-Axis / Category:", columns)
        else:
            x_col = None
            st.warning("No suitable columns found")

    with col3:
        # For charts that need Y-axis
        if any(t in chart_type for t in ['Line', 'Scatter', 'Bar']) and 'Histogram' not in chart_type:
            y_cols = ['None'] + st.session_state.schema['numeric']
            y_col = st.selectbox("Y-Axis / Value:", y_cols)
            y_col = None if y_col == 'None' else y_col
        else:
            y_col = None

    # Color column (optional)
    color_cols = ['None'] + st.session_state.schema['categorical']
    color_col = st.selectbox("Color By (optional):", color_cols)
    color_col = None if color_col == 'None' else color_col

    # Title
    title = st.text_input("Chart Title:", value=f"{chart_type} of {x_col if x_col else 'data'}")

    if st.button("๐ŸŽจ Generate Chart", use_container_width=True):
        if not x_col:
            st.error("Please select a column for X-Axis")
            return

        with st.spinner("Creating chart..."):
            fig = customizer.create_chart(chart_type, x_col, y_col, color_col, title)
            if not fig:
                st.error("Could not create chart. Try different settings.")
                return

            st.plotly_chart(fig, use_container_width=True)

            # Download chart button. Only the image rendering is guarded:
            # a bare ``except`` here would also swallow the BaseException
            # subclasses Streamlit uses to stop or rerun a script.
            try:
                png_bytes = fig.to_image(format="png")
            except Exception:
                st.info("๐Ÿ’ก Install kaleido for PNG export: `pip install kaleido`")
            else:
                st.download_button(
                    label="๐Ÿ“ธ Download as PNG",
                    data=png_bytes,
                    file_name="custom_chart.png",
                    mime="image/png"
                )
529
+
530
+
531
def show_query_interface():
    """Render the natural-language query tab.

    Shows example questions derived from the schema, then answers whatever
    the user types by delegating to ``QueryEngine.answer_question``.
    """
    st.markdown("### ๐Ÿ’ฌ Natural Language Query")
    st.markdown("Ask any question about your data in plain English:")
    st.markdown("---")

    schema = st.session_state.schema
    query_engine = QueryEngine(st.session_state.df, schema)

    # Example questions
    with st.expander("๐Ÿ” View Example Questions"):
        numeric_cols = schema['numeric']
        if numeric_cols:
            example_col = numeric_cols[0]
            for example in (
                f"โ€ข 'Statistics {example_col}'",
                f"โ€ข 'Total {example_col}'",
                f"โ€ข 'Average {example_col}'",
            ):
                st.markdown(example)

        if schema['categorical'] and schema['numeric']:
            st.markdown(f"โ€ข 'Top 5 {schema['categorical'][0]} by {schema['numeric'][0]}'")

        st.markdown("โ€ข 'Summary statistics'")
        st.markdown("โ€ข 'Show me the data'")

    st.markdown("---")

    question = st.text_input("Ask a question:", placeholder="e.g., What is the average of time_in_hospital?")

    if not question:
        return

    with st.spinner("๐Ÿค” Analyzing your question..."):
        answer = query_engine.answer_question(question)
        st.markdown("### โœ… Answer")
        st.success(answer)
562
+
563
+
564
def _format_datetime_columns(frame):
    """Format date-like columns of ``frame`` as ``YYYY-MM-DD HH:MM:SS`` text.

    A column is a candidate when its name contains ``datetime``, ``date`` or
    ``time``. Numeric columns are never converted: ``pd.to_datetime`` reads
    plain numbers as offsets from the Unix epoch, so a measurement such as
    ``time_in_hospital`` would be displayed as 1970 timestamps.

    Args:
        frame: DataFrame to modify in place (pass a copy).

    Returns:
        The same DataFrame, for convenient chaining.
    """
    name_hints = ('datetime', 'date', 'time')

    for col in frame.columns:
        # ``str()`` keeps non-string column labels (e.g. integers) working.
        label = str(col).lower()
        if not any(hint in label for hint in name_hints):
            continue

        try:
            series = frame[col]
            if pd.api.types.is_numeric_dtype(series):
                continue
            frame[col] = pd.to_datetime(series).dt.strftime('%Y-%m-%d %H:%M:%S')
        except Exception:
            # Best-effort display formatting: values that cannot be parsed
            # as dates are shown unchanged.
            continue

    return frame


def show_data_preview():
    """Show data preview and info with better formatting.

    Renders the row/column/memory metrics, the first 100 rows (date-like
    columns formatted for readability) and a per-column information table.
    """
    st.markdown("### ๐Ÿ“‹ Data Preview")
    st.markdown("---")

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("๐Ÿ“Š Total Rows", f"{len(st.session_state.df):,}")
    with col2:
        st.metric("๐Ÿ“‹ Total Columns", len(st.session_state.df.columns))
    with col3:
        memory = st.session_state.df.memory_usage(deep=True).sum() / 1024**2
        st.metric("๐Ÿ’พ Memory Usage", f"{memory:.2f} MB")

    st.markdown("---")
    st.markdown("### ๐Ÿ“„ Data Sample (First 100 rows)")

    # Work on a copy so the formatting never touches the session data.
    display_df = _format_datetime_columns(st.session_state.df.head(100).copy())

    st.dataframe(display_df, use_container_width=True)

    st.markdown("---")
    st.markdown("### ๐Ÿ“Š Column Information")

    col_info = pd.DataFrame({
        'Column': st.session_state.df.columns,
        'Type': st.session_state.df.dtypes.astype(str),
        'Non-Null': st.session_state.df.count().values,
        'Nulls': st.session_state.df.isnull().sum().values,
        'Unique': st.session_state.df.nunique().values
    })
    st.dataframe(col_info, use_container_width=True)
605
+
606
+
607
def show_welcome():
    """Render the landing screen shown while no data is loaded.

    A hero banner is followed by three feature cards laid out in equal
    columns.
    """
    st.markdown("""
<div style="text-align: center; padding: 2rem; background: linear-gradient(135deg, #111827 0%, #0a0e17 100%); border-radius: 20px; margin: 2rem 0;">
<h2 style="color: #00ff9d;">๐Ÿš€ Welcome to Smart Analytics Copilot</h2>
<p style="font-size: 1.1rem;">Upload any CSV or JSON file and let AI analyze it instantly</p>
<hr>
<p>๐Ÿ‘ˆ <strong>Get Started</strong>: Upload a file or load a saved session from the sidebar</p>
</div>
""", unsafe_allow_html=True)

    # (heading, description) of each feature card, left to right.
    feature_cards = (
        ("๐Ÿ“Š Auto Dashboard", "Smart charts based on your data"),
        ("๐Ÿ’ก AI Insights", "Natural language explanations"),
        ("๐ŸŽจ Custom Charts", "Build your own visualizations"),
    )

    for slot, (heading, blurb) in zip(st.columns(3), feature_cards):
        with slot:
            st.markdown(f"""
<div style="background: linear-gradient(135deg, #1f2937 0%, #111827 100%); padding: 1.5rem; border-radius: 15px; text-align: center;">
<h3 style="color: #00ff9d;">{heading}</h3>
<p>{blurb}</p>
</div>
""", unsafe_allow_html=True)


if __name__ == "__main__":
    main()
app/query_engine.py ADDED
@@ -0,0 +1,370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Smart Query Engine - Answers ANY question about your data
3
+ Automatically excludes ID columns and handles statistics properly
4
+ """
5
+
6
+ import pandas as pd
7
+ import re
8
+
9
+ class QueryEngine:
10
+ def __init__(self, df, schema):
11
+ self.df = df
12
+ self.schema = schema
13
+
14
+ # Filter out ID columns from numeric columns
15
+ self.numeric_columns = [col for col in self.schema['numeric'] if not self._is_id_column(col)]
16
+ self.id_columns = [col for col in self.schema['numeric'] if self._is_id_column(col)]
17
+
18
+ # Also check text columns that might be IDs
19
+ for col in self.schema['text']:
20
+ if self._is_id_column(col):
21
+ self.id_columns.append(col)
22
+
23
+ # Print warning about excluded ID columns
24
+ if self.id_columns:
25
+ print(f"โš ๏ธ Excluded ID columns from calculations: {self.id_columns}")
26
+
27
+ def _is_id_column(self, col_name):
28
+ """Check if a column is likely an ID (should not be aggregated)"""
29
+ col_lower = col_name.lower()
30
+
31
+ # Pattern-based detection
32
+ id_patterns = ['id', '_id', 'id_', 'key', '_key', 'pk', 'sk', 'uuid', 'guid',
33
+ 'code', 'number', 'nbr', '_nbr', 'patient', 'encounter']
34
+
35
+ for pattern in id_patterns:
36
+ if pattern == col_lower or col_lower.endswith(pattern) or col_lower.startswith(pattern):
37
+ return True
38
+
39
+ # Specific column names
40
+ exact_id_names = ['id', 'uid', 'uuid', 'row_id', 'record_id', 'encounter_id',
41
+ 'patient_id', 'customer_id', 'product_id', 'user_id', 'employee_id',
42
+ 'patient_nbr', 'encounter_nbr', 'member_id']
43
+ if col_lower in exact_id_names:
44
+ return True
45
+
46
+ # Uniqueness-based detection (for columns with enough data)
47
+ if len(self.df) > 10:
48
+ try:
49
+ uniqueness = self.df[col_name].nunique() / len(self.df[col_name])
50
+ # If >80% unique values, it's likely an ID
51
+ if uniqueness > 0.8:
52
+ return True
53
+ except:
54
+ pass
55
+
56
+ return False
57
+
58
+ def _get_meaningful_numeric_columns(self):
59
+ """Return only meaningful numeric columns (exclude IDs)"""
60
+ if self.numeric_columns:
61
+ return self.numeric_columns
62
+ return []
63
+
64
+ def answer_question(self, question):
65
+ """Answer ANY question about the data"""
66
+ question_lower = question.lower().strip()
67
+
68
+ # ============ STEP 1: FULL SUMMARY FIRST! ============
69
+ if any(word in question_lower for word in ['summary statistics', 'summary', 'statistics', 'describe', 'overview', 'tell me about', 'what is in', 'dataset summary']):
70
+ return self._format_full_summary()
71
+
72
+ # ============ STEP 2: STATISTICS FOR SPECIFIC COLUMN ============
73
+ stat_patterns = [
74
+ r'(?:statistics|statistic|summary|stats?|describe)\s+(\w+)',
75
+ r'(\w+)\s+(?:statistics|statistic|summary|stats?|describe)'
76
+ ]
77
+
78
+ for pattern in stat_patterns:
79
+ match = re.search(pattern, question_lower)
80
+ if match:
81
+ col_candidate = match.group(1)
82
+ for col in self.df.columns:
83
+ if col.lower() == col_candidate or col_candidate in col.lower():
84
+ return self._handle_column_statistics(col)
85
+
86
+ # ============ STEP 3: CHECK FOR ID COLUMN QUESTIONS ============
87
+ for id_col in self.id_columns:
88
+ if id_col.lower() in question_lower:
89
+ return self._handle_id_question(id_col)
90
+
91
+ # ============ STEP 4: NUMERIC CALCULATIONS ============
92
+ if any(word in question_lower for word in ['total', 'sum', 'add up', 'combined']):
93
+ result = self._handle_total_question(question_lower)
94
+ if result:
95
+ return result
96
+
97
+ if any(word in question_lower for word in ['average', 'mean', 'avg']):
98
+ result = self._handle_average_question(question_lower)
99
+ if result:
100
+ return result
101
+
102
+ if any(word in question_lower for word in ['minimum', 'min', 'lowest', 'smallest', 'least']):
103
+ result = self._handle_min_question(question_lower)
104
+ if result:
105
+ return result
106
+
107
+ if any(word in question_lower for word in ['maximum', 'max', 'highest', 'largest', 'most', 'greatest']):
108
+ result = self._handle_max_question(question_lower)
109
+ if result:
110
+ return result
111
+
112
+ if any(word in question_lower for word in ['top', 'best']):
113
+ result = self._handle_ranking_question(question_lower)
114
+ if result:
115
+ return result
116
+
117
+ # ============ STEP 5: GROUP BY ============
118
+ if any(word in question_lower for word in ['by', 'per', 'for each', 'grouped by']):
119
+ result = self._handle_group_question(question_lower)
120
+ if result:
121
+ return result
122
+
123
+ # ============ STEP 6: COUNT ============
124
+ if any(word in question_lower for word in ['count', 'how many', 'number of']):
125
+ result = self._handle_count_question(question_lower)
126
+ if result:
127
+ return result
128
+
129
+ # ============ STEP 7: DATA PREVIEW ============
130
+ if any(word in question_lower for word in ['show', 'display', 'view', 'preview', 'see', 'list']):
131
+ result = self._handle_show_question(question_lower)
132
+ if result:
133
+ return result
134
+
135
+ # ============ STEP 8: SMART RESPONSE ============
136
+ return self._smart_response(question_lower)
137
+
138
+ def _handle_column_statistics(self, col_name):
139
+ """Provide detailed statistics for a specific column"""
140
+
141
+ # Check if it's an ID column
142
+ if col_name in self.id_columns:
143
+ return f"""โš ๏ธ **'{col_name}' is an ID column**
144
+
145
+ Statistics for ID columns are not meaningful because:
146
+ โ€ข IDs are unique identifiers, not measurements
147
+ โ€ข Each ID appears only once typically
148
+
149
+ **What you CAN do:**
150
+ โ€ข Count how many IDs: "{col_name} count"
151
+ โ€ข View the data: "Show {col_name}"
152
+ โ€ข Analyze other columns: {', '.join(self._get_meaningful_numeric_columns()[:3]) if self._get_meaningful_numeric_columns() else 'None found'}"""
153
+
154
+ # Check if it's a meaningful numeric column
155
+ elif col_name in self._get_meaningful_numeric_columns():
156
+ stats = self.df[col_name].describe()
157
+ output = f"๐Ÿ“Š **Statistics for {col_name}**\n\n"
158
+ output += f"โ€ข **Count**: {stats['count']:,.0f}\n"
159
+ output += f"โ€ข **Mean**: {stats['mean']:,.2f}\n"
160
+ output += f"โ€ข **Standard Deviation**: {stats['std']:,.2f}\n"
161
+ output += f"โ€ข **Minimum**: {stats['min']:,.2f}\n"
162
+ output += f"โ€ข **25th Percentile**: {stats['25%']:,.2f}\n"
163
+ output += f"โ€ข **Median (50th)**: {stats['50%']:,.2f}\n"
164
+ output += f"โ€ข **75th Percentile**: {stats['75%']:,.2f}\n"
165
+ output += f"โ€ข **Maximum**: {stats['max']:,.2f}\n"
166
+ output += f"โ€ข **Total**: {self.df[col_name].sum():,.2f}"
167
+ return output
168
+
169
+ # Check if it's a categorical/text column
170
+ elif col_name in self.df.columns:
171
+ output = f"๐Ÿ“Š **Statistics for {col_name}**\n\n"
172
+ output += f"โ€ข **Unique values**: {self.df[col_name].nunique():,}\n"
173
+ output += f"โ€ข **Most common**: {self.df[col_name].mode()[0] if len(self.df[col_name].mode()) > 0 else 'N/A'}\n"
174
+ output += f"โ€ข **Missing values**: {self.df[col_name].isnull().sum():,}\n"
175
+ output += "\n**Top 5 values:**\n"
176
+ for val, count in self.df[col_name].value_counts().head(5).items():
177
+ output += f" โ€ข {val}: {count} ({count/len(self.df)*100:.1f}%)\n"
178
+ return output
179
+
180
+ return f"โŒ Column '{col_name}' not found. Available columns: {', '.join(self.df.columns[:10])}..."
181
+
182
+ def _handle_id_question(self, id_col):
183
+ """Handle questions about ID columns"""
184
+ unique_count = self.df[id_col].nunique()
185
+ return f"""โš ๏ธ **'{id_col}' is an ID column** (unique identifier)
186
+
187
+ Averages, sums, or other mathematical calculations on ID values are **not meaningful** because:
188
+ โ€ข IDs are just labels, not measurements
189
+ โ€ข Each ID is typically unique
190
+
191
+ **What you can do instead:**
192
+ โ€ข Count how many unique IDs: {unique_count} unique values
193
+ โ€ข Group data by other columns: "Show [category] by [metric]"
194
+ โ€ข Analyze meaningful numeric columns: {', '.join(self._get_meaningful_numeric_columns()[:3]) if self._get_meaningful_numeric_columns() else 'None found'}"""
195
+
196
+ def _handle_total_question(self, question):
197
+ """Handle total/sum questions"""
198
+ for col in self._get_meaningful_numeric_columns():
199
+ if col.lower() in question:
200
+ total = self.df[col].sum()
201
+ return f"๐Ÿ’ฐ **Total {col}**: {total:,.2f}"
202
+
203
+ if self._get_meaningful_numeric_columns():
204
+ col = self._get_meaningful_numeric_columns()[0]
205
+ total = self.df[col].sum()
206
+ return f"๐Ÿ’ฐ **Total {col}**: {total:,.2f}"
207
+ return None
208
+
209
+ def _handle_average_question(self, question):
210
+ """Handle average/mean questions"""
211
+ for col in self._get_meaningful_numeric_columns():
212
+ if col.lower() in question:
213
+ avg = self.df[col].mean()
214
+ return f"๐Ÿ“Š **Average {col}**: {avg:,.2f}"
215
+
216
+ if self._get_meaningful_numeric_columns():
217
+ col = self._get_meaningful_numeric_columns()[0]
218
+ avg = self.df[col].mean()
219
+ return f"๐Ÿ“Š **Average {col}**: {avg:,.2f}"
220
+ return None
221
+
222
+ def _handle_min_question(self, question):
223
+ """Handle minimum questions"""
224
+ for col in self._get_meaningful_numeric_columns():
225
+ if col.lower() in question:
226
+ min_val = self.df[col].min()
227
+ return f"๐Ÿ“‰ **Minimum {col}**: {min_val:,.2f}"
228
+ return None
229
+
230
+ def _handle_max_question(self, question):
231
+ """Handle maximum questions"""
232
+ for col in self._get_meaningful_numeric_columns():
233
+ if col.lower() in question:
234
+ max_val = self.df[col].max()
235
+ return f"๐Ÿ† **Maximum {col}**: {max_val:,.2f}"
236
+ return None
237
+
238
+ def _handle_ranking_question(self, question):
239
+ """Handle top/best questions"""
240
+ n_match = re.search(r'top\s+(\d+)', question)
241
+ n = int(n_match.group(1)) if n_match else 5
242
+
243
+ metric = None
244
+ for col in self._get_meaningful_numeric_columns():
245
+ if col.lower() in question:
246
+ metric = col
247
+ break
248
+
249
+ if not metric and self._get_meaningful_numeric_columns():
250
+ metric = self._get_meaningful_numeric_columns()[0]
251
+
252
+ category = None
253
+ for col in self.schema['categorical']:
254
+ if col.lower() in question:
255
+ category = col
256
+ break
257
+
258
+ if not category and self.schema['categorical']:
259
+ category = self.schema['categorical'][0]
260
+
261
+ if metric and category:
262
+ result = self.df.groupby(category)[metric].sum().sort_values(ascending=False).head(n)
263
+ output = f"๐Ÿ† **Top {n} {category} by {metric}**\n\n"
264
+ for idx, (item, val) in enumerate(result.items(), 1):
265
+ output += f"{idx}. **{item}**: {val:,.2f}\n"
266
+ return output
267
+
268
+ return None
269
+
270
+ def _handle_group_question(self, question):
271
+ """Handle group by questions"""
272
+ metric = None
273
+ category = None
274
+
275
+ for col in self._get_meaningful_numeric_columns():
276
+ if col.lower() in question:
277
+ metric = col
278
+ break
279
+
280
+ for col in self.schema['categorical']:
281
+ if col.lower() in question:
282
+ category = col
283
+ break
284
+
285
+ if metric and category:
286
+ result = self.df.groupby(category)[metric].sum().sort_values(ascending=False)
287
+ output = f"๐Ÿ“Š **{metric} by {category}**\n\n"
288
+ for idx, (item, val) in enumerate(result.items(), 1):
289
+ output += f"{idx}. **{item}**: {val:,.2f}\n"
290
+ output += f"\n**Total**: {result.sum():,.2f}"
291
+ return output
292
+
293
+ return None
294
+
295
+ def _handle_count_question(self, question):
296
+ """Handle count questions"""
297
+ for col in self.df.columns:
298
+ if col.lower() in question:
299
+ unique_count = self.df[col].nunique()
300
+ return f"๐Ÿ“Š **{col}**: {unique_count} unique values"
301
+
302
+ if 'rows' in question or 'records' in question:
303
+ return f"๐Ÿ“Š **Total records**: {len(self.df):,} rows"
304
+
305
+ return None
306
+
307
+ def _handle_show_question(self, question):
308
+ """Handle show/display questions"""
309
+ n_match = re.search(r'(\d+)', question)
310
+ n = int(n_match.group(1)) if n_match else 5
311
+
312
+ output = f"**๐Ÿ“Š Data Preview (First {n} rows)**\n\n```\n"
313
+ output += self.df.head(n).to_string()
314
+ output += "\n```"
315
+ return output
316
+
317
+ def _format_full_summary(self):
318
+ """Provide complete dataset summary"""
319
+ meaningful_numeric = self._get_meaningful_numeric_columns()
320
+
321
+ output = "๐Ÿ“Š **Complete Data Summary**\n\n"
322
+ output += f"**Dataset Size**: {len(self.df):,} rows ร— {len(self.df.columns)} columns\n\n"
323
+
324
+ output += "**Column Types:**\n"
325
+ output += f"โ€ข Meaningful numeric columns: {len(meaningful_numeric)}\n"
326
+ output += f"โ€ข ID columns (excluded): {len(self.id_columns)}\n"
327
+ output += f"โ€ข Categorical columns: {len(self.schema['categorical'])}\n"
328
+
329
+ if meaningful_numeric:
330
+ output += "\n**Key Numeric Statistics:**\n"
331
+ for col in meaningful_numeric[:5]:
332
+ output += f"โ€ข {col}: Mean={self.df[col].mean():.2f}, Total={self.df[col].sum():,.0f}\n"
333
+
334
+ if self.schema['categorical']:
335
+ output += "\n**Categorical Columns:**\n"
336
+ for col in self.schema['categorical'][:3]:
337
+ output += f"โ€ข {col}: {self.df[col].nunique()} unique values\n"
338
+
339
+ return output
340
+
341
+ def _smart_response(self, question):
342
+ """Generate intelligent response for unrecognized questions"""
343
+ meaningful_numeric = self._get_meaningful_numeric_columns()
344
+
345
+ output = "๐Ÿ’ก **I understand you're asking about your data.**\n\n"
346
+
347
+ output += "๐Ÿ“Š **Here's what's available:**\n"
348
+ output += f"โ€ข {len(self.df):,} rows, {len(self.df.columns)} columns\n"
349
+
350
+ if meaningful_numeric:
351
+ output += f"โ€ข Numeric columns to analyze: {', '.join(meaningful_numeric[:5])}\n"
352
+
353
+ if self.schema['categorical']:
354
+ output += f"โ€ข Categories to group by: {', '.join(self.schema['categorical'][:3])}\n"
355
+
356
+ output += "\n๐Ÿ“ **Try these example questions:**\n\n"
357
+
358
+ if meaningful_numeric:
359
+ example = meaningful_numeric[0]
360
+ output += f"โ€ข 'Statistics {example}'\n"
361
+ output += f"โ€ข 'Total {example}'\n"
362
+ output += f"โ€ข 'Average {example}'\n"
363
+
364
+ if self.schema['categorical'] and meaningful_numeric:
365
+ output += f"โ€ข 'Top 5 {self.schema['categorical'][0]} by {meaningful_numeric[0]}'\n"
366
+
367
+ output += "โ€ข 'Summary statistics'\n"
368
+ output += "โ€ข 'Show me the data'"
369
+
370
+ return output
app/session_manager.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Session Manager - Save and load analysis sessions
3
+ """
4
+
5
+ import json
6
+ import pickle
7
+ import os
8
+ from datetime import datetime
9
+ import pandas as pd
10
+
11
class SessionManager:
    """Save, load, list and delete analysis sessions stored as pickle files.

    A session is a dict holding the DataFrame rows, its column order and
    dtypes, the schema, a name and a timestamp, written to
    ``<session_dir>/<name>.pkl``.

    NOTE(security): sessions are read back with ``pickle.load``, which can
    execute arbitrary code embedded in the file. Only point this class at
    a directory whose contents you trust.
    """

    def __init__(self, session_dir="saved_sessions"):
        """Remember ``session_dir`` and create it if it does not exist."""
        self.session_dir = session_dir
        os.makedirs(session_dir, exist_ok=True)

    def _resolve_path(self, name):
        """Return the existing session file for ``name``, or ``None``.

        ``name`` is tried as given and then with a ``.pkl`` suffix, so both
        the bare name returned by ``save_session`` and the file name
        reported by ``list_sessions`` are accepted. Directories are never
        returned.
        """
        base = os.path.join(self.session_dir, name)
        for candidate in (base, f"{base}.pkl"):
            if os.path.isfile(candidate):
                return candidate
        return None

    def save_session(self, df, schema, name=None):
        """Write the current session to disk.

        Args:
            df: DataFrame to persist.
            schema: Schema object stored alongside the data as-is.
            name: Session name; defaults to ``session_<YYYYmmdd_HHMMSS>``.

        Returns:
            ``(name, filepath)`` of the written session.
        """
        if name is None:
            name = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        session_data = {
            'name': name,
            'timestamp': datetime.now().isoformat(),
            'data': df.to_dict('records'),
            'columns': list(df.columns),
            'dtypes': df.dtypes.astype(str).to_dict(),
            'schema': schema,
            'shape': df.shape,
        }

        filepath = os.path.join(self.session_dir, f"{name}.pkl")
        with open(filepath, 'wb') as f:
            pickle.dump(session_data, f)

        return name, filepath

    def load_session(self, name):
        """Load a saved session.

        Args:
            name: Session name, with or without the ``.pkl`` suffix.

        Returns:
            A dict with keys ``df``, ``schema``, ``name`` and ``timestamp``,
            or ``None`` when no matching session file exists.
        """
        filepath = self._resolve_path(name)
        if filepath is None:
            return None

        # See the class-level security note: the file must be trusted.
        with open(filepath, 'rb') as f:
            session_data = pickle.load(f)

        # Rebuild with the stored column order so a session saved from an
        # empty frame keeps its columns (row records alone carry none).
        # Duplicate labels cannot be round-tripped through records, so fall
        # back to plain inference in that case.
        columns = session_data.get('columns')
        if columns is not None and len(set(columns)) != len(columns):
            columns = None
        df = pd.DataFrame(session_data['data'], columns=columns)

        # Best-effort dtype restoration; a column that cannot be cast keeps
        # the dtype pandas inferred from the records.
        for col, dtype in (session_data.get('dtypes') or {}).items():
            if col not in df.columns:
                continue
            try:
                df[col] = df[col].astype(dtype)
            except (TypeError, ValueError):
                continue

        return {
            'df': df,
            'schema': session_data['schema'],
            'name': session_data['name'],
            'timestamp': session_data['timestamp'],
        }

    def list_sessions(self):
        """List all readable saved sessions, newest first.

        Files that cannot be unpickled or lack the expected keys (for
        example a Git LFS pointer stub left in place of the real pickle)
        are skipped instead of aborting the whole listing.

        Returns:
            A list of dicts with keys ``name``, ``timestamp``, ``rows``,
            ``columns`` and ``file``.
        """
        sessions = []
        for file in os.listdir(self.session_dir):
            if not file.endswith('.pkl'):
                continue
            filepath = os.path.join(self.session_dir, file)
            try:
                with open(filepath, 'rb') as f:
                    data = pickle.load(f)
                sessions.append({
                    'name': data['name'],
                    'timestamp': data['timestamp'],
                    'rows': data['shape'][0],
                    'columns': data['shape'][1],
                    'file': file,
                })
            except (OSError, EOFError, pickle.UnpicklingError, AttributeError,
                    ImportError, KeyError, IndexError, TypeError):
                # Unreadable or foreign file: leave it out of the listing.
                continue
        return sorted(sessions, key=lambda x: x['timestamp'], reverse=True)

    def delete_session(self, name):
        """Delete a saved session.

        Args:
            name: Session name, with or without the ``.pkl`` suffix.

        Returns:
            ``True`` if a session file was removed, ``False`` if none matched.
        """
        filepath = self._resolve_path(name)
        if filepath is None:
            return False
        os.remove(filepath)
        return True
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Save this as requirements.txt
2
+ fastapi==0.104.1
3
+ uvicorn==0.24.0
4
+ pandas==2.1.3
5
+ numpy==1.24.3
6
+ scipy==1.11.4
7
+ plotly==5.18.0
8
+ streamlit==1.29.0
9
+ openai==1.3.0
10
+ python-multipart==0.0.6
11
+ sqlalchemy==2.0.23
12
+ jinja2==3.1.2
13
+ openai==1.3.0
14
+ openpyxl==3.1.2
15
+ python-dotenv==1.0.0
run.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Run the Smart Analytics Copilot
3
+ """
4
+
5
+ import subprocess
6
+ import sys
7
+
8
def main():
    """Launch the Streamlit dashboard and block until it exits.

    Streamlit is started with the current interpreter
    (``python -m streamlit run app/main.py``) on localhost:8501. The child
    process runs with this file's directory as its working directory, so
    the relative ``app/main.py`` path resolves no matter where the
    launcher is invoked from. Ctrl+C stops the launcher without a
    traceback.
    """
    # Local import keeps the module's top-level import block unchanged.
    from pathlib import Path

    print("๐Ÿš€ Starting Smart Analytics Copilot...")
    print("๐Ÿ“Š Your dashboard will open in your browser")
    print("")

    project_root = Path(__file__).resolve().parent
    command = [
        sys.executable, "-m", "streamlit", "run",
        "app/main.py",
        "--server.port", "8501",
        "--server.address", "localhost",
    ]

    try:
        subprocess.run(command, cwd=str(project_root))
    except KeyboardInterrupt:
        # The child receives the same interrupt; just exit quietly here.
        print("\nSmart Analytics Copilot stopped.")


if __name__ == "__main__":
    main()
saved_sessions/session_20260418_131145.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55f78a61b8f7c53962476b6d487b8be7b9139e4d925df527ee3ad37c5a746b00
3
+ size 95913946
saved_sessions/session_20260418_132524.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c982dc574744594a1292c45e67c38b85cb5d8b7e8a7d8f96f35350743245041
3
+ size 30056318
saved_sessions/session_20260418_135615.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f50bab79550ce31f3304228b8d0f5eac2c786b86d2f48ff09967eec9bd0c52c
3
+ size 30056318
saved_sessions/session_20260418_135934.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bac5b4a06a4abe31fcdf26c1999bcdd5fb288e5b150fdc52665de8b998be62d
3
+ size 360452