SamadhiDBS committed on
Commit
7a809d2
·
verified ·
1 Parent(s): 80d4fc5

Update app/dashboard.py

Browse files
Files changed (1) hide show
  1. app/dashboard.py +166 -170
app/dashboard.py CHANGED
@@ -1,171 +1,167 @@
1
- ###____________ Chart selection logic and dashboard generation___________
2
-
3
-
4
- import plotly.express as px
5
- import plotly.graph_objects as go
6
- from plotly.subplots import make_subplots
7
- import pandas as pd
8
-
9
class DashboardGenerator:
    """Build Plotly figures, KPI cards and a summary table from a DataFrame.

    Args:
        df: Source data; columns are looked up by the names listed in
            ``schema``.
        schema: Dict-like mapping of column kind to a list of column names.
            The keys read here are ``'numeric'``, ``'categorical'`` and
            ``'datetime'``; a missing key is treated as an empty list.
    """

    # Caps on how many columns of each kind feed the dashboard.
    MAX_HISTOGRAMS = 5
    MAX_BAR_CHARTS = 3
    MAX_SERIES_PER_DATE = 2
    MAX_KEY_METRICS = 4
    SUMMARY_COLUMNS = ['Column', 'Mean', 'Median', 'Std Dev', 'Min', 'Max']

    def __init__(self, df, schema):
        self.df = df
        self.schema = schema
        self.charts = []

    def _columns(self, kind):
        """Return the schema's column list for *kind* ([] when absent)."""
        return list(self.schema.get(kind) or [])

    def generate_all_charts(self):
        """Generate the charts appropriate to each column kind.

        Returns:
            A list of dicts with the keys ``'title'``, ``'figure'`` and
            ``'type'``. The same list is stored on ``self.charts``.
        """
        print(" Generating charts...")

        # Build into a fresh list so calling this twice does not leave
        # duplicates of every chart in self.charts.
        charts = []
        numeric = self._columns('numeric')

        # Numeric columns - histograms
        for col in numeric[:self.MAX_HISTOGRAMS]:
            charts.append({
                'title': f'Distribution of {col}',
                'figure': self.create_histogram(col),
                'type': 'histogram',
            })

        # Categorical columns - bar charts (top 10)
        for col in self._columns('categorical')[:self.MAX_BAR_CHARTS]:
            charts.append({
                'title': f'Top values in {col}',
                'figure': self.create_bar_chart(col),
                'type': 'bar',
            })

        # Time series - line charts
        for date_col in self._columns('datetime'):
            for num_col in numeric[:self.MAX_SERIES_PER_DATE]:
                charts.append({
                    'title': f'{num_col} over time',
                    'figure': self.create_time_series(date_col, num_col),
                    'type': 'line',
                })

        # A correlation matrix needs at least two numeric columns.
        if len(numeric) >= 2:
            charts.append({
                'title': 'Correlation Heatmap',
                'figure': self.create_correlation_heatmap(),
                'type': 'heatmap',
            })

        self.charts = charts
        return self.charts

    def create_histogram(self, column):
        """Create a 30-bin histogram for a numeric column."""
        fig = px.histogram(
            self.df,
            x=column,
            title=f'Distribution of {column}',
            color_discrete_sequence=['#2E86AB'],
            nbins=30,
        )
        fig.update_layout(
            showlegend=False,
            height=400,
            template='plotly_white',
        )
        return fig

    def create_bar_chart(self, column):
        """Create a horizontal bar chart of the 10 most frequent values."""
        value_counts = self.df[column].value_counts().head(10)

        fig = px.bar(
            x=value_counts.values,
            y=value_counts.index,
            orientation='h',
            title=f'Top 10 values in {column}',
            color=value_counts.values,
            color_continuous_scale='Blues',
        )
        fig.update_layout(
            xaxis_title='Count',
            yaxis_title=column,
            # Treat labels as discrete categories even when they are numbers
            # or numeric strings, and list the most frequent value first.
            yaxis_type='category',
            yaxis_autorange='reversed',
            height=400,
            template='plotly_white',
        )
        return fig

    def _daily_means(self, date_col, value_col):
        """Return per-day means of *value_col* for days that have data."""
        frame = self.df[[date_col, value_col]].copy()
        # The grouper needs a datetime-like key; coerce string/object dates
        # and discard rows whose date cannot be parsed.
        frame[date_col] = pd.to_datetime(frame[date_col], errors='coerce')
        frame = frame.dropna(subset=[date_col])
        daily = frame.groupby(pd.Grouper(key=date_col, freq='D'))[value_col].mean()
        # Days without observations come back as NaN and would split the
        # plotted line into disconnected markers.
        return daily.dropna().reset_index()

    def create_time_series(self, date_col, value_col):
        """Create a line chart of the daily mean of *value_col*."""
        time_data = self._daily_means(date_col, value_col)

        fig = px.line(
            time_data,
            x=date_col,
            y=value_col,
            title=f'{value_col} over time',
            markers=True,
        )
        fig.update_layout(
            xaxis_title='Date',
            yaxis_title=value_col,
            height=400,
            template='plotly_white',
        )
        return fig

    def create_correlation_heatmap(self):
        """Create a correlation heatmap of the schema's numeric columns."""
        corr_matrix = self.df[self._columns('numeric')].corr()

        fig = px.imshow(
            corr_matrix,
            text_auto='.2f',
            aspect='auto',
            color_continuous_scale='RdBu',
            # Pin the range to the bounds of a correlation coefficient so the
            # diverging scale's neutral colour sits at zero.
            zmin=-1,
            zmax=1,
            title='Correlation Heatmap',
        )
        fig.update_layout(
            height=500,
            template='plotly_white',
        )
        return fig

    def create_key_metrics(self):
        """Create KPI cards for the first few numeric columns.

        Returns:
            A list of dicts with the keys ``'name'``, ``'value'`` (mean),
            ``'change'`` (standard deviation), ``'min'``, ``'max'`` and
            ``'type'``; numbers are formatted with thousands separators and
            no decimals.
        """
        metrics = []
        for col in self._columns('numeric')[:self.MAX_KEY_METRICS]:
            series = self.df[col]
            metrics.append({
                # str() keeps non-string column labels from raising here.
                'name': str(col).upper(),
                'value': f"{series.mean():,.0f}",
                'change': f"±{series.std():,.0f}",
                'min': f"{series.min():,.0f}",
                'max': f"{series.max():,.0f}",
                'type': 'average',
            })
        return metrics

    def create_summary_table(self):
        """Create a summary-statistics table for all numeric columns.

        Returns:
            A DataFrame with one row per numeric column and the columns in
            ``SUMMARY_COLUMNS``; the columns are present even when there are
            no rows.
        """
        summary = []
        for col in self._columns('numeric'):
            series = self.df[col]
            summary.append({
                'Column': col,
                'Mean': round(series.mean(), 2),
                'Median': round(series.median(), 2),
                'Std Dev': round(series.std(), 2),
                'Min': round(series.min(), 2),
                'Max': round(series.max(), 2),
            })

        return pd.DataFrame(summary, columns=self.SUMMARY_COLUMNS)
 
1
+ ###____________ Chart selection logic and dashboard generation___________
2
+
3
+
4
+ import plotly.express as px
5
+ import plotly.graph_objects as go
6
+ from plotly.subplots import make_subplots
7
+ import pandas as pd
8
+
9
class DashboardGenerator:
    """Build Plotly figures, KPI cards and a summary table from a DataFrame.

    Args:
        df: Source data; columns are looked up by the names listed in
            ``schema``.
        schema: Dict-like mapping of column kind to a list of column names.
            The keys read here are ``'numeric'``, ``'categorical'`` and
            ``'datetime'``; a missing key is treated as an empty list.
    """

    # Caps on how many columns of each kind feed the dashboard.
    MAX_HISTOGRAMS = 5
    MAX_BAR_CHARTS = 3
    MAX_SERIES_PER_DATE = 2
    MAX_KEY_METRICS = 4
    SUMMARY_COLUMNS = ['Column', 'Mean', 'Median', 'Std Dev', 'Min', 'Max']

    def __init__(self, df, schema):
        self.df = df
        self.schema = schema
        self.charts = []

    def _columns(self, kind):
        """Return the schema's column list for *kind* ([] when absent)."""
        return list(self.schema.get(kind) or [])

    def generate_all_charts(self):
        """Generate the charts appropriate to each column kind.

        Returns:
            A list of dicts with the keys ``'title'``, ``'figure'`` and
            ``'type'``. The same list is stored on ``self.charts``.
        """
        print(" Generating charts...")

        # Build into a fresh list so calling this twice does not leave
        # duplicates of every chart in self.charts.
        charts = []
        numeric = self._columns('numeric')

        # Numeric columns - histograms
        for col in numeric[:self.MAX_HISTOGRAMS]:
            charts.append({
                'title': f'Distribution of {col}',
                'figure': self.create_histogram(col),
                'type': 'histogram',
            })

        # Categorical columns - bar charts (top 10)
        for col in self._columns('categorical')[:self.MAX_BAR_CHARTS]:
            charts.append({
                'title': f'Top values in {col}',
                'figure': self.create_bar_chart(col),
                'type': 'bar',
            })

        # Time series - line charts
        for date_col in self._columns('datetime'):
            for num_col in numeric[:self.MAX_SERIES_PER_DATE]:
                charts.append({
                    'title': f'{num_col} over time',
                    'figure': self.create_time_series(date_col, num_col),
                    'type': 'line',
                })

        # A correlation matrix needs at least two numeric columns.
        if len(numeric) >= 2:
            charts.append({
                'title': 'Correlation Heatmap',
                'figure': self.create_correlation_heatmap(),
                'type': 'heatmap',
            })

        self.charts = charts
        return self.charts

    def create_histogram(self, column):
        """Create a 30-bin histogram for a numeric column."""
        fig = px.histogram(
            self.df,
            x=column,
            title=f'Distribution of {column}',
            color_discrete_sequence=['#2E86AB'],
            nbins=30,
        )
        fig.update_layout(
            showlegend=False,
            height=400,
            template='plotly_white',
        )
        return fig

    def create_bar_chart(self, column):
        """Create a horizontal bar chart of the 10 most frequent values."""
        value_counts = self.df[column].value_counts().head(10)

        fig = px.bar(
            x=value_counts.values,
            y=value_counts.index,
            orientation='h',
            title=f'Top 10 values in {column}',
            color=value_counts.values,
            color_continuous_scale='Blues',
        )
        fig.update_layout(
            xaxis_title='Count',
            yaxis_title=column,
            # Treat labels as discrete categories even when they are numbers
            # or numeric strings, and list the most frequent value first.
            yaxis_type='category',
            yaxis_autorange='reversed',
            height=400,
            template='plotly_white',
        )
        return fig

    def _daily_means(self, date_col, value_col):
        """Return per-day means of *value_col* for days that have data."""
        frame = self.df[[date_col, value_col]].copy()
        # The grouper needs a datetime-like key; coerce string/object dates
        # and discard rows whose date cannot be parsed.
        frame[date_col] = pd.to_datetime(frame[date_col], errors='coerce')
        frame = frame.dropna(subset=[date_col])
        daily = frame.groupby(pd.Grouper(key=date_col, freq='D'))[value_col].mean()
        # Days without observations come back as NaN and would split the
        # plotted line into disconnected markers.
        return daily.dropna().reset_index()

    def create_time_series(self, date_col, value_col):
        """Create a line chart of the daily mean of *value_col*."""
        time_data = self._daily_means(date_col, value_col)

        fig = px.line(
            time_data,
            x=date_col,
            y=value_col,
            title=f'{value_col} over time',
            markers=True,
        )
        fig.update_layout(
            xaxis_title='Date',
            yaxis_title=value_col,
            height=400,
            template='plotly_white',
        )
        return fig

    def create_correlation_heatmap(self):
        """Create a correlation heatmap of the schema's numeric columns."""
        corr_matrix = self.df[self._columns('numeric')].corr()

        fig = px.imshow(
            corr_matrix,
            text_auto='.2f',
            aspect='auto',
            color_continuous_scale='RdBu',
            # Pin the range to the bounds of a correlation coefficient so the
            # diverging scale's neutral colour sits at zero.
            zmin=-1,
            zmax=1,
            title='Correlation Heatmap',
        )
        fig.update_layout(
            height=500,
            template='plotly_white',
        )
        return fig

    def create_key_metrics(self):
        """Create KPI cards for the first few numeric columns.

        Returns:
            A list of dicts with the keys ``'name'``, ``'value'`` (mean),
            ``'change'`` (standard deviation), ``'min'``, ``'max'`` and
            ``'type'``; numbers are formatted with thousands separators and
            no decimals.
        """
        metrics = []
        for col in self._columns('numeric')[:self.MAX_KEY_METRICS]:
            series = self.df[col]
            metrics.append({
                # str() keeps non-string column labels from raising here.
                'name': str(col).upper(),
                'value': f"{series.mean():,.0f}",
                'change': f"±{series.std():,.0f}",
                'min': f"{series.min():,.0f}",
                'max': f"{series.max():,.0f}",
                'type': 'average',
            })
        return metrics

    def create_summary_table(self):
        """Create a summary-statistics table for all numeric columns.

        Returns:
            A DataFrame with one row per numeric column and the columns in
            ``SUMMARY_COLUMNS``; the columns are present even when there are
            no rows.
        """
        summary = []
        for col in self._columns('numeric'):
            series = self.df[col]
            summary.append({
                'Column': col,
                'Mean': round(series.mean(), 2),
                'Median': round(series.median(), 2),
                'Std Dev': round(series.std(), 2),
                'Min': round(series.min(), 2),
                'Max': round(series.max(), 2),
            })

        return pd.DataFrame(summary, columns=self.SUMMARY_COLUMNS)