TahaRasouli committed on
Commit
6ed0dfa
·
verified ·
1 Parent(s): 0f6b94b

Upload 2 files

Browse files
Files changed (2) hide show
  1. requirements.txt.txt +3 -0
  2. visualization_tool.py +362 -0
requirements.txt.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ pandas
2
+ streamlit
3
+ plotly
visualization_tool.py ADDED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ import numpy as np
6
+
7
def rgb_to_hex(r, g, b):
    """Format an RGB triple as an uppercase ``#RRGGBB`` string.

    Each channel is truncated to an integer and clamped to 0-255 so the
    result is always a valid six-digit hex color, even for out-of-range or
    float inputs.
    """
    def _channel(value):
        return max(0, min(255, int(value)))

    return f'#{_channel(r):02X}{_channel(g):02X}{_channel(b):02X}'


def interpolate_color(start_color, end_color, factor):
    """Linearly blend two RGB triples and return the result as a hex string.

    Args:
        start_color: ``(r, g, b)`` returned when ``factor`` is 0.
        end_color: ``(r, g, b)`` returned when ``factor`` is 1.
        factor: Blend position between the two colors.

    Returns:
        The blended color as ``#RRGGBB``; channels are truncated, not rounded.
    """
    r1, g1, b1 = start_color
    r2, g2, b2 = end_color
    r = int(r1 + (r2 - r1) * factor)
    g = int(g1 + (g2 - g1) * factor)
    b = int(b1 + (b2 - b1) * factor)
    return rgb_to_hex(r, g, b)


def get_warm_to_cold_colors(n):
    """Return ``n`` hex colors running from the hot to the cold brand color.

    The gradient passes through a medium color at its midpoint. Colors are
    evenly spaced so the first entry is always the hot color and, for
    ``n >= 2``, the last entry is always the cold color.

    Args:
        n: Number of colors requested.

    Returns:
        A list of ``n`` ``#RRGGBB`` strings; empty when ``n <= 0``.
    """
    hot = (124, 21, 77)      # Hot color RGB
    medium = (0, 66, 76)     # Medium color RGB
    cold = (197, 246, 235)   # Cold color RGB

    if n <= 0:
        return []
    if n == 1:
        # A single color has no gradient to interpolate (and would otherwise
        # divide by zero); use the hot end.
        return [rgb_to_hex(*hot)]

    span = n - 1
    colors = []
    for i in range(n):
        # Position on the gradient is i / span in [0, 1]; doubling it maps the
        # first half onto hot->medium and the second half onto medium->cold.
        doubled = 2 * i
        if doubled < span:
            colors.append(interpolate_color(hot, medium, doubled / span))
        else:
            colors.append(interpolate_color(medium, cold, (doubled - span) / span))
    return colors
34
+
35
# Trace/marker color used for each known dataset column, keyed by the raw
# column name as it appears in the uploaded CSV.
PREDEFINED_COLORS = {
    'SC_Q_Origin': '#A7BCC6',  # placeholder color (arbitrarily chosen)
    'SC_Q_H_state': '#A7BCC6',  # placeholder color (arbitrarily chosen)
    'SC_Q_H_scope': '#A7BCC6',  # placeholder color (arbitrarily chosen)
    'H_sector': '#1A636B',
    'SC_Q_H_sector': '#1A636B',
    'H_companysize': '#A7BCC6',  # placeholder color (arbitrarily chosen)
    'H_revenue': '#49677B',
    'H_employee': '#125F51',
    'SC_Q_H_employee': '#125F51',
    'I_importance': '#074057',
    'IB_imp_weighted': '#5B8394',
    'I_importance_fut': '#01626F',
    'IB_imp_fut_weighted': '#39808B',
    'I_invest_share': '#7B8D24',
    'IB_invest_share_weighted': '#BCCFD6',
    'I_invest_share_fut': '#16936D',
    'IB_invest_share_fut_weighted': '#1D6073',
    'I_eneffincrease_fut': '#007B86',
    'H_energyuse': '#245B60',
    'IB_energyuse_fut': '#587081',
    'IB_energyuse_weighted': '#146153',
    'IB_energyuse_fut_weighted': '#035263',
    'H_energyuse_classes': '#66889A',
    'SC_Q_H_energyuse_classes': '#66889A',
    'SC_Q_S23_turnover_energycost': '#245B60',  # placeholder color (arbitrarily chosen)
    'H_energyintensity': '#186B77',
    'Des_Gesamtumsatz': '#4A8A95',
    'Prod_Erw': '#064B55',
    'Prod_BDB': '#A7BCC6',
}

# Human-readable label shown in chart titles, axes and legends for each raw
# column name. Covers the same columns as PREDEFINED_COLORS.
column_name_map = {
    'H_sector': 'Sector',
    'SC_Q_H_sector': 'Sector',
    'SC_Q_Origin': 'Origin',
    'SC_Q_H_state': 'State',
    'SC_Q_H_scope': 'SC_Q_H_scope',  # NOTE(review): still the raw name; no display label chosen yet
    'H_revenue': 'Revenue',
    'H_employee': 'Employees',
    'SC_Q_H_employee': 'Employees',
    'H_companysize': 'Company Size',
    'I_importance': 'Importance',
    'IB_imp_weighted': 'Weighted Importance',
    'I_importance_fut': 'Future Importance',
    'IB_imp_fut_weighted': 'Future Weighted Importance',
    'I_invest_share': 'Investment Share (Past)',
    'IB_invest_share_weighted': 'Weighted Investment Share (Past)',
    'I_invest_share_fut': 'Investment Share (Future)',
    'IB_invest_share_fut_weighted': 'Weighted Investment Share (Future)',
    'I_eneffincrease_fut': 'Future Energy Efficiency Increase',
    'H_energyuse': 'Energy Use',
    # Typo fixed (was "Turn over Energy Cose").
    # NOTE(review): confirm the exact wording with the data owners.
    'SC_Q_S23_turnover_energycost': 'Turnover Energy Cost',
    'IB_energyuse_fut': 'Future Energy Use',
    'IB_energyuse_weighted': 'Weighted Energy Use',
    'IB_energyuse_fut_weighted': 'Weighted Future Energy Use',
    'H_energyuse_classes': 'Energy Use Classes',
    'SC_Q_H_energyuse_classes': 'Energy Use Classes',
    'H_energyintensity': 'Energy Intensity',
    'Des_Gesamtumsatz': 'Total Revenue',
    'Prod_Erw': 'Product Development',
    'Prod_BDB': 'Product BDB',
}
98
+
99
# pandas Series comparison method backing each operator offered in the UI.
# Filtering through these methods avoids building a df.query() string from
# column names and user text.
_COMPARISON_METHODS = {
    '==': 'eq',
    '!=': 'ne',
    '>': 'gt',
    '<': 'lt',
    '>=': 'ge',
    '<=': 'le',
}

# Phrases recognised by parse_query, most specific first so that e.g.
# "greater than or equal to" wins over "greater than" and "is not" over "is".
_OPERATOR_PHRASES = (
    ('greater than or equal to', '>='),
    ('less than or equal to', '<='),
    ('is not equal to', '!='),
    ('not equal to', '!='),
    ('greater than', '>'),
    ('more than', '>'),
    ('less than', '<'),
    ('at least', '>='),
    ('at most', '<='),
    ('is not', '!='),
    ('equal to', '=='),
    ('equals', '=='),
    ('is', '=='),
)


def _label(column):
    """Return the display label for a column, falling back to its raw name."""
    return column_name_map.get(column, column)


def _slice_colors(n_slices, single_color):
    """Return one color per pie slice from the warm-to-cold palette.

    The single-slice (and empty) case is handled here so the result does not
    depend on how the palette helper treats fewer than two colors.
    """
    if n_slices < 2:
        return [single_color] * n_slices
    return get_warm_to_cold_colors(n_slices)


def _coerce_to_column_type(raw_value, series):
    """Convert user-entered text into a value comparable with ``series``.

    Numeric (non-boolean) columns get an ``int`` when the text is an integer
    literal and a ``float`` otherwise; every other column keeps the text.

    Raises:
        ValueError: If the column is numeric and the text is not a number.
    """
    is_number_column = (
        pd.api.types.is_numeric_dtype(series)
        and not pd.api.types.is_bool_dtype(series)
    )
    if not is_number_column:
        return raw_value
    try:
        return int(raw_value)
    except ValueError:
        return float(raw_value)


def _filter_rows(df, column, operator, value):
    """Return the rows of ``df`` where ``df[column] <operator> value`` holds."""
    compare = getattr(df[column], _COMPARISON_METHODS[operator])
    return df[compare(value)]


def _show_filtered(df, column, operator, value):
    """Render the filtered rows, or an error if the comparison is not supported."""
    try:
        st.write(_filter_rows(df, column, operator, value))
    except TypeError:
        # e.g. an ordering comparison between text and a non-text column
        st.error(f"Cannot compare column '{column}' with '{value}' using '{operator}'.")


def _find_tokens(haystack, needle):
    """Return the index where word list ``needle`` first occurs in ``haystack``, or -1."""
    width = len(needle)
    if width == 0:
        return -1
    for start in range(len(haystack) - width + 1):
        if haystack[start:start + width] == needle:
            return start
    return -1


def parse_query(query, df):
    """Extract a simple ``column <operator> value`` filter from free text.

    The query is matched word by word (case-insensitively): the first phrase
    from ``_OPERATOR_PHRASES`` found in it selects the operator, the longest
    column name mentioned before that phrase selects the column, and the text
    after the phrase is the value, converted to match the column's type.

    Example: ``"Show me rows where age is greater than 30"`` on a frame with
    an integer ``age`` column yields ``('age', '>', 30)``.

    Returns:
        ``(column, operator, value)``, or ``(None, None, None)`` when no
        operator phrase, column name or usable value could be found.
    """
    not_understood = (None, None, None)
    words = query.split()
    lowered = [word.lower() for word in words]

    for phrase, operator in _OPERATOR_PHRASES:
        phrase_words = phrase.split()
        op_start = _find_tokens(lowered, phrase_words)
        if op_start >= 0:
            break
    else:
        return not_understood

    subject = lowered[:op_start]
    mentioned = [
        col for col in df.columns
        if _find_tokens(subject, str(col).lower().split()) >= 0
    ]
    if not mentioned:
        return not_understood
    # Prefer the most specific match when one column name contains another.
    column = max(mentioned, key=lambda col: len(str(col)))

    raw_value = ' '.join(words[op_start + len(phrase_words):]).strip('"\'')
    if not raw_value:
        return not_understood
    try:
        value = _coerce_to_column_type(raw_value, df[column])
    except ValueError:
        return not_understood
    return column, operator, value


def get_columns_with_prefix(df, prefix):
    """Return the column names of ``df`` that start with ``prefix``."""
    return [col for col in df.columns if col.startswith(prefix)]


def create_visualizations(df, selected_column):
    """Render a line graph, a bar chart and (when sensible) a pie chart for one column.

    The line and bar charts plot the column against the row index, colored
    from ``PREDEFINED_COLORS``. The pie chart shows the value distribution
    and is drawn only for text columns or columns with fewer than 10 distinct
    values; otherwise a short notice is written instead.
    """
    label = _label(selected_column)
    series = df[selected_column]

    # Line graph
    fig_line = px.line(df, y=selected_column, title=f"Line Graph for {label}")
    fig_line.update_traces(line=dict(color=PREDEFINED_COLORS.get(selected_column, 'black')))
    fig_line.update_layout(xaxis_title='Index', yaxis_title=label, width=800, height=400)
    st.plotly_chart(fig_line)

    # Bar chart
    fig_bar = px.bar(df, y=selected_column, title=f"Bar Chart for {label}")
    fig_bar.update_traces(marker_color=PREDEFINED_COLORS.get(selected_column, 'blue'))
    fig_bar.update_layout(xaxis_title='Index', yaxis_title=label, width=800, height=400)
    st.plotly_chart(fig_bar)

    # Pie chart (for categorical data or numerical data with few unique values)
    if series.dtype == 'object' or series.nunique() < 10:
        value_counts = series.value_counts()
        fig_pie = px.pie(names=value_counts.index, values=value_counts.values,
                         title=f"Distribution of {label}")

        # PREDEFINED_COLORS is keyed by column name, not by category value, so
        # slices are colored from the warm-to-cold palette instead.
        pie_colors = _slice_colors(len(value_counts),
                                   PREDEFINED_COLORS.get(selected_column, 'grey'))
        fig_pie.update_traces(marker=dict(colors=pie_colors))

        fig_pie.update_layout(width=800, height=400)
        st.plotly_chart(fig_pie)
    else:
        st.write(f"Pie chart not applicable for {label} due to high number of unique values.")


# Streamlit app
st.title('Fraunhofer Database')

# File uploader
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    # Load the data
    df = pd.read_csv(uploaded_file)

    # Get column names
    columns = df.columns.tolist()

    # Sidebar for user input
    st.sidebar.header('Options')
    view_option = st.sidebar.radio(
        'Select View Option',
        ['View All Data', 'Select Columns', 'Filter Data', 'Natural Language Query',
         'Visualize Data', 'General Visualization'])

    if view_option == 'View All Data':
        st.write(df)

    elif view_option == 'Select Columns':
        selected_columns = st.sidebar.multiselect('Select Columns', columns)
        if selected_columns:
            st.write(df[selected_columns])
        else:
            st.write('Please select at least one column.')

    elif view_option == 'Filter Data':
        filter_column = st.sidebar.selectbox('Select Column to Filter', columns)
        filter_operator = st.sidebar.selectbox('Select Operator', ['==', '>', '<', '>=', '<=', '!='])
        filter_value = st.sidebar.text_input('Enter Filter Value')

        if st.sidebar.button('Apply Filter'):
            if filter_value:
                try:
                    typed_value = _coerce_to_column_type(filter_value, df[filter_column])
                except ValueError:
                    # Non-numeric text entered for a numeric column
                    st.error(f"'{filter_value}' is not a valid number for column '{filter_column}'.")
                else:
                    _show_filtered(df, filter_column, filter_operator, typed_value)
            else:
                st.write('Please enter a filter value.')

    elif view_option == 'Natural Language Query':
        query = st.text_input('Enter your query (e.g., "Show me rows where age is greater than 30")')
        if query:
            column, operator, value = parse_query(query, df)
            if column and operator and value is not None:
                _show_filtered(df, column, operator, value)
            else:
                st.write("Couldn't understand the query. Please try rephrasing.")

    elif view_option == 'Visualize Data':
        st.sidebar.subheader('Visualization Options')
        chart_type = st.sidebar.selectbox(
            'Select Chart Type', ['Bar Chart', 'Stacked Bar Chart', 'Line Graph', 'Pie Chart'])

        # Sliders for figure size
        width = st.sidebar.slider('Select Figure Width', 400, 1200, 800)
        height = st.sidebar.slider('Select Figure Height', 300, 800, 600)

        if chart_type == 'Bar Chart':
            x_axis = st.sidebar.selectbox('Select X-axis', columns)
            y_axis = st.sidebar.selectbox('Select Y-axis', columns)
            if x_axis and y_axis:
                fig = px.bar(df, x=x_axis, y=y_axis)

                # Apply predefined colors
                if y_axis in PREDEFINED_COLORS:
                    fig.update_traces(marker_color=PREDEFINED_COLORS[y_axis])
                fig.update_layout(
                    title=f'{_label(y_axis)} by {_label(x_axis)}',
                    xaxis_title=_label(x_axis),
                    yaxis_title=_label(y_axis),
                    width=width,
                    height=height,
                )
                st.plotly_chart(fig)

        elif chart_type == 'Stacked Bar Chart':
            x_axis = st.sidebar.selectbox('Select X-axis', columns)
            y_axis = st.sidebar.selectbox('Select Y-axis', columns)
            secondary_y_axis = st.sidebar.selectbox('Select Secondary Y-axis (for stacking)', columns)
            if x_axis and y_axis and secondary_y_axis:
                fig = go.Figure(data=[
                    go.Bar(name=_label(y_axis), x=df[x_axis], y=df[y_axis],
                           marker_color=PREDEFINED_COLORS.get(y_axis, 'blue')),
                    go.Bar(name=_label(secondary_y_axis), x=df[x_axis], y=df[secondary_y_axis],
                           marker_color=PREDEFINED_COLORS.get(secondary_y_axis, 'green')),
                ])
                fig.update_layout(
                    barmode='stack',
                    title=f'{_label(y_axis)} and {_label(secondary_y_axis)} by {_label(x_axis)}',
                    width=width,
                    height=height,
                )
                st.plotly_chart(fig)

        elif chart_type == 'Line Graph':
            x_axis = st.sidebar.selectbox('Select X-axis for Line Graph', columns)
            y_axes = st.sidebar.multiselect('Select Y-axes for Line Graph', columns)
            if x_axis and y_axes:
                fig = go.Figure()

                for y_axis in y_axes:
                    fig.add_trace(go.Scatter(
                        x=df[x_axis],
                        y=df[y_axis],
                        mode='lines',
                        name=_label(y_axis),  # Use mapped name for the legend
                        line=dict(color=PREDEFINED_COLORS.get(y_axis, 'black')),
                    ))

                # Update layout with custom titles
                fig.update_layout(
                    title=f'{", ".join(_label(y_axis) for y_axis in y_axes)} over {_label(x_axis)}',
                    xaxis_title=_label(x_axis),
                    yaxis_title="Values",
                    width=width,
                    height=height,
                )

                st.plotly_chart(fig)

        elif chart_type == 'Pie Chart':
            category_column = st.sidebar.selectbox('Select Category for Pie Chart', columns)
            values_column = st.sidebar.selectbox('Select Values for Pie Chart', columns)
            if category_column and values_column:
                st.subheader(f'Pie Chart: {_label(values_column)} by {_label(category_column)}')

                # Total of the values column per category, largest first
                totals = df.groupby(category_column)[values_column].sum().sort_values(ascending=False)

                # Warm-to-cold colors, one per slice
                color_map = _slice_colors(len(totals), PREDEFINED_COLORS.get(values_column, 'grey'))

                # names/values are passed as arrays, so no data_frame argument is needed
                fig = px.pie(names=totals.index, values=totals.values,
                             title=f'{_label(values_column)} distribution by {_label(category_column)}',
                             color_discrete_sequence=color_map)

                fig.update_layout(width=width, height=height)
                st.plotly_chart(fig)

    elif view_option == 'General Visualization':
        st.subheader('General Visualization')

        # Get columns for each category
        h_columns = get_columns_with_prefix(df, 'H_')
        i_columns = get_columns_with_prefix(df, 'I_')
        ib_columns = get_columns_with_prefix(df, 'IB_')

        # Create tabs for different visualizations
        tabs = st.tabs(["H Columns", "I Columns", "IB Columns"])

        with tabs[0]:
            st.subheader("H Columns Visualization")
            h_selected = st.selectbox("Select an H column to visualize", h_columns)
            if h_selected:
                create_visualizations(df, h_selected)

        with tabs[1]:
            st.subheader("I Columns Visualization")
            i_selected = st.selectbox("Select an I column to visualize", i_columns)
            if i_selected:
                create_visualizations(df, i_selected)

        with tabs[2]:
            st.subheader("IB Columns Visualization")
            ib_selected = st.selectbox("Select an IB column to visualize", ib_columns)
            if ib_selected:
                create_visualizations(df, ib_selected)

        # Summary statistics
        st.subheader("Summary Statistics")
        all_selected = h_columns + i_columns + ib_columns
        if all_selected:
            # Transposed so each column becomes a row, labelled with its display name
            summary_df = df[all_selected].describe().T
            summary_df.index = [_label(col) for col in summary_df.index]

            st.write(summary_df)

else:
    st.write('Please upload your data.')

st.sidebar.info('Designed by Taha Rasouli at Fraunhofer')