File size: 16,674 Bytes
6ed0dfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

def rgb_to_hex(r, g, b):
    return f'#{r:02X}{g:02X}{b:02X}'

def interpolate_color(start_color, end_color, factor):
    r1, g1, b1 = start_color
    r2, g2, b2 = end_color
    r = int(r1 + (r2 - r1) * factor)
    g = int(g1 + (g2 - g1) * factor)
    b = int(b1 + (b2 - b1) * factor)
    return rgb_to_hex(r, g, b)

def get_warm_to_cold_colors(n):
    hot = (124, 21, 77)  # Hot color RGB
    medium = (0, 66, 76)  # Medium color RGB
    cold = (197, 246, 235)  # Cold color RGB
    
    colors = []
    for i in range(n):
        if i < n // 2:
            factor = i / (n // 2)
            color = interpolate_color(hot, medium, factor)
        else:
            factor = (i - n // 2) / (n // 2)
            color = interpolate_color(medium, cold, factor)
        colors.append(color)
    
    return colors

PREDEFINED_COLORS = {
    'SC_Q_Origin': '#A7BCC6', #specified random color
    'SC_Q_H_state':'#A7BCC6', #specified random color
    'SC_Q_H_scope': '#A7BCC6', #specified random color
    'H_sector': '#1A636B',
    'SC_Q_H_sector': '#1A636B',
    "H_companysize": '#A7BCC6', #specified random color
    'H_revenue': '#49677B',
    'H_employee': '#125F51',
    'SC_Q_H_employee': '#125F51',
    'I_importance': '#074057',
    'IB_imp_weighted': '#5B8394',
    'I_importance_fut': '#01626F',
    'IB_imp_fut_weighted': '#39808B',
    'I_invest_share': '#7B8D24',
    'IB_invest_share_weighted': '#BCCFD6',
    'I_invest_share_fut': '#16936D',
    'IB_invest_share_fut_weighted': '#1D6073',
    'I_eneffincrease_fut': '#007B86',
    'H_energyuse': '#245B60',
    'IB_energyuse_fut': '#587081',
    'IB_energyuse_weighted': '#146153',
    'IB_energyuse_fut_weighted': '#035263',
    'H_energyuse_classes': '#66889A',
    'SC_Q_H_energyuse_classes': '#66889A',
    'SC_Q_S23_turnover_energycost': '#245B60', #specified random color  
    'H_energyintensity': '#186B77',
    'Des_Gesamtumsatz': '#4A8A95',
    'Prod_Erw': '#064B55',
    'Prod_BDB': '#A7BCC6'
}

column_name_map = {
    'H_sector': 'Sector',
    'SC_Q_H_sector': 'Sector',
    'SC_Q_Origin': 'Origin', 
    'SC_Q_H_state': 'State',
    'SC_Q_H_scope': 'SC_Q_H_scope',
    'H_revenue': 'Revenue',
    'H_employee': 'Employees',
    'SC_Q_H_employee': 'Employees', 
    "H_companysize": 'Company Size',
    'I_importance': 'Importance',
    'IB_imp_weighted': 'Weighted Importance',
    'I_importance_fut': 'Future Importance',
    'IB_imp_fut_weighted': 'Future Weighted Importance',
    'I_invest_share': 'Investment Share (Past)',
    'IB_invest_share_weighted': 'Weighted Investment Share (Past)',
    'I_invest_share_fut': 'Investment Share (Future)',
    'IB_invest_share_fut_weighted': 'Weighted Investment Share (Future)',
    'I_eneffincrease_fut': 'Future Energy Efficiency Increase',
    'H_energyuse': 'Energy Use',
    'SC_Q_S23_turnover_energycost': 'Turn over Energy Cose', #must be reviewed
    'IB_energyuse_fut': 'Future Energy Use',
    'IB_energyuse_weighted': 'Weighted Energy Use',
    'IB_energyuse_fut_weighted': 'Weighted Future Energy Use',
    'H_energyuse_classes': 'Energy Use Classes',
    "SC_Q_H_energyuse_classes": 'Energy Use Classes',
    'H_energyintensity': 'Energy Intensity',
    'Des_Gesamtumsatz': 'Total Revenue',
    'Prod_Erw': 'Product Development',
    'Prod_BDB': 'Product BDB',
}

def create_visualizations(df, selected_column):
    # Line graph
    fig_line = px.line(df, y=selected_column, title=f"Line Graph for {selected_column}")
    line_color = PREDEFINED_COLORS.get(selected_column, 'black')
    fig_line.update_traces(line=dict(color=line_color))
    fig_line.update_layout(width=800, height=400)
    st.plotly_chart(fig_line)
    
    # Bar chart
    fig_bar = px.bar(df, y=selected_column, title=f"Bar Chart for {selected_column}")
    bar_color = PREDEFINED_COLORS.get(selected_column, 'blue')
    fig_bar.update_traces(marker_color=bar_color)
    fig_bar.update_layout(width=800, height=400)
    st.plotly_chart(fig_bar)
    
    # Pie chart (for categorical data or numerical data with few unique values)
    if df[selected_column].dtype == 'object' or df[selected_column].nunique() < 10:
        value_counts = df[selected_column].value_counts()
        fig_pie = px.pie(names=value_counts.index, values=value_counts.values, title=f"Distribution of {selected_column}")
        
        # Use predefined colors for the pie chart slices
        pie_colors = [PREDEFINED_COLORS.get(c, 'grey') for c in value_counts.index]
        fig_pie.update_traces(marker=dict(colors=pie_colors))
        
        fig_pie.update_layout(width=800, height=400)
        st.plotly_chart(fig_pie)
    else:
        st.write(f"Pie chart not applicable for {selected_column} due to high number of unique values.")

# Streamlit app
st.title('Fraunhofer Database')

# File uploader
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    # Load the data
    df = pd.read_csv(uploaded_file)
    
    # Get column names
    columns = df.columns.tolist()

    df_renamed = df.rename(columns=column_name_map)

    # Sidebar for user input
    st.sidebar.header('Options')
    view_option = st.sidebar.radio('Select View Option', 
                               ['View All Data', 'Select Columns', 'Filter Data', 'Natural Language Query', 'Visualize Data', 'General Visualization'])

    if view_option == 'View All Data':
        st.write(df)

    elif view_option == 'Select Columns':
        selected_columns = st.sidebar.multiselect('Select Columns', columns)
        if selected_columns:
            st.write(df[selected_columns])
        else:
            st.write('Please select at least one column.')

    elif view_option == 'Filter Data':
        filter_column = st.sidebar.selectbox('Select Column to Filter', columns)
        filter_operator = st.sidebar.selectbox('Select Operator', ['==', '>', '<', '>=', '<=', '!='])
        filter_value = st.sidebar.text_input('Enter Filter Value')
        
        if st.sidebar.button('Apply Filter'):
            if filter_value:
                column_type = df[filter_column].dtype
                if column_type == 'int64':
                    filter_value = int(filter_value)
                elif column_type == 'float64':
                    filter_value = float(filter_value)
                
                # Apply filter
                filtered_df = df.query(f"`{filter_column}` {filter_operator} @filter_value")
                st.write(filtered_df)
            else:
                st.write('Please enter a filter value.')

    elif view_option == 'Natural Language Query':
        query = st.text_input('Enter your query (e.g., "Show me rows where age is greater than 30")')
        if query:
            column, operator, value = parse_query(query, df)
            if column and operator and value is not None:
                filtered_df = df.query(f"`{column}` {operator} @value")
                st.write(filtered_df)
            else:
                st.write("Couldn't understand the query. Please try rephrasing.")

    elif view_option == 'Visualize Data':
        st.sidebar.subheader('Visualization Options')
        chart_type = st.sidebar.selectbox('Select Chart Type', ['Bar Chart', 'Stacked Bar Chart', 'Line Graph', 'Pie Chart'])
        
        # Sliders for figure size
        width = st.sidebar.slider('Select Figure Width', 400, 1200, 800)
        height = st.sidebar.slider('Select Figure Height', 300, 800, 600)
    
        # Bar Chart Example
        if chart_type == 'Bar Chart':
            x_axis = st.sidebar.selectbox('Select X-axis', columns)
            y_axis = st.sidebar.selectbox('Select Y-axis', columns)
            if x_axis and y_axis:
                fig = px.bar(df, x=x_axis, y=y_axis, title=f'{column_name_map.get(y_axis, "Undefined Y-axis")} by {x_axis}')
                
                # Apply predefined colors
                if y_axis in PREDEFINED_COLORS:
                    fig.update_traces(marker_color=PREDEFINED_COLORS[y_axis])
                fig.update_layout(
                    title=f'{column_name_map.get(y_axis, "Undefined Y-axis")} by {column_name_map.get(x_axis, "Undefined X-axis")}',
                    xaxis_title=column_name_map.get(x_axis, "Undefined X-axis"),
                    yaxis_title=column_name_map.get(y_axis, "Undefined Y-axis"),
                    width=width,
                    height=height
                )
                st.plotly_chart(fig)
    
        elif chart_type == 'Stacked Bar Chart':
            x_axis = st.sidebar.selectbox('Select X-axis', columns)
            y_axis = st.sidebar.selectbox('Select Y-axis', columns)
            secondary_y_axis = st.sidebar.selectbox('Select Secondary Y-axis (for stacking)', columns)
            if x_axis and y_axis and secondary_y_axis:
                fig = go.Figure(data=[
                    go.Bar(name=f'{column_name_map.get(y_axis, "Undefined Y-axis")}', x=df[x_axis], y=df[y_axis], marker_color=PREDEFINED_COLORS.get(y_axis, 'blue')),
                    go.Bar(name=f'{column_name_map.get(secondary_y_axis, "Undefined Secondary Y-axis")}', x=df[x_axis], y=df[secondary_y_axis], marker_color=PREDEFINED_COLORS.get(secondary_y_axis, 'green'))
                ])
                fig.update_layout(barmode='stack', title=f'{column_name_map.get(y_axis, "Undefined Y-axis")} and {column_name_map.get(secondary_y_axis, "Undefined Secondary Y-axis")} by {column_name_map.get(x_axis, "Undefined X-axis")}', width=width, height=height)
                st.plotly_chart(fig)
    
        elif chart_type == 'Line Graph':
            x_axis = st.sidebar.selectbox('Select X-axis for Line Graph', columns)
            y_axes = st.sidebar.multiselect('Select Y-axes for Line Graph', columns)
            if x_axis and y_axes:
                fig = go.Figure()
                
                for y_axis in y_axes:
                    fig.add_trace(go.Scatter(
                        x=df[x_axis], 
                        y=df[y_axis], 
                        mode='lines', 
                        name=column_name_map.get(y_axis, y_axis),  # Use mapped name for the legend
                        line=dict(color=PREDEFINED_COLORS.get(y_axis, 'black'))
                    ))
                
                # Update layout with custom titles
                fig.update_layout(
                    title=f'{", ".join(column_name_map.get(y_axis, y_axis) for y_axis in y_axes)} over {column_name_map.get(x_axis, x_axis)}',
                    xaxis_title=column_name_map.get(x_axis, "Undefined X-axis"),
                    yaxis_title="Values",  # You can customize this or use a different mapping
                    width=width,
                    height=height
                )
                
                st.plotly_chart(fig)
    
        # Pie Chart
        elif chart_type == 'Pie Chart':
            category_column = st.sidebar.selectbox('Select Category for Pie Chart', columns)
            values_column = st.sidebar.selectbox('Select Values for Pie Chart', columns)
            if category_column and values_column:
                st.subheader(f'Pie Chart: {column_name_map.get(values_column, values_column)} by {column_name_map.get(category_column, category_column)}')
                
                # Get the value counts
                value_counts = df.groupby(category_column)[values_column].sum().sort_values(ascending=False)
                
                # Generate warm to cold colors based on the number of slices
                n_slices = len(value_counts)
                color_map = get_warm_to_cold_colors(n_slices)
                
                # Create the pie chart using the generated color map
                fig = px.pie(df, names=value_counts.index, values=value_counts.values, 
                             title=f'{column_name_map.get(values_column, values_column)} distribution by {column_name_map.get(category_column, category_column)}',
                             color_discrete_sequence=color_map)
                
                fig.update_layout(width=width, height=height)
                st.plotly_chart(fig)

    elif view_option == 'General Visualization':
        st.subheader('General Visualization')
        
        # Function to get columns starting with a specific prefix
        def get_columns_with_prefix(df, prefix):
            return [col for col in df.columns if col.startswith(prefix)]
        
        # Get columns for each category
        h_columns = get_columns_with_prefix(df, 'H_')
        i_columns = get_columns_with_prefix(df, 'I_')
        ib_columns = get_columns_with_prefix(df, 'IB_')
        
        # Create visualizations for a selected column
        def create_visualizations(df, selected_column):
            # Line graph
            fig_line = px.line(df, y=selected_column, 
                               title=f"Line Graph for {column_name_map.get(selected_column, selected_column)}")
            line_color = PREDEFINED_COLORS.get(selected_column, 'black')
            fig_line.update_traces(line=dict(color=line_color))
            fig_line.update_layout(
                xaxis_title=column_name_map.get('x_axis', 'Index'),  # Optional: update X-axis title if relevant
                yaxis_title=column_name_map.get(selected_column, 'Y-axis'),  # Map Y-axis title
                width=800, height=400
            )
            st.plotly_chart(fig_line)
            
            # Bar chart
            fig_bar = px.bar(df, y=selected_column, 
                             title=f"Bar Chart for {column_name_map.get(selected_column, selected_column)}")
            bar_color = PREDEFINED_COLORS.get(selected_column, 'blue')
            fig_bar.update_traces(marker_color=bar_color)
            fig_bar.update_layout(
                xaxis_title=column_name_map.get('x_axis', 'Index'),  # Optional: update X-axis title if relevant
                yaxis_title=column_name_map.get(selected_column, 'Y-axis'),  # Map Y-axis title
                width=800, height=400
            )
            st.plotly_chart(fig_bar)
            
            # Pie chart (for categorical data or numerical data with few unique values)
            if df[selected_column].dtype == 'object' or df[selected_column].nunique() < 10:
                value_counts = df[selected_column].value_counts()
                fig_pie = px.pie(names=value_counts.index, values=value_counts.values, 
                                 title=f"Distribution of {column_name_map.get(selected_column, selected_column)}")
                
                # Use predefined colors for the pie chart slices
                pie_colors = [PREDEFINED_COLORS.get(c, 'grey') for c in value_counts.index]
                fig_pie.update_traces(marker=dict(colors=pie_colors))
                
                fig_pie.update_layout(width=800, height=400)
                st.plotly_chart(fig_pie)
            else:
                st.write(f"Pie chart not applicable for {column_name_map.get(selected_column, selected_column)} due to high number of unique values.")

        
        # Create tabs for different visualizations
        tabs = st.tabs(["H Columns", "I Columns", "IB Columns"])
        
        with tabs[0]:
            st.subheader("H Columns Visualization")
            h_selected = st.selectbox("Select an H column to visualize", h_columns)
            if h_selected:
                create_visualizations(df, h_selected)
        
        with tabs[1]:
            st.subheader("I Columns Visualization")
            i_selected = st.selectbox("Select an I column to visualize", i_columns)
            if i_selected:
                create_visualizations(df, i_selected)
        
        with tabs[2]:
            st.subheader("IB Columns Visualization")
            ib_selected = st.selectbox("Select an IB column to visualize", ib_columns)
            if ib_selected:
                create_visualizations(df, ib_selected)
        
        # Summary statistics
        st.subheader("Summary Statistics")
        all_selected = h_columns + i_columns + ib_columns
        if all_selected:
            # Create a new DataFrame for summary statistics with mapped column names
            summary_df = df[all_selected].describe().T  # Transpose for better readability
            summary_df.index = [column_name_map.get(col, col) for col in summary_df.index]  # Update index names

            st.write(summary_df)

else:
    st.write('Please upload your data.')

st.sidebar.info('Designed by Taha Rasouli at Fraunhofer')