import streamlit as st import pandas as pd import plotly.express as px import plotly.graph_objects as go import numpy as np def rgb_to_hex(r, g, b): return f'#{r:02X}{g:02X}{b:02X}' def interpolate_color(start_color, end_color, factor): r1, g1, b1 = start_color r2, g2, b2 = end_color r = int(r1 + (r2 - r1) * factor) g = int(g1 + (g2 - g1) * factor) b = int(b1 + (b2 - b1) * factor) return rgb_to_hex(r, g, b) def get_warm_to_cold_colors(n): hot = (124, 21, 77) # Hot color RGB medium = (0, 66, 76) # Medium color RGB cold = (197, 246, 235) # Cold color RGB colors = [] for i in range(n): if i < n // 2: factor = i / (n // 2) color = interpolate_color(hot, medium, factor) else: factor = (i - n // 2) / (n // 2) color = interpolate_color(medium, cold, factor) colors.append(color) return colors PREDEFINED_COLORS = { 'SC_Q_Origin': '#A7BCC6', #specified random color 'SC_Q_H_state':'#A7BCC6', #specified random color 'SC_Q_H_scope': '#A7BCC6', #specified random color 'H_sector': '#1A636B', 'SC_Q_H_sector': '#1A636B', "H_companysize": '#A7BCC6', #specified random color 'H_revenue': '#49677B', 'H_employee': '#125F51', 'SC_Q_H_employee': '#125F51', 'I_importance': '#074057', 'IB_imp_weighted': '#5B8394', 'I_importance_fut': '#01626F', 'IB_imp_fut_weighted': '#39808B', 'I_invest_share': '#7B8D24', 'IB_invest_share_weighted': '#BCCFD6', 'I_invest_share_fut': '#16936D', 'IB_invest_share_fut_weighted': '#1D6073', 'I_eneffincrease_fut': '#007B86', 'H_energyuse': '#245B60', 'IB_energyuse_fut': '#587081', 'IB_energyuse_weighted': '#146153', 'IB_energyuse_fut_weighted': '#035263', 'H_energyuse_classes': '#66889A', 'SC_Q_H_energyuse_classes': '#66889A', 'SC_Q_S23_turnover_energycost': '#245B60', #specified random color 'H_energyintensity': '#186B77', 'Des_Gesamtumsatz': '#4A8A95', 'Prod_Erw': '#064B55', 'Prod_BDB': '#A7BCC6' } column_name_map = { 'H_sector': 'Sector', 'SC_Q_H_sector': 'Sector', 'SC_Q_Origin': 'Origin', 'SC_Q_H_state': 'State', 'SC_Q_H_scope': 'SC_Q_H_scope', 'H_revenue': 'Revenue', 'H_employee': 'Employees', 'SC_Q_H_employee': 'Employees', "H_companysize": 'Company Size', 'I_importance': 'Importance', 'IB_imp_weighted': 'Weighted Importance', 'I_importance_fut': 'Future Importance', 'IB_imp_fut_weighted': 'Future Weighted Importance', 'I_invest_share': 'Investment Share (Past)', 'IB_invest_share_weighted': 'Weighted Investment Share (Past)', 'I_invest_share_fut': 'Investment Share (Future)', 'IB_invest_share_fut_weighted': 'Weighted Investment Share (Future)', 'I_eneffincrease_fut': 'Future Energy Efficiency Increase', 'H_energyuse': 'Energy Use', 'SC_Q_S23_turnover_energycost': 'Turn over Energy Cose', #must be reviewed 'IB_energyuse_fut': 'Future Energy Use', 'IB_energyuse_weighted': 'Weighted Energy Use', 'IB_energyuse_fut_weighted': 'Weighted Future Energy Use', 'H_energyuse_classes': 'Energy Use Classes', "SC_Q_H_energyuse_classes": 'Energy Use Classes', 'H_energyintensity': 'Energy Intensity', 'Des_Gesamtumsatz': 'Total Revenue', 'Prod_Erw': 'Product Development', 'Prod_BDB': 'Product BDB', } def create_visualizations(df, selected_column): # Line graph fig_line = px.line(df, y=selected_column, title=f"Line Graph for {selected_column}") line_color = PREDEFINED_COLORS.get(selected_column, 'black') fig_line.update_traces(line=dict(color=line_color)) fig_line.update_layout(width=800, height=400) st.plotly_chart(fig_line) # Bar chart fig_bar = px.bar(df, y=selected_column, title=f"Bar Chart for {selected_column}") bar_color = PREDEFINED_COLORS.get(selected_column, 'blue') fig_bar.update_traces(marker_color=bar_color) fig_bar.update_layout(width=800, height=400) st.plotly_chart(fig_bar) # Pie chart (for categorical data or numerical data with few unique values) if df[selected_column].dtype == 'object' or df[selected_column].nunique() < 10: value_counts = df[selected_column].value_counts() fig_pie = px.pie(names=value_counts.index, values=value_counts.values, title=f"Distribution of {selected_column}") # Use predefined colors for the pie chart slices pie_colors = [PREDEFINED_COLORS.get(c, 'grey') for c in value_counts.index] fig_pie.update_traces(marker=dict(colors=pie_colors)) fig_pie.update_layout(width=800, height=400) st.plotly_chart(fig_pie) else: st.write(f"Pie chart not applicable for {selected_column} due to high number of unique values.") # Streamlit app st.title('Fraunhofer Database') # File uploader uploaded_file = st.file_uploader("Choose a CSV file", type="csv") if uploaded_file is not None: # Load the data df = pd.read_csv(uploaded_file) # Get column names columns = df.columns.tolist() df_renamed = df.rename(columns=column_name_map) # Sidebar for user input st.sidebar.header('Options') view_option = st.sidebar.radio('Select View Option', ['View All Data', 'Select Columns', 'Filter Data', 'Natural Language Query', 'Visualize Data', 'General Visualization']) if view_option == 'View All Data': st.write(df) elif view_option == 'Select Columns': selected_columns = st.sidebar.multiselect('Select Columns', columns) if selected_columns: st.write(df[selected_columns]) else: st.write('Please select at least one column.') elif view_option == 'Filter Data': filter_column = st.sidebar.selectbox('Select Column to Filter', columns) filter_operator = st.sidebar.selectbox('Select Operator', ['==', '>', '<', '>=', '<=', '!=']) filter_value = st.sidebar.text_input('Enter Filter Value') if st.sidebar.button('Apply Filter'): if filter_value: column_type = df[filter_column].dtype if column_type == 'int64': filter_value = int(filter_value) elif column_type == 'float64': filter_value = float(filter_value) # Apply filter filtered_df = df.query(f"`{filter_column}` {filter_operator} @filter_value") st.write(filtered_df) else: st.write('Please enter a filter value.') elif view_option == 'Natural Language Query': query = st.text_input('Enter your query (e.g., "Show me rows where age is greater than 30")') if query: column, operator, value = parse_query(query, df) if column and operator and value is not None: filtered_df = df.query(f"`{column}` {operator} @value") st.write(filtered_df) else: st.write("Couldn't understand the query. Please try rephrasing.") elif view_option == 'Visualize Data': st.sidebar.subheader('Visualization Options') chart_type = st.sidebar.selectbox('Select Chart Type', ['Bar Chart', 'Stacked Bar Chart', 'Line Graph', 'Pie Chart']) # Sliders for figure size width = st.sidebar.slider('Select Figure Width', 400, 1200, 800) height = st.sidebar.slider('Select Figure Height', 300, 800, 600) # Bar Chart Example if chart_type == 'Bar Chart': x_axis = st.sidebar.selectbox('Select X-axis', columns) y_axis = st.sidebar.selectbox('Select Y-axis', columns) if x_axis and y_axis: fig = px.bar(df, x=x_axis, y=y_axis, title=f'{column_name_map.get(y_axis, "Undefined Y-axis")} by {x_axis}') # Apply predefined colors if y_axis in PREDEFINED_COLORS: fig.update_traces(marker_color=PREDEFINED_COLORS[y_axis]) fig.update_layout( title=f'{column_name_map.get(y_axis, "Undefined Y-axis")} by {column_name_map.get(x_axis, "Undefined X-axis")}', xaxis_title=column_name_map.get(x_axis, "Undefined X-axis"), yaxis_title=column_name_map.get(y_axis, "Undefined Y-axis"), width=width, height=height ) st.plotly_chart(fig) elif chart_type == 'Stacked Bar Chart': x_axis = st.sidebar.selectbox('Select X-axis', columns) y_axis = st.sidebar.selectbox('Select Y-axis', columns) secondary_y_axis = st.sidebar.selectbox('Select Secondary Y-axis (for stacking)', columns) if x_axis and y_axis and secondary_y_axis: fig = go.Figure(data=[ go.Bar(name=f'{column_name_map.get(y_axis, "Undefined Y-axis")}', x=df[x_axis], y=df[y_axis], marker_color=PREDEFINED_COLORS.get(y_axis, 'blue')), go.Bar(name=f'{column_name_map.get(secondary_y_axis, "Undefined Secondary Y-axis")}', x=df[x_axis], y=df[secondary_y_axis], marker_color=PREDEFINED_COLORS.get(secondary_y_axis, 'green')) ]) fig.update_layout(barmode='stack', title=f'{column_name_map.get(y_axis, "Undefined Y-axis")} and {column_name_map.get(secondary_y_axis, "Undefined Secondary Y-axis")} by {column_name_map.get(x_axis, "Undefined X-axis")}', width=width, height=height) st.plotly_chart(fig) elif chart_type == 'Line Graph': x_axis = st.sidebar.selectbox('Select X-axis for Line Graph', columns) y_axes = st.sidebar.multiselect('Select Y-axes for Line Graph', columns) if x_axis and y_axes: fig = go.Figure() for y_axis in y_axes: fig.add_trace(go.Scatter( x=df[x_axis], y=df[y_axis], mode='lines', name=column_name_map.get(y_axis, y_axis), # Use mapped name for the legend line=dict(color=PREDEFINED_COLORS.get(y_axis, 'black')) )) # Update layout with custom titles fig.update_layout( title=f'{", ".join(column_name_map.get(y_axis, y_axis) for y_axis in y_axes)} over {column_name_map.get(x_axis, x_axis)}', xaxis_title=column_name_map.get(x_axis, "Undefined X-axis"), yaxis_title="Values", # You can customize this or use a different mapping width=width, height=height ) st.plotly_chart(fig) # Pie Chart elif chart_type == 'Pie Chart': category_column = st.sidebar.selectbox('Select Category for Pie Chart', columns) values_column = st.sidebar.selectbox('Select Values for Pie Chart', columns) if category_column and values_column: st.subheader(f'Pie Chart: {column_name_map.get(values_column, values_column)} by {column_name_map.get(category_column, category_column)}') # Get the value counts value_counts = df.groupby(category_column)[values_column].sum().sort_values(ascending=False) # Generate warm to cold colors based on the number of slices n_slices = len(value_counts) color_map = get_warm_to_cold_colors(n_slices) # Create the pie chart using the generated color map fig = px.pie(df, names=value_counts.index, values=value_counts.values, title=f'{column_name_map.get(values_column, values_column)} distribution by {column_name_map.get(category_column, category_column)}', color_discrete_sequence=color_map) fig.update_layout(width=width, height=height) st.plotly_chart(fig) elif view_option == 'General Visualization': st.subheader('General Visualization') # Function to get columns starting with a specific prefix def get_columns_with_prefix(df, prefix): return [col for col in df.columns if col.startswith(prefix)] # Get columns for each category h_columns = get_columns_with_prefix(df, 'H_') i_columns = get_columns_with_prefix(df, 'I_') ib_columns = get_columns_with_prefix(df, 'IB_') # Create visualizations for a selected column def create_visualizations(df, selected_column): # Line graph fig_line = px.line(df, y=selected_column, title=f"Line Graph for {column_name_map.get(selected_column, selected_column)}") line_color = PREDEFINED_COLORS.get(selected_column, 'black') fig_line.update_traces(line=dict(color=line_color)) fig_line.update_layout( xaxis_title=column_name_map.get('x_axis', 'Index'), # Optional: update X-axis title if relevant yaxis_title=column_name_map.get(selected_column, 'Y-axis'), # Map Y-axis title width=800, height=400 ) st.plotly_chart(fig_line) # Bar chart fig_bar = px.bar(df, y=selected_column, title=f"Bar Chart for {column_name_map.get(selected_column, selected_column)}") bar_color = PREDEFINED_COLORS.get(selected_column, 'blue') fig_bar.update_traces(marker_color=bar_color) fig_bar.update_layout( xaxis_title=column_name_map.get('x_axis', 'Index'), # Optional: update X-axis title if relevant yaxis_title=column_name_map.get(selected_column, 'Y-axis'), # Map Y-axis title width=800, height=400 ) st.plotly_chart(fig_bar) # Pie chart (for categorical data or numerical data with few unique values) if df[selected_column].dtype == 'object' or df[selected_column].nunique() < 10: value_counts = df[selected_column].value_counts() fig_pie = px.pie(names=value_counts.index, values=value_counts.values, title=f"Distribution of {column_name_map.get(selected_column, selected_column)}") # Use predefined colors for the pie chart slices pie_colors = [PREDEFINED_COLORS.get(c, 'grey') for c in value_counts.index] fig_pie.update_traces(marker=dict(colors=pie_colors)) fig_pie.update_layout(width=800, height=400) st.plotly_chart(fig_pie) else: st.write(f"Pie chart not applicable for {column_name_map.get(selected_column, selected_column)} due to high number of unique values.") # Create tabs for different visualizations tabs = st.tabs(["H Columns", "I Columns", "IB Columns"]) with tabs[0]: st.subheader("H Columns Visualization") h_selected = st.selectbox("Select an H column to visualize", h_columns) if h_selected: create_visualizations(df, h_selected) with tabs[1]: st.subheader("I Columns Visualization") i_selected = st.selectbox("Select an I column to visualize", i_columns) if i_selected: create_visualizations(df, i_selected) with tabs[2]: st.subheader("IB Columns Visualization") ib_selected = st.selectbox("Select an IB column to visualize", ib_columns) if ib_selected: create_visualizations(df, ib_selected) # Summary statistics st.subheader("Summary Statistics") all_selected = h_columns + i_columns + ib_columns if all_selected: # Create a new DataFrame for summary statistics with mapped column names summary_df = df[all_selected].describe().T # Transpose for better readability summary_df.index = [column_name_map.get(col, col) for col in summary_df.index] # Update index names st.write(summary_df) else: st.write('Please upload your data.') st.sidebar.info('Designed by Taha Rasouli at Fraunhofer')