# AskMyData / app.py
# wweavishayaknin's picture
# Update app.py
# 20d0d43 verified
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import random
# Function to upload the data from the user
def load_data():
    """Show the sidebar uploader and return the uploaded table.

    The file is read with ``pd.read_csv`` or ``pd.read_excel`` depending on
    its extension (matched case-insensitively). Every column whose label
    contains one of the date keywords below is then converted with
    ``pd.to_datetime(..., errors='coerce')``, so unparseable values become
    ``NaT``; a column whose conversion raises is left as it was read.

    Returns:
        A ``pandas.DataFrame`` with the uploaded data, or ``None`` (after
        showing a warning) when no file has been uploaded or the file name
        has neither a ``.csv`` nor a ``.xlsx`` extension.
    """
    # NOTE(review): these literals look like mis-decoded UTF-8 text. They are
    # reproduced unchanged because they are matched against column labels at
    # runtime -- confirm the source file's encoding.
    date_keywords = ('ืชื›ื ื•ืŸ', 'ื‘ื™ืฆื•ืข', 'ื”ื’ืขื”')

    st.sidebar.header("Upload Your Data")
    uploaded_file = st.sidebar.file_uploader(
        "Choose a CSV or Excel file", type=["csv", "xlsx"]
    )
    if uploaded_file is None:
        st.warning("Please upload a CSV or Excel file to proceed.")
        return None

    # Lower-case the name so "REPORT.CSV" is treated like "report.csv".
    file_name = uploaded_file.name.lower()
    if file_name.endswith('.csv'):
        data = pd.read_csv(uploaded_file)
    elif file_name.endswith('.xlsx'):
        data = pd.read_excel(uploaded_file)
    else:
        # Without this branch `data` would be unbound below.
        st.warning("Please upload a CSV or Excel file to proceed.")
        return None

    # Try to convert date columns to datetime if they exist
    for col in data.columns:
        # Column labels are not guaranteed to be strings (e.g. numeric Excel
        # headers), so compare against their text form.
        label = str(col)
        if not any(keyword in label for keyword in date_keywords):
            continue
        try:
            # 'coerce' will convert invalid dates to NaT
            data[col] = pd.to_datetime(data[col], errors='coerce')
        except (ValueError, TypeError, OverflowError):
            # Best effort: keep the column as read when it cannot be converted.
            continue
    return data
# Function to calculate delay
def calculate_delay(data):
    """Return *data* with a ``Delay`` column, in minutes, when it can be computed.

    When both time columns named below are present, they are converted with
    ``pd.to_datetime(..., errors='coerce')`` and ``Delay`` is set to
    ``(end - start)`` expressed in minutes; rows where either value cannot be
    parsed get ``NaN``. The work is done on a copy, so the DataFrame passed in
    is not modified.

    Args:
        data: The ``pandas.DataFrame`` to extend.

    Returns:
        A new DataFrame with the two time columns converted and ``Delay``
        added, or *data* itself, unchanged, when either time column is
        missing.
    """
    # NOTE(review): these column labels look like mis-decoded UTF-8 text; they
    # are reproduced unchanged because they must match the uploaded data.
    start_col = 'ืฉืขืช ื‘ื™ืฆื•ืข (ืžื‘ื“ืง)'
    end_col = 'ืฉืขืช ื”ื’ืขื” (ืžื‘ื“ืง)'

    if start_col not in data.columns or end_col not in data.columns:
        return data

    # Copy first so the caller's frame keeps its original column values.
    result = data.copy()
    result[start_col] = pd.to_datetime(result[start_col], errors='coerce')
    result[end_col] = pd.to_datetime(result[end_col], errors='coerce')
    result['Delay'] = (result[end_col] - result[start_col]).dt.total_seconds() / 60
    return result
# Function for a generic set of BI questions and plots
# NOTE(review): this column label looks like mis-decoded UTF-8 text; it is
# reproduced unchanged because it must match the uploaded data.
_STATUS_COLUMN = 'ืกื˜ื˜ื•ืก'


def _has_columns(data, *names):
    """Return True when every name in *names* is a column of *data*."""
    return all(name in data.columns for name in names)


def _bar_delay_by_direction(data):
    """Bar chart of mean Delay per Direction, or None if a column is missing."""
    if not _has_columns(data, 'Delay', 'Direction'):
        return None
    direction_delay = data.groupby('Direction')['Delay'].mean().reset_index()
    fig, ax = plt.subplots()
    sns.barplot(x='Direction', y='Delay', data=direction_delay, ax=ax)
    ax.set_title("Delay by Direction")
    return fig


def _line_delay_by_hour(data):
    """Line chart of mean Delay per hour of Plantime, or None if unavailable."""
    if not _has_columns(data, 'Plantime', 'Delay'):
        return None
    hours = pd.to_datetime(data['Plantime'], errors='coerce').dt.hour
    # Group in a scratch frame so no helper column is added to `data`; a
    # leftover 'hour_of_day' column would otherwise leak into the heatmap.
    scratch = pd.DataFrame({
        'hour_of_day': hours.to_numpy(),
        'Delay': data['Delay'].to_numpy(),
    })
    hourly_delay = scratch.groupby('hour_of_day')['Delay'].mean()
    if hourly_delay.empty:
        return None
    fig, ax = plt.subplots()
    hourly_delay.plot(kind="line", ax=ax)
    ax.set_title("Delay over Time")
    return fig


def _scatter_delay_vs_plantime(data):
    """Scatter of Delay against Plantime, or None if a column is missing."""
    if not _has_columns(data, 'Plantime', 'Delay'):
        return None
    # Always plot parsed datetimes, so the x axis does not depend on whether
    # a line chart happened to be drawn earlier in the run.
    plantime = pd.to_datetime(data['Plantime'], errors='coerce')
    fig, ax = plt.subplots()
    sns.scatterplot(x=plantime, y=data['Delay'], ax=ax)
    ax.set_title("Delay vs. Plantime")
    return fig


def _box_delay_by_status(data):
    """Box plot of Delay per status value, or None if a column is missing."""
    if not _has_columns(data, 'Delay', _STATUS_COLUMN):
        return None
    fig, ax = plt.subplots()
    sns.boxplot(x=_STATUS_COLUMN, y='Delay', data=data, ax=ax)
    ax.set_title("Delay by Status")
    return fig


def _hist_delay(data):
    """Histogram (with KDE) of Delay, or None if the column is missing."""
    if not _has_columns(data, 'Delay'):
        return None
    fig, ax = plt.subplots()
    sns.histplot(data['Delay'], kde=True, ax=ax)
    ax.set_title("Distribution of Delay")
    return fig


def _heatmap_correlations(data):
    """Correlation heatmap of numeric/bool columns, or None if there are none."""
    # Restrict to numeric data explicitly: DataFrame.corr() raises on text
    # columns in pandas >= 2.0.
    numeric = data.select_dtypes(include=['number', 'bool'])
    if numeric.shape[1] == 0:
        return None
    corr = numeric.corr()
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
    ax.set_title("Correlation Heatmap")
    return fig


def _pie_delay_categories(data):
    """Pie chart of 'Delayed' (> 0) vs 'On Time' rows, or None if unavailable."""
    if not _has_columns(data, 'Delay'):
        return None
    # Drop missing delays first; NaN > 0 is False, which would otherwise
    # count rows with an unknown delay as 'On Time'.
    known_delays = data['Delay'].dropna()
    delay_status = (
        known_delays.gt(0)
        .map({True: 'Delayed', False: 'On Time'})
        .value_counts()
    )
    if delay_status.empty:
        return None
    fig, ax = plt.subplots()
    delay_status.plot(kind="pie", ax=ax, autopct='%1.1f%%', startangle=90)
    ax.set_title("Delay Categories")
    return fig


def _count_status(data):
    """Count plot of the status column, or None if the column is missing."""
    if not _has_columns(data, _STATUS_COLUMN):
        return None
    fig, ax = plt.subplots()
    sns.countplot(x=_STATUS_COLUMN, data=data, ax=ax)
    ax.set_title("Frequency of Status")
    return fig


# kind -> (subheader label, figure builder). Insertion order matches the
# original random.choice() list so a seeded RNG picks the same kinds.
_PLOT_BUILDERS = {
    "bar": ("Bar Chart - Delay by Direction", _bar_delay_by_direction),
    "line": ("Line Chart - Delay over Time", _line_delay_by_hour),
    "scatter": ("Scatter Plot - Delay vs. Plantime", _scatter_delay_vs_plantime),
    "box": ("Box Plot - Delay by Status", _box_delay_by_status),
    "hist": ("Histogram - Distribution of Delay", _hist_delay),
    "heatmap": ("Heatmap of Correlations", _heatmap_correlations),
    "pie": ("Pie Chart - Delay Categories", _pie_delay_categories),
    "count": ("Count Plot - Status Frequency", _count_status),
}
_PLOT_KINDS = tuple(_PLOT_BUILDERS)


def _render_random_plot(data, i):
    """Pick a random plot kind and render it as plot number *i*.

    Nothing is rendered when the chosen kind cannot be built from *data*
    (its builder returns None), so plot numbers may have gaps.
    """
    label, build = _PLOT_BUILDERS[random.choice(_PLOT_KINDS)]
    fig = build(data)
    if fig is None:
        return
    try:
        st.subheader(f"Plot {i}: {label}")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; with 60 plots
        # per run they would otherwise pile up across reruns.
        plt.close(fig)


def bi_questions_analysis(data):
    """Render the dashboard: a title and 5 headed groups of 12 random plots.

    ``calculate_delay`` is applied first so the Delay-based plots are
    available when the required time columns exist. Each plot's kind is
    chosen at random; a kind whose required columns are missing from *data*
    is skipped silently.

    Args:
        data: The ``pandas.DataFrame`` to visualise.
    """
    st.title("Business Intelligence Dashboard: 60 Questions and Plots")
    # Ensure Delay column is created
    data = calculate_delay(data)

    page_count = 5
    plots_per_page = 12
    # Loop through the 12 plots for each page
    for page in range(1, page_count + 1):
        st.header(f"Page {page} - {plots_per_page} Plots")
        first_plot = (page - 1) * plots_per_page + 1
        for i in range(first_plot, first_plot + plots_per_page):
            _render_random_plot(data, i)
# Main page navigation: a sidebar selector that dispatches to the chosen page
def main_page_navigation(data):
    """Show a sidebar select box of pages and render the selected one.

    Args:
        data: The DataFrame handed to the selected page's render function.
    """
    page_renderers = {"Business Intelligence Dashboard": bi_questions_analysis}
    selected_title = st.sidebar.selectbox("Select a page", page_renderers.keys())
    render_page = page_renderers[selected_title]
    render_page(data)
# Run the Streamlit app
if __name__ == "__main__":
    # load_data() returns None (after showing its own warning) until a file
    # has been uploaded, so the pages are rendered only once data exists.
    data = load_data()
    if data is not None:
        main_page_navigation(data)