wweavishayaknin committed on
Commit
20d0d43
·
verified ·
1 Parent(s): 52a0ebc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -112
app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  import seaborn as sns
4
  import matplotlib.pyplot as plt
5
  import numpy as np
 
6
 
7
  # Function to upload the data from the user
8
  def load_data():
@@ -30,125 +31,98 @@ def load_data():
30
  # Function to calculate delay
31
  def calculate_delay(data):
32
  if 'ืฉืขืช ื‘ื™ืฆื•ืข (ืžื‘ื“ืง)' in data.columns and 'ืฉืขืช ื”ื’ืขื” (ืžื‘ื“ืง)' in data.columns:
33
- # Ensure that the columns are converted to datetime if not already
34
  data['ืฉืขืช ื‘ื™ืฆื•ืข (ืžื‘ื“ืง)'] = pd.to_datetime(data['ืฉืขืช ื‘ื™ืฆื•ืข (ืžื‘ื“ืง)'], errors='coerce')
35
  data['ืฉืขืช ื”ื’ืขื” (ืžื‘ื“ืง)'] = pd.to_datetime(data['ืฉืขืช ื”ื’ืขื” (ืžื‘ื“ืง)'], errors='coerce')
36
-
37
- # Calculate the delay in minutes
38
  data['Delay'] = (data['ืฉืขืช ื”ื’ืขื” (ืžื‘ื“ืง)'] - data['ืฉืขืช ื‘ื™ืฆื•ืข (ืžื‘ื“ืง)']).dt.total_seconds() / 60
39
- return data
40
- else:
41
- st.warning("Columns for delay calculation are missing.")
42
- return data
43
 
44
- # Function to display the analysis based on the BI questions
45
  def bi_questions_analysis(data):
46
  st.title("Business Intelligence Dashboard: 60 Questions and Plots")
47
 
48
- # 1. Average delay based on direction of the trip
49
- direction_delay = data.groupby('Direction')['Delay'].mean().reset_index()
50
- st.subheader("1. Average Delay Based on Direction")
51
- st.write(direction_delay)
52
- fig, ax = plt.subplots()
53
- sns.barplot(x='Direction', y='Delay', data=direction_delay, ax=ax)
54
- ax.set_title("Average Delay by Direction")
55
- st.pyplot(fig)
56
-
57
- # 2. Count of trips with and without delay
58
- delay_status = data['Delay'].apply(lambda x: 'Delayed' if x > 0 else 'On Time').value_counts().reset_index()
59
- st.subheader("2. Count of Trips With and Without Delay")
60
- st.write(delay_status)
61
- fig, ax = plt.subplots()
62
- sns.barplot(x='index', y='Delay', data=delay_status, ax=ax)
63
- ax.set_title("Trips with Delay vs On Time")
64
- st.pyplot(fig)
65
-
66
- # 3. Average delay based on alternative route
67
- if 'Alternative' in data.columns:
68
- alternative_delay = data.groupby('Alternative')['Delay'].mean().reset_index()
69
- st.subheader("3. Average Delay Based on Alternative Route")
70
- st.write(alternative_delay)
71
- fig, ax = plt.subplots()
72
- sns.barplot(x='Alternative', y='Delay', data=alternative_delay, ax=ax)
73
- ax.set_title("Average Delay by Alternative Route")
74
- st.pyplot(fig)
75
-
76
- # 4. Correlation between trip time and delay
77
- st.subheader("4. Correlation Between Trip Time and Delay")
78
- if 'Plantime' in data.columns:
79
- data['Plantime'] = pd.to_datetime(data['Plantime'], errors='coerce')
80
- data['hour_of_day'] = data['Plantime'].dt.hour
81
- correlation = data[['hour_of_day', 'Delay']].corr().iloc[0, 1]
82
- st.write(f"Correlation between trip start time and delay: {correlation}")
83
-
84
- # 5. Operator with highest and lowest average delay
85
- if 'ืžืคืขื™ืœ' in data.columns:
86
- operator_delay = data.groupby('ืžืคืขื™ืœ')['Delay'].mean().reset_index()
87
- st.subheader("5. Operator with Highest and Lowest Average Delay")
88
- st.write(operator_delay)
89
- fig, ax = plt.subplots()
90
- sns.barplot(x='ืžืคืขื™ืœ', y='Delay', data=operator_delay, ax=ax)
91
- ax.set_title("Average Delay by Operator")
92
- st.pyplot(fig)
93
-
94
- # 6. Number of trips categorized as first trip vs last trip
95
- if 'ื ืกื™ืขื” ืจืืฉื•ื ื”' in data.columns and 'ื ืกื™ืขื” ืื—ืจื•ื ื”' in data.columns:
96
- first_last_trip_count = data.groupby(['ื ืกื™ืขื” ืจืืฉื•ื ื”', 'ื ืกื™ืขื” ืื—ืจื•ื ื”']).size().reset_index(name='Count')
97
- st.subheader("6. Number of First vs Last Trips")
98
- st.write(first_last_trip_count)
99
- fig, ax = plt.subplots()
100
- sns.barplot(x='ื ืกื™ืขื” ืจืืฉื•ื ื”', y='Count', hue='ื ืกื™ืขื” ืื—ืจื•ื ื”', data=first_last_trip_count, ax=ax)
101
- ax.set_title("First vs Last Trips")
102
- st.pyplot(fig)
103
-
104
- # 7. Number of trips with no delay vs delay > 10 minutes
105
- delayed_trips = data[data['Delay'] > 10]
106
- no_delay_trips = data[data['Delay'] <= 0]
107
- st.subheader("7. Number of Trips with No Delay vs Delay > 10 Minutes")
108
- st.write(f"Trips with no delay: {len(no_delay_trips)}")
109
- st.write(f"Trips with delay > 10 minutes: {len(delayed_trips)}")
110
-
111
- # 8. Trip delays by license number (vehicle performance)
112
- if 'Licensenumber' in data.columns:
113
- license_delay = data.groupby('Licensenumber')['Delay'].mean().reset_index()
114
- st.subheader("8. Delay by License Number")
115
- st.write(license_delay)
116
- fig, ax = plt.subplots()
117
- sns.barplot(x='Licensenumber', y='Delay', data=license_delay, ax=ax)
118
- ax.set_title("Average Delay by License Number")
119
- st.pyplot(fig)
120
-
121
- # 9. Percentage of trips delayed by 5-20 minutes
122
- delayed_5_20 = data[data['Delay'].between(5, 20)].shape[0]
123
- total_trips = data.shape[0]
124
- percentage_delay_5_20 = (delayed_5_20 / total_trips) * 100
125
- st.subheader("9. Percentage of Trips Delayed by 5-20 Minutes")
126
- st.write(f"Percentage of trips delayed by 5-20 minutes: {percentage_delay_5_20:.2f}%")
127
-
128
- # 10. Delay distribution for each trip status
129
- if 'ืกื˜ื˜ื•ืก ื ืกื™ืขื”' in data.columns:
130
- status_delay = data.groupby('ืกื˜ื˜ื•ืก ื ืกื™ืขื”')['Delay'].mean().reset_index()
131
- st.subheader("10. Delay Distribution by Trip Status")
132
- st.write(status_delay)
133
- fig, ax = plt.subplots()
134
- sns.barplot(x='ืกื˜ื˜ื•ืก ื ืกื™ืขื”', y='Delay', data=status_delay, ax=ax)
135
- ax.set_title("Average Delay by Trip Status")
136
- st.pyplot(fig)
137
-
138
- # Additional 50 plots (from 11 to 60)
139
- for i in range(11, 61):
140
- # Here, we will generate random or calculated plots based on data that may exist
141
- # The below are just placeholders for additional plotting. You can adjust them as needed.
142
- st.subheader(f"{i}. Sample Question {i}")
143
- fig, ax = plt.subplots()
144
- # Plotting with random data (or replace with real analysis logic)
145
- sns.histplot(np.random.rand(100), kde=True, ax=ax)
146
- ax.set_title(f"Sample Plot {i}")
147
- st.pyplot(fig)
148
-
149
- # The plots from 11 to 60 can be customized according to your needs.
150
- # If you want to use specific attributes from the data for those plots, just adjust the logic accordingly.
151
-
152
  # Main Page Navigation with buttons for each section
153
  def main_page_navigation(data):
154
  pages = {
 
3
  import seaborn as sns
4
  import matplotlib.pyplot as plt
5
  import numpy as np
6
+ import random
7
 
8
  # Function to upload the data from the user
9
  def load_data():
 
# Function to calculate delay
def calculate_delay(data, performed_col='שעת ביצוע (מבדק)', arrival_col='שעת הגעה (מבדק)'):
    """Add a ``Delay`` column, in minutes, to *data* and return the frame.

    ``Delay`` is ``arrival_col - performed_col``. Both columns are first
    converted with ``pd.to_datetime(..., errors='coerce')``, so values that
    cannot be parsed become ``NaT`` and yield a ``NaN`` delay.

    Args:
        data: DataFrame to annotate. It is modified in place: the two
            timestamp columns are overwritten with their parsed values and
            ``Delay`` is added.
        performed_col: Name of the column subtracted from ``arrival_col``.
            The default is the Hebrew header used by the app (roughly
            "execution time (inspection)").
        arrival_col: Name of the column ``performed_col`` is subtracted
            from (roughly "arrival time (inspection)").

    Returns:
        The same DataFrame object. When either column is missing the frame
        is returned unchanged, so ``data = calculate_delay(data)`` never
        rebinds ``data`` to ``None``.
    """
    if performed_col in data.columns and arrival_col in data.columns:
        data[performed_col] = pd.to_datetime(data[performed_col], errors='coerce')
        data[arrival_col] = pd.to_datetime(data[arrival_col], errors='coerce')
        # Timedelta -> seconds -> minutes.
        data['Delay'] = (data[arrival_col] - data[performed_col]).dt.total_seconds() / 60
    return data
 
 
 
38
 
# Function for a generic set of BI questions and plots
def bi_questions_analysis(data):
    """Render the dashboard: 5 pages of 12 randomly chosen plots each.

    For every plot slot a chart kind is drawn at random from the kinds whose
    required columns are actually present in *data*, so a slot never raises
    ``KeyError`` and never renders as an empty section. Each figure is closed
    after it is handed to Streamlit.

    Args:
        data: DataFrame to visualise. It is modified in place: ``Delay`` is
            added by ``calculate_delay`` when its source columns exist, and
            when a ``Plantime`` column exists it is parsed to datetime and an
            ``hour_of_day`` column is added.

    Returns:
        None. All output goes to the Streamlit page.
    """
    st.title("Business Intelligence Dashboard: 60 Questions and Plots")

    # Ensure Delay column is created. calculate_delay assigns into the frame
    # it receives, so if it hands back nothing keep using the frame we have.
    updated = calculate_delay(data)
    if updated is not None:
        data = updated

    status_col = 'סטטוס'  # Hebrew header, "status"
    columns = set(data.columns)

    if 'Plantime' in columns:
        data['Plantime'] = pd.to_datetime(data['Plantime'], errors='coerce')
        data['hour_of_day'] = data['Plantime'].dt.hour

    # A column that exists but holds only missing values cannot be plotted.
    has_delay = 'Delay' in columns and data['Delay'].notna().any()
    has_time = 'Plantime' in columns and data['Plantime'].notna().any()
    has_status = status_col in columns
    # Restrict correlations to numeric columns; DataFrame.corr() raises on
    # text columns in pandas >= 2.0.
    numeric = data.select_dtypes(include='number')

    def draw_bar(ax):
        direction_delay = data.groupby('Direction')['Delay'].mean().reset_index()
        sns.barplot(x='Direction', y='Delay', data=direction_delay, ax=ax)

    def draw_line(ax):
        data.groupby('hour_of_day')['Delay'].mean().plot(kind="line", ax=ax)

    def draw_scatter(ax):
        sns.scatterplot(x=data['Plantime'], y=data['Delay'], ax=ax)

    def draw_hist(ax):
        sns.histplot(data['Delay'], kde=True, ax=ax)

    def draw_box(ax):
        sns.boxplot(x=status_col, y='Delay', data=data, ax=ax)

    def draw_pie(ax):
        # Drop unknown delays first; NaN > 0 is False and would otherwise be
        # reported as "On Time".
        delays = data['Delay'].dropna()
        labels = (delays > 0).map({True: 'Delayed', False: 'On Time'})
        labels.value_counts().plot(kind="pie", ax=ax, autopct='%1.1f%%', startangle=90)

    def draw_count(ax):
        sns.countplot(x=status_col, data=data, ax=ax)

    def draw_heatmap(ax):
        sns.heatmap(numeric.corr(), annot=True, fmt=".2f", cmap="coolwarm", ax=ax)

    # (eligible, subheader text, axes title, figsize, drawing function)
    candidates = [
        (has_delay and 'Direction' in columns,
         "Bar Chart - Delay by Direction", "Delay by Direction", None, draw_bar),
        (has_delay and has_time,
         "Line Chart - Delay over Time", "Delay over Time", None, draw_line),
        (has_delay and has_time,
         "Scatter Plot - Delay vs. Plantime", "Delay vs. Plantime", None, draw_scatter),
        (has_delay and has_status,
         "Box Plot - Delay by Status", "Delay by Status", None, draw_box),
        (has_delay,
         "Histogram - Distribution of Delay", "Distribution of Delay", None, draw_hist),
        # A correlation needs at least one pair of numeric columns.
        (numeric.shape[1] >= 2,
         "Heatmap of Correlations", "Correlation Heatmap", (10, 8), draw_heatmap),
        (has_delay,
         "Pie Chart - Delay Categories", "Delay Categories", None, draw_pie),
        (has_status,
         "Count Plot - Status Frequency", "Frequency of Status", None, draw_count),
    ]
    available = [spec[1:] for spec in candidates if spec[0]]

    if not available:
        st.warning("No columns suitable for plotting were found in the data.")
        return

    # Loop through the 12 plots for each page
    for page in range(1, 6):
        st.header(f"Page {page} - 12 Plots")
        for i in range((page - 1) * 12 + 1, page * 12 + 1):
            subtitle, title, figsize, draw = random.choice(available)
            st.subheader(f"Plot {i}: {subtitle}")
            fig, ax = plt.subplots(figsize=figsize)
            draw(ax)
            ax.set_title(title)
            st.pyplot(fig)
            # 60 figures per run: release each one once it has been rendered.
            plt.close(fig)
125
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  # Main Page Navigation with buttons for each section
127
  def main_page_navigation(data):
128
  pages = {