elli-teu committed on
Commit
f23ed4a
·
1 Parent(s): fbcd22d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -73
app.py CHANGED
@@ -74,8 +74,6 @@ def get_buses():
74
  short_bus_list = list(pd.unique(bus_df["route_short_name"]))
75
  return bus_df, bus_list, short_bus_list
76
 
77
- # Function to remove duplicates
78
- def remove_near_duplicates(data):
79
  print(data["trip_id"].nunique())
80
  result = []
81
  data["datetime"] = pd.to_datetime(data["datetime"])
@@ -97,75 +95,6 @@ def remove_near_duplicates(data):
97
  print(filtered_df["trip_id"].nunique())
98
  return filtered_df
99
 
100
- def remove_duplicate_trips(df, route_id_col="route_id", trip_id_col="trip_id", datetime_col="datetime", time_window='3min'):
101
- """
102
- Removes duplicate trips based on route_id and starting time proximity within a time window.
103
-
104
- Parameters:
105
- df (pd.DataFrame): Input DataFrame containing trip data.
106
- route_id_col (str): Column name for route IDs.
107
- trip_id_col (str): Column name for trip IDs.
108
- datetime_col (str): Column name for departure times.
109
- time_window (str): Time window for considering trips as duplicates (e.g., '3min').
110
-
111
- Returns:
112
- pd.DataFrame: Filtered DataFrame with duplicates removed.
113
- """
114
- print(df["trip_id"].nunique())
115
-
116
- # Ensure the datetime column is of datetime type
117
- df[datetime_col] = pd.to_datetime(df[datetime_col])
118
-
119
- # Sort by route_id and datetime for correct chronological order within each route
120
- df = df.sort_values(by=[route_id_col, datetime_col])
121
-
122
- # Calculate time differences between consecutive rows within each route_id group
123
- df['time_diff'] = df.groupby(route_id_col)[datetime_col].diff().fillna(pd.Timedelta('0s'))
124
-
125
- # Mark rows as duplicates if the time difference is within the time window
126
- time_window_timedelta = pd.to_timedelta(time_window)
127
- df['is_duplicate'] = df['time_diff'] <= time_window_timedelta
128
-
129
- # Keep only the first row within each group of duplicates (based on time window)
130
- df['keep'] = ~df.groupby(route_id_col)['is_duplicate'].transform('any')
131
-
132
- # Filter rows: Keep only those that are marked as 'keep'
133
- result = df[df['keep']].drop(columns=['time_diff', 'is_duplicate', 'keep'])
134
-
135
- print(result["trip_id"].nunique())
136
- return result
137
-
138
- def plot_graph(plot_df):
139
- #Nu vill vi plotta!
140
- #TODO ska den bara visa de stopp man vill eller alla?
141
- categories = {0 : 'Empty',
142
- 1: 'Many seats available',
143
- 2:'Few seats available',
144
- 3:'Standing room only',
145
- 4:'Crushed standing room',
146
- 5: 'Full'}
147
-
148
- plot_df = plot_df[["datetime", "vehicle_occupancystatus", "stop_name", "route_id"]]
149
- plot_df = plot_df.sort_values("datetime")
150
- st.write(plot_df.head())
151
- st.write(plot_df.tail())
152
- #plot_df = plot_df.set_index("datetime")
153
- plot_df["Occupancy"] = plot_df["vehicle_occupancystatus"].map(categories)
154
- # Explicitly set the order for Y_category
155
- category_order = list(categories.values()) # ['Empty', 'Many seats available', ..., 'Full']
156
- category_order.reverse()
157
-
158
- #st.line_chart(plot_df)
159
- # Create the Altair chart
160
- chart = alt.Chart(plot_df).mark_line(point=True, interpolate="step-after").encode(
161
- x=alt.X('stop_name:N', title="Stop name"), # Use column name as string
162
- y=alt.Y('Occupancy:N', title="Vehicle Occupancy Status (Categories)", sort=category_order, scale=alt.Scale(domain=category_order)), # Treat Y as categorical
163
- tooltip=["datetime", 'stop_name', 'Occupancy'] # Add tooltips for interactivity
164
- ).properties(
165
- title="Vehicle Occupancy Status Over Time"
166
- )
167
- st.altair_chart(chart, use_container_width=True)
168
-
169
  def plot_graph_title(plot_df, stop, time):
170
  #Nu vill vi plotta!
171
  #TODO ska den bara visa de stopp man vill eller alla?
@@ -331,8 +260,6 @@ def drop_the_duplicates(df):
331
  return df
332
 
333
 
334
-
335
-
336
  # Streamlit UI
337
  def main():
338
  st.title("Wheely Fun Times - Bus Occupancy Explorer")
 
74
  short_bus_list = list(pd.unique(bus_df["route_short_name"]))
75
  return bus_df, bus_list, short_bus_list
76
 
 
 
77
  print(data["trip_id"].nunique())
78
  result = []
79
  data["datetime"] = pd.to_datetime(data["datetime"])
 
95
  print(filtered_df["trip_id"].nunique())
96
  return filtered_df
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  def plot_graph_title(plot_df, stop, time):
99
  #Nu vill vi plotta!
100
  #TODO ska den bara visa de stopp man vill eller alla?
 
260
  return df
261
 
262
 
 
 
263
  # Streamlit UI
264
  def main():
265
  st.title("Wheely Fun Times - Bus Occupancy Explorer")