Spaces:
Build error
Build error
elli-teu
committed on
Commit
·
f23ed4a
1
Parent(s):
fbcd22d
Update app.py
Browse files
app.py
CHANGED
|
@@ -74,8 +74,6 @@ def get_buses():
|
|
| 74 |
short_bus_list = list(pd.unique(bus_df["route_short_name"]))
|
| 75 |
return bus_df, bus_list, short_bus_list
|
| 76 |
|
| 77 |
-
# Function to remove duplicates
|
| 78 |
-
def remove_near_duplicates(data):
|
| 79 |
print(data["trip_id"].nunique())
|
| 80 |
result = []
|
| 81 |
data["datetime"] = pd.to_datetime(data["datetime"])
|
|
@@ -97,75 +95,6 @@ def remove_near_duplicates(data):
|
|
| 97 |
print(filtered_df["trip_id"].nunique())
|
| 98 |
return filtered_df
|
| 99 |
|
| 100 |
-
def remove_duplicate_trips(df, route_id_col="route_id", trip_id_col="trip_id", datetime_col="datetime", time_window='3min'):
|
| 101 |
-
"""
|
| 102 |
-
Removes duplicate trips based on route_id and starting time proximity within a time window.
|
| 103 |
-
|
| 104 |
-
Parameters:
|
| 105 |
-
df (pd.DataFrame): Input DataFrame containing trip data.
|
| 106 |
-
route_id_col (str): Column name for route IDs.
|
| 107 |
-
trip_id_col (str): Column name for trip IDs.
|
| 108 |
-
datetime_col (str): Column name for departure times.
|
| 109 |
-
time_window (str): Time window for considering trips as duplicates (e.g., '3min').
|
| 110 |
-
|
| 111 |
-
Returns:
|
| 112 |
-
pd.DataFrame: Filtered DataFrame with duplicates removed.
|
| 113 |
-
"""
|
| 114 |
-
print(df["trip_id"].nunique())
|
| 115 |
-
|
| 116 |
-
# Ensure the datetime column is of datetime type
|
| 117 |
-
df[datetime_col] = pd.to_datetime(df[datetime_col])
|
| 118 |
-
|
| 119 |
-
# Sort by route_id and datetime for correct chronological order within each route
|
| 120 |
-
df = df.sort_values(by=[route_id_col, datetime_col])
|
| 121 |
-
|
| 122 |
-
# Calculate time differences between consecutive rows within each route_id group
|
| 123 |
-
df['time_diff'] = df.groupby(route_id_col)[datetime_col].diff().fillna(pd.Timedelta('0s'))
|
| 124 |
-
|
| 125 |
-
# Mark rows as duplicates if the time difference is within the time window
|
| 126 |
-
time_window_timedelta = pd.to_timedelta(time_window)
|
| 127 |
-
df['is_duplicate'] = df['time_diff'] <= time_window_timedelta
|
| 128 |
-
|
| 129 |
-
# Keep only the first row within each group of duplicates (based on time window)
|
| 130 |
-
df['keep'] = ~df.groupby(route_id_col)['is_duplicate'].transform('any')
|
| 131 |
-
|
| 132 |
-
# Filter rows: Keep only those that are marked as 'keep'
|
| 133 |
-
result = df[df['keep']].drop(columns=['time_diff', 'is_duplicate', 'keep'])
|
| 134 |
-
|
| 135 |
-
print(result["trip_id"].nunique())
|
| 136 |
-
return result
|
| 137 |
-
|
| 138 |
-
def plot_graph(plot_df):
|
| 139 |
-
#Nu vill vi plotta!
|
| 140 |
-
#TODO ska den bara visa de stopp man vill eller alla?
|
| 141 |
-
categories = {0 : 'Empty',
|
| 142 |
-
1: 'Many seats available',
|
| 143 |
-
2:'Few seats available',
|
| 144 |
-
3:'Standing room only',
|
| 145 |
-
4:'Crushed standing room',
|
| 146 |
-
5: 'Full'}
|
| 147 |
-
|
| 148 |
-
plot_df = plot_df[["datetime", "vehicle_occupancystatus", "stop_name", "route_id"]]
|
| 149 |
-
plot_df = plot_df.sort_values("datetime")
|
| 150 |
-
st.write(plot_df.head())
|
| 151 |
-
st.write(plot_df.tail())
|
| 152 |
-
#plot_df = plot_df.set_index("datetime")
|
| 153 |
-
plot_df["Occupancy"] = plot_df["vehicle_occupancystatus"].map(categories)
|
| 154 |
-
# Explicitly set the order for Y_category
|
| 155 |
-
category_order = list(categories.values()) # ['Empty', 'Many seats available', ..., 'Full']
|
| 156 |
-
category_order.reverse()
|
| 157 |
-
|
| 158 |
-
#st.line_chart(plot_df)
|
| 159 |
-
# Create the Altair chart
|
| 160 |
-
chart = alt.Chart(plot_df).mark_line(point=True, interpolate="step-after").encode(
|
| 161 |
-
x=alt.X('stop_name:N', title="Stop name"), # Use column name as string
|
| 162 |
-
y=alt.Y('Occupancy:N', title="Vehicle Occupancy Status (Categories)", sort=category_order, scale=alt.Scale(domain=category_order)), # Treat Y as categorical
|
| 163 |
-
tooltip=["datetime", 'stop_name', 'Occupancy'] # Add tooltips for interactivity
|
| 164 |
-
).properties(
|
| 165 |
-
title="Vehicle Occupancy Status Over Time"
|
| 166 |
-
)
|
| 167 |
-
st.altair_chart(chart, use_container_width=True)
|
| 168 |
-
|
| 169 |
def plot_graph_title(plot_df, stop, time):
|
| 170 |
#Nu vill vi plotta!
|
| 171 |
#TODO ska den bara visa de stopp man vill eller alla?
|
|
@@ -331,8 +260,6 @@ def drop_the_duplicates(df):
|
|
| 331 |
return df
|
| 332 |
|
| 333 |
|
| 334 |
-
|
| 335 |
-
|
| 336 |
# Streamlit UI
|
| 337 |
def main():
|
| 338 |
st.title("Wheely Fun Times - Bus Occupancy Explorer")
|
|
|
|
| 74 |
short_bus_list = list(pd.unique(bus_df["route_short_name"]))
|
| 75 |
return bus_df, bus_list, short_bus_list
|
| 76 |
|
|
|
|
|
|
|
| 77 |
print(data["trip_id"].nunique())
|
| 78 |
result = []
|
| 79 |
data["datetime"] = pd.to_datetime(data["datetime"])
|
|
|
|
| 95 |
print(filtered_df["trip_id"].nunique())
|
| 96 |
return filtered_df
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
def plot_graph_title(plot_df, stop, time):
|
| 99 |
#Nu vill vi plotta!
|
| 100 |
#TODO ska den bara visa de stopp man vill eller alla?
|
|
|
|
| 260 |
return df
|
| 261 |
|
| 262 |
|
|
|
|
|
|
|
| 263 |
# Streamlit UI
|
| 264 |
def main():
|
| 265 |
st.title("Wheely Fun Times - Bus Occupancy Explorer")
|