dvnguyen02 committed on
Commit
f24e4cc
·
1 Parent(s): fa80e23
.gitattributes CHANGED
@@ -1,35 +1,2 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ data/*.csv filter=lfs diff=lfs merge=lfs -text
2
+ data/* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
DynamicMap.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Library Importation
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import geopandas as gpd
5
+ import matplotlib.pyplot as plt
6
+ from matplotlib.colors import LinearSegmentedColormap
7
+ import folium
8
+ from folium.plugins import MarkerCluster, Search, MousePosition, MiniMap
9
+ from shapely import wkt
10
+ import branca.colormap as cm
11
+ import streamlit_folium as st_folium
12
+ from streamlit_extras.grid import grid
13
+
14
+
15
+ # Load data function
16
@st.cache_data
def load_data():
    """Read the three source CSV files from the ``data/`` directory.

    Returns:
        tuple: ``(geodata, data_2018, data_2022to2023)`` as pandas DataFrames,
        read from ``stats-area.csv``, ``2018_march_ruc.csv`` and
        ``2023_march_ruc.csv`` respectively.
    """
    csv_paths = (
        "data/stats-area.csv",
        "data/2018_march_ruc.csv",
        "data/2023_march_ruc.csv",
    )
    # low_memory=False: parse each file in one pass instead of in chunks.
    geodata, data_2018, data_2022to2023 = [
        pd.read_csv(csv_path, low_memory=False) for csv_path in csv_paths
    ]
    return geodata, data_2018, data_2022to2023
22
+
23
+ # Data preprocessing function
24
def preprocess_data(data_2018, data_2022to2023):
    """Drop rows whose ``ruc_type`` is outside the scope of the analysis.

    A row is removed when its ``ruc_type`` is one of the light-vehicle,
    trailer, vintage-vehicle, mobile-crane or type-H codes listed below, or
    when its ``ruc_type`` is missing.

    Args:
        data_2018: DataFrame with a ``ruc_type`` column (2018 data).
        data_2022to2023: DataFrame with a ``ruc_type`` column (2023 data).

    Returns:
        tuple: ``(filtered_2018, filtered_2023)`` -- the remaining rows of
        each input, with their original index labels. The inputs are not
        modified.
    """
    # 'NaN' is kept so a literal "NaN" string is still excluded; real missing
    # values are handled separately in _in_scope().
    light_vehicles = ['1', '12', 'NaN']
    trailers = ['24', '28', '29', '30', '33', '37', '43', '951', '929', '939']
    vintage_vehicles = ['402', '403', '404']
    mobile_cranes = ['299', '399', '499', '599', '699', '799']
    type_H_vehicles = [
        'H01', 'H61', 'H62', 'H71', 'H73', 'H74', 'H77', 'H81', 'H82', 'H83',
        'H84', 'H75', 'H76', 'H91', 'H92', 'H93', 'H94', 'H95', 'H63', 'H97',
        'H98', 'H99', 'H72', 'H78', 'H79', 'H30', 'H31', 'H32', 'H33', 'H34',
        'H35', 'H11', 'H12', 'H13', 'H14', 'H15', 'H36', 'H37', 'H38', 'H17',
        'H18', 'H19',
    ]
    list_to_filter = (
        light_vehicles + trailers + vintage_vehicles + mobile_cranes + type_H_vehicles
    )

    def _in_scope(frame):
        """Return the rows of ``frame`` whose ruc_type is known and not excluded."""
        ruc_type = frame['ruc_type']
        # pandas.read_csv turns empty/"NaN" cells into float NaN, which
        # isin() never matches against the string 'NaN' -- test it explicitly.
        out_of_scope = ruc_type.isin(list_to_filter) | ruc_type.isna()
        return frame[~out_of_scope]

    return _in_scope(data_2018), _in_scope(data_2022to2023)
40
def create_percentage_change_df(trips_2018=None, trips_2023=None, zones=None):
    """Build the per-zone table of trip totals and absolute percentage change.

    Trips are summed per ``start_sa2`` for each year, the two years are joined
    (a zone present in only one year counts as 0 trips in the other), and the
    zone name is looked up in ``zones``.

    Args:
        trips_2018: DataFrame with ``start_sa2`` and ``num_trips`` columns.
            Defaults to the module-level ``filtered_2018``.
        trips_2023: DataFrame with ``start_sa2`` and ``num_trips`` columns.
            Defaults to the module-level ``filtered_2023``.
        zones: DataFrame with ``SA22018_V1_00`` (zone code) and
            ``SA22018_V1_NAME`` columns. Defaults to the module-level
            ``geodata``.

    Returns:
        DataFrame with columns ``'SA2 Zone'``, ``'Number of Trips 2018'``,
        ``'Number of Trips 2023'`` and ``'Absolute Percentage Change (%)'``,
        sorted by 2023 trips in descending order (zones without trip data
        last) and indexed from 0.
    """
    if trips_2018 is None:
        trips_2018 = filtered_2018
    if trips_2023 is None:
        trips_2023 = filtered_2023
    if zones is None:
        zones = geodata

    totals_2018 = trips_2018.groupby('start_sa2')['num_trips'].sum().reset_index()
    totals_2023 = trips_2023.groupby('start_sa2')['num_trips'].sum().reset_index()

    # After the merge: num_trips_x = 2018 total, num_trips_y = 2023 total.
    changes = pd.merge(totals_2018, totals_2023, on='start_sa2', how='outer')
    trip_columns = ['num_trips_x', 'num_trips_y']
    changes[trip_columns] = changes[trip_columns].fillna(0)

    # The change is expressed relative to the 2023 total. A zero 2023 total
    # yields inf (or NaN when both years are zero).
    # NOTE(review): calculate_percentage_difference() divides by the 2018
    # value instead -- confirm which baseline is intended.
    changes['difference'] = (
        (changes['num_trips_y'] - changes['num_trips_x']) / changes['num_trips_y']
    ).abs() * 100

    # Only the code and name columns are needed, so the geometry is not merged.
    zone_names = zones[['SA22018_V1_00', 'SA22018_V1_NAME']]
    table = pd.merge(
        changes, zone_names,
        left_on='start_sa2', right_on='SA22018_V1_00', how='outer',
    )
    table = table[['SA22018_V1_NAME', 'num_trips_x', 'num_trips_y', 'difference']]

    # Sort AFTER the outer merge: an outer merge re-orders rows by join key,
    # so sorting beforehand would be discarded.
    table = table.sort_values(
        'num_trips_y', ascending=False, kind='stable'
    ).reset_index(drop=True)
    table.columns = [
        'SA2 Zone',
        'Number of Trips 2018',
        'Number of Trips 2023',
        'Absolute Percentage Change (%)',
    ]
    return table
57
+
58
def create_vehicle_count_df(data):
    """Rank zones by their total number of vehicles.

    Args:
        data: DataFrame with ``SA22018_V1_NAME`` and ``num_machines`` columns.

    Returns:
        DataFrame with columns ``'SA2 Zone'`` and ``'Number of Vehicles'``,
        one row per zone name, ordered from most to fewest vehicles and
        indexed from 1.
    """
    totals = data.groupby('SA22018_V1_NAME')['num_machines'].sum().reset_index()
    ranked = totals.sort_values(by='num_machines', ascending=False, ignore_index=True)
    # Rank labels start at 1 rather than 0.
    ranked.index = ranked.index + 1
    return ranked.rename(
        columns={
            'SA22018_V1_NAME': 'SA2 Zone',
            'num_machines': 'Number of Vehicles',
        }
    )
64
+
65
+ # Function to create Folium map
66
def create_folium_map(data, column='difference', title=''):
    """Draw ``data`` as a coloured zone map centred on New Zealand.

    Each feature is filled according to its ``column`` property on a 0-100
    colour scale, shows a tooltip on hover, and can be found through a search
    box keyed on the zone name. A colour legend, fullscreen button, mini map
    and mouse-position read-out are added as well.

    Args:
        data: Input accepted by ``folium.GeoJson``. Every feature needs the
            properties ``SA22018_V1_NAME``, ``num_trips_x``, ``num_trips_y``
            and ``column``.
        column: Name of the property that drives the fill colour.
        title: Accepted but not used by this function.

    Returns:
        The populated ``folium.Map``.
    """
    # New Zealand coordinates - 40.9006° S, 174.8860° E
    nz_map = folium.Map(location=[-40.9006, 174.8860], zoom_start=5)

    # Colour scale covering 0 to 100.
    palette = cm.linear.PuBuGn_04.scale(0, 100)

    def shade_zone(feature):
        """Resting style: fill taken from the colour scale, thin black outline."""
        return {
            'fillColor': palette(feature['properties'][column]),
            'color': 'black',
            'weight': 1,
            'fillOpacity': 0.7,
        }

    def highlight_zone(feature):
        """Hover style: light-blue fill."""
        return {"fillColor": "#87CEFA"}

    hover_info = folium.GeoJsonTooltip(
        fields=['SA22018_V1_NAME', column, 'num_trips_x', 'num_trips_y'],
        aliases=['Area:', 'Percentage change (%):', 'Number of Trips in 2018', 'Number of Trips in 2023'],
        localize=True,
        sticky=False,
        labels=True,
    )

    zones_layer = folium.GeoJson(
        data,
        highlight_function=highlight_zone,
        style_function=shade_zone,
        tooltip=hover_info,
    )
    zones_layer.add_to(nz_map)

    # Search box that looks zones up by name.
    zone_search = Search(
        layer=zones_layer,
        geom_type='Polygon',
        placeholder='Type in the place you like to search',
        collapsed=False,
        search_label='SA22018_V1_NAME',
        weight=3,
    )
    zone_search.add_to(nz_map)

    # Colour legend.
    palette.add_to(nz_map)
    palette.caption = 'Percentage Change'

    fullscreen_button = folium.plugins.Fullscreen(
        position="topright",
        title="Expand me",
        title_cancel="Exit me",
        force_separate_button=True,
    )
    fullscreen_button.add_to(nz_map)

    MiniMap().add_to(nz_map)
    MousePosition().add_to(nz_map)
    return nz_map
115
+
116
+ # Streamlit app
117
st.title("National Freight Analysis")

# Read the CSV inputs; a spinner is shown while this runs.
with st.spinner("Loading data... Please wait."):
    geodata, data_2018, data_2022to2023 = load_data()

# Filter both years; a spinner is shown while this runs.
with st.spinner("Preprocessing data... Please wait."):
    filtered_2018, filtered_2023 = preprocess_data(data_2018, data_2022to2023)

# Parse the WKT text column into shapely geometries and promote the table to
# a GeoDataFrame in EPSG:4326.
geodata = gpd.GeoDataFrame(
    geodata.assign(geometry=geodata['WKT'].apply(wkt.loads)),
    geometry='geometry',
    crs="EPSG:4326",
)
#geodata = geodata.drop(columns=['WKT', 'LAND_AREA_SQ_KM', 'AREA_SQ_KM', 'Shape_Length'])
131
+
132
+
133
+
134
+ # Sidebar for navigation
135
page = st.sidebar.selectbox(
    "Choose a zone",
    ["Changes", "Origin", "Destination", "Sanity Checks"],
)

if page == "Changes":
    st.header("Change in Freight Pre-covid and Post-covid")

    # Trips per origin zone, one frame per year.
    yearly_totals = []
    for yearly_trips in (filtered_2018, filtered_2023):
        zone_totals = yearly_trips.groupby(['start_sa2'])['num_trips'].sum().reset_index()
        zone_totals['num_trips'] = zone_totals['num_trips'].fillna(0)
        yearly_totals.append(zone_totals)
    totals_2018, totals_2023 = yearly_totals

    # After the merge: num_trips_x = 2018 total, num_trips_y = 2023 total.
    zone_changes = pd.merge(totals_2018, totals_2023, on='start_sa2', how='outer')
    for trips_column in ('num_trips_x', 'num_trips_y'):
        zone_changes[trips_column] = zone_changes[trips_column].fillna(0)

    trips_before = zone_changes['num_trips_x']
    trips_after = zone_changes['num_trips_y']
    # Absolute change expressed as a percentage of the 2023 total.
    zone_changes['difference'] = abs((trips_after - trips_before) / trips_after) * 100

    # Attach the zone geometry, then blank out whatever is still missing.
    zone_changes = pd.merge(
        zone_changes, geodata,
        left_on='start_sa2', right_on='SA22018_V1_00', how='outer',
    )
    zone_changes = zone_changes.drop(columns='start_sa2').fillna(0)
    zone_changes = gpd.GeoDataFrame(zone_changes, geometry='geometry', crs="EPSG:4326")

    changes_map = create_folium_map(
        zone_changes, title='Number of Vehicles in March 2023 by Origin'
    )
    st_folium.folium_static(changes_map, width=1100, height=600)

    # Table shown underneath the map.
    change_table = create_percentage_change_df()
    st.subheader("Percentage Change in 2018 and 2023 (Sort by trips 2023)")
    st.dataframe(change_table, width=1000, height=400)
158
+
159
# NOTE: The "Origin" and "Destination" pages below are disabled. They are
# kept as '#' comments rather than inside a bare ''' string literal because,
# with Streamlit "magic" enabled (the default), a string literal standing on
# its own is written to the page as text.
# NOTE(review): before re-enabling, be aware that create_folium_map() defaults
# to column='difference' and its tooltip reads 'num_trips_x'/'num_trips_y',
# while the frames built here only carry 'num_machines'. The folium_static
# call for the 2023 destination map is also malformed.
#
# elif page == "Origin":
#     st.header("Origin Zone")
#
#     # Origin data
#     sum_by_sa2_zone_2018 = filtered_2018.groupby(['start_sa2'])['num_machines'].sum().reset_index()
#     sum_by_sa2_zone_2023 = filtered_2023.groupby(['start_sa2'])['num_machines'].sum().reset_index()
#
#     sa2_zone_2018_start = pd.merge(sum_by_sa2_zone_2018, geodata, left_on='start_sa2', right_on='SA22018_V1_00', how= 'outer')
#     sa2_zone_2023_start = pd.merge(sum_by_sa2_zone_2023, geodata, left_on='start_sa2', right_on='SA22018_V1_00', how='outer')
#     sa2_zone_2018_start['num_machines'] = sa2_zone_2018_start['num_machines'].fillna(0)
#     sa2_zone_2023_start['num_machines'] = sa2_zone_2023_start['num_machines'].fillna(0)
#
#     # Convert to GeoDataFrame
#     sa2_zone_2018_start = gpd.GeoDataFrame(sa2_zone_2018_start, geometry='geometry', crs="EPSG:4326")
#     sa2_zone_2023_start = gpd.GeoDataFrame(sa2_zone_2023_start, geometry='geometry', crs="EPSG:4326")
#
#     # Display maps and tables side by side
#     st.subheader("Number of Vehicles by Origin: March 2018 vs March 2023")
#
#     grid_layout_origin = grid(3, vertical_align="start")
#     with st.spinner("Loading data... Please wait."):
#         with grid_layout_origin.container():
#             map_2018 = create_folium_map(sa2_zone_2018_start, title='Number of Vehicles in March 2018 by Origin')
#             st_folium.folium_static(map_2018, width=450, height=400)
#
#             st.markdown("#### Vehicle Counts by Region (2018)")
#             counts_2018 = create_vehicle_count_df(sa2_zone_2018_start)
#             st.dataframe(counts_2018, width= 400, height=400)
#     with st.spinner("Loading data... Please Wait"):
#         with grid_layout_origin.container():
#             st.empty()
#         with grid_layout_origin.container():
#             map_2023 = create_folium_map(sa2_zone_2023_start, title='Number of Vehicles in March 2023 by Origin')
#             st_folium.folium_static(map_2023, width=400, height=400)
#
#             st.markdown("#### Vehicle Counts by Region (2023)")
#             counts_2023 = create_vehicle_count_df(sa2_zone_2023_start)
#             st.dataframe(counts_2023, width=400,height=400, use_container_width=True)
#
#
# elif page == "Destination":
#     st.header("Destination Zone")
#
#     # Destination data
#     sum_by_sa2_zone_2018_dest = filtered_2018.groupby(['end_sa2'])['num_machines'].sum().reset_index()
#     sum_by_sa2_zone_2023_dest = filtered_2023.groupby(['end_sa2'])['num_machines'].sum().reset_index()
#
#     sa2_zone_2018_dest = pd.merge(sum_by_sa2_zone_2018_dest, geodata, left_on='end_sa2', right_on='SA22018_V1_00', how='outer')
#     sa2_zone_2023_dest = pd.merge(sum_by_sa2_zone_2023_dest, geodata, left_on='end_sa2', right_on='SA22018_V1_00', how='outer')
#     sa2_zone_2018_dest['num_machines'] = sa2_zone_2018_dest['num_machines'].fillna(0)
#     sa2_zone_2023_dest['num_machines'] = sa2_zone_2023_dest['num_machines'].fillna(0)
#
#     # Convert to GeoDataFrame
#     sa2_zone_2018_dest = gpd.GeoDataFrame(sa2_zone_2018_dest, geometry='geometry', crs="EPSG:4326")
#     sa2_zone_2023_dest = gpd.GeoDataFrame(sa2_zone_2023_dest, geometry='geometry', crs="EPSG:4326")
#
#     # Display maps and tables side by side
#     st.subheader("Number of Vehicles by Destination: March 2018 vs March 2023")
#
#     grid_layout_destination = grid(3, vertical_align="start")
#
#     with st.spinner("Loading data... Please wait."):
#         with grid_layout_destination.container():
#             map_2018_dest = create_folium_map(sa2_zone_2018_dest, title='Number of Vehicles in March 2018 by Origin')
#             st_folium.folium_static(map_2018_dest, width=400, height=400)
#
#             st.markdown("#### Vehicle Counts by Region (2018)")
#             counts_2018 = create_vehicle_count_df(sa2_zone_2018_dest)
#             st.dataframe(counts_2018, width= 400, height=400)
#     with st.spinner("Loading data... Please Wait"):
#         with grid_layout_destination.container():
#             st.empty()
#         with grid_layout_destination.container():
#             map_2023_dest = create_folium_map(sa2_zone_2023_dest, title='Number of Vehicles in March 2023 by Origin')
#             st_folium.folium_static(map_2023_dest= create_folium_map(sa2_zone_2023_dest, title='Number of Vehicles in March 2023 by Origin')
#             , width=400, height=400)
#
#             st.markdown("#### Vehicle Counts by Region (2023)")
#             counts_2023 = create_vehicle_count_df(sa2_zone_2023_dest)
#             st.dataframe(counts_2023, width=400,height=400, use_container_width=True)
241
if page == "Sanity Checks":
    # Total number of trips, March 2018 next to March 2023.
    st.subheader("Number of Trips Comparison")
    trips = {
        'March 2018': filtered_2018['num_trips'].sum(),
        'March 2023': filtered_2023['num_trips'].sum(),
    }
    st.bar_chart(trips, color='#29AB87')

    # Total number of vehicles, March 2018 next to March 2023.
    st.subheader("Number of Vehicles Comparison")
    machines = {
        "March 2018": filtered_2018['num_machines'].sum(),
        "March 2023": filtered_2023['num_machines'].sum(),
    }
    st.bar_chart(machines, color='#00A693')
255
def calculate_percentage_difference(value_2018, value_2023):
    """Return the percentage change from ``value_2018`` to ``value_2023``.

    Args:
        value_2018: Baseline value.
        value_2023: Value being compared against the baseline.

    Returns:
        ``((value_2023 - value_2018) / value_2018) * 100``; positive for an
        increase, negative for a decrease. NaN is returned when ``value_2018``
        is zero, because a change relative to a zero baseline is undefined.
    """
    if value_2018 == 0:
        # Avoids ZeroDivisionError (plain Python numbers) and an inf result
        # (NumPy scalars) for an empty baseline.
        return float('nan')
    return ((value_2023 - value_2018) / value_2018) * 100
257
+
258
if page == "Sanity Checks":
    # Number of trips March 2018 vs March 2023
    st.subheader("Percentage Change in Number of Trips")
    sum_origin_trips_2018 = filtered_2018.num_trips.sum()
    sum_origin_trips_2023 = filtered_2023.num_trips.sum()
    trips_percentage_diff = calculate_percentage_difference(
        sum_origin_trips_2018, sum_origin_trips_2023
    )

    # Number of all Machines March 2018 vs March 2023
    st.subheader("Percentage Change in Number of Vehicles")
    sum_origin_machines_2018 = filtered_2018.num_machines.sum()
    sum_origin_machines_2023 = filtered_2023.num_machines.sum()
    machines_percentage_diff = calculate_percentage_difference(
        sum_origin_machines_2018, sum_origin_machines_2023
    )

    # Create a DataFrame for the percentage differences
    data = pd.DataFrame({
        'Category': ['Trips', 'Vehicles'],
        'Percentage Change': [trips_percentage_diff, machines_percentage_diff]
    })

    # Draw the bar chart with matplotlib, which this file already imports as
    # `plt`. Plotly's `go` is not imported here, so `go.Figure` would raise
    # NameError.
    fig, ax = plt.subplots()
    bars = ax.bar(
        data['Category'],
        data['Percentage Change'],
        color=['#29AB87', '#00A693'],
    )
    # Print each bar's value next to it, e.g. "12.34%".
    ax.bar_label(bars, labels=[f'{value:.2f}%' for value in data['Percentage Change']])
    ax.set_title('Percentage Change from March 2018 to March 2023')
    ax.set_ylabel('Percentage Change')
    ax.yaxis.set_major_formatter('{x:,.2f}%')

    # Display the chart in Streamlit
    st.pyplot(fig)
    # pyplot keeps a reference to every figure it creates; close it so figures
    # do not pile up across script reruns.
    plt.close(fig)
295
+ # TO-DO Sum of all Trips March 2018 compared to March 2023
296
+
297
+
data/2018_march_ruc.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88227cf28102e5d54fd37df502109f093a6153f0d0c183de36691c8506844a28
3
+ size 29789168
data/2023_march_ruc.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67fa04f796ebdf801940cd25e99b933df717db6effea9b37bcf06563eff62c49
3
+ size 38923909
data/stats-area.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d09ff8de9215e60d341a600e6618971b8ed6da634d2a2452ce1817e93d934cb
3
+ size 88767394
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ geopandas
4
+ matplotlib
5
+ folium
6
+ shapely
7
+ branca
8
+ streamlit-folium
9
+ streamlit-extras