Yulle committed on
Commit
9d5a9e1
·
1 Parent(s): 935ea2b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +409 -0
app.py ADDED
@@ -0,0 +1,409 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%
2
+ import gradio as gr
3
+ import numpy as np
4
+ import requests
5
+ import pandas as pd
6
+ import hopsworks
7
+ import joblib
8
+ import torch
9
+ from torch import nn
10
+
11
+
12
+ import os
13
+ from dotenv import load_dotenv
14
+ import httpx
15
+ import datetime
16
+ import json
17
+ from urllib.request import Request, urlopen
18
+ import random
19
+ from datetime import datetime
20
+ from sklearn.preprocessing import StandardScaler
21
+
22
+
23
+
24
+
25
+ # %%
26
# Log in to Hopsworks and keep a handle to the project's feature store; both
# objects are reused further down when the model and training data are loaded.
project = hopsworks.login()
fs = project.get_feature_store()

# %%
# Read the API credentials from the environment. load_dotenv() is called first
# so that values kept in a local .env file (if any) are visible to os.getenv.
# A key that is not configured comes back as None.
load_dotenv()
weather_api_key, pressure_api_key, flight_api_key = (
    os.getenv(env_name)
    for env_name in ("weather_api_key", "pressure_api_key", "flight_api_key")
)
35
+
36
+
37
+ # %%
38
+ #Mappings
39
# ICAO station identifier -> IATA airport code for every supported airport.
icao_to_iata_map = {
    "KDTW": "DTW",
    "KLAS": "LAS",
    "KPHL": "PHL",
    "KDEN": "DEN",
    "KCLT": "CLT",
    "KSEA": "SEA",
    "KMCO": "MCO",
    "KFLL": "FLL",
    "KIAD": "IAD",
    "KIAH": "IAH",
    "KSFO": "SFO",
    "KEWR": "EWR",
    "KMIA": "MIA",
    "KJFK": "JFK",
    "KLAX": "LAX",
    "KORD": "ORD",
    "KATL": "ATL",
}

# Reverse lookup (IATA -> ICAO); the weather APIs are queried by ICAO code.
iata_to_icao_map = {iata: icao for icao, iata in icao_to_iata_map.items()}

# IATA code -> WAC value fed to the model as origin_WAC / dest_WAC.
wac_map = {
    "BOS": 13,
    "CLT": 36,
    "DEN": 82,
    "DTW": 43,
    "EWR": 21,
    "FLL": 33,
    "IAD": 38,
    "IAH": 74,
    "JFK": 22,
    "LAS": 85,
    "LAX": 91,
    "MCO": 33,
    "MIA": 33,
    "ORD": 41,
    "PHL": 23,
    "SEA": 93,
    "SFO": 91,
    "ATL": 34,
}

# Paths into the decoded weather / pressure / flight API payloads. A tuple is
# a chain of nested keys, a bare string is a top-level key.
weather_features = [
    ("dewpoint", "value"),
    "relative_humidity",
    ("remarks_info", "precip_hourly", "value"),
    ("remarks_info", "temperature_decimal", "value"),
    ("visibility", "value"),
    ("wind_direction", "value"),
    ("wind_gust", "value"),
    ("wind_speed", "value"),
]
pressure_features = [("pressure", "hpa")]
flight_features = [
    "flight_date",
    ("departure", "iata"),
    ("departure", "delay"),
    ("departure", "scheduled"),
    ("arrival", "iata"),
    ("arrival", "delay"),
    ("arrival", "scheduled"),
]

# IATA code -> numeric airport identifier.
airport_id_map = {
    "CLT": 11057,
    "DEN": 11292,
    "DTW": 11433,
    "EWR": 11618,
    "FLL": 11697,
    "IAD": 12264,
    "IAH": 12266,
    "JFK": 12478,
    "LAS": 12889,
    "LAX": 12892,
    "MCO": 13204,
    "MIA": 13303,
    "ORD": 13930,
    "PHL": 14100,
    "SEA": 14747,
    "SFO": 14771,
    "ATL": 10397,
}

# IATA code -> integer label (0..16) used for the *_airport_id model features.
# The labels are the positions of the codes in alphabetical order, so the table
# is derived from airport_id_map instead of being typed out a second time.
# NOTE(review): the labels must match the encoding used when the model was
# trained; adding an airport shifts them and presumably requires retraining.
label_tranformed_airport_id_map = {
    code: label for label, code in enumerate(sorted(airport_id_map))
}

# Choices shown in the origin / destination dropdowns. The first
# whitespace-separated token of each entry is the IATA code.
airports = [
    "PHL - PHILADELPHIA INTERNATIONAL AIRPORT, PA US",
    "SEA - SEATTLE TACOMA AIRPORT, WA US",
    "JFK - JFK INTERNATIONAL AIRPORT, NY US",
    "DEN - DENVER INTERNATIONAL AIRPORT, CO US",
    "EWR - NEWARK LIBERTY INTERNATIONAL AIRPORT, NJ US",
    "LAS - MCCARRAN INTERNATIONAL AIRPORT, NV US",
    "MCO - ORLANDO INTERNATIONAL AIRPORT, FL US",
    "ATL - ATLANTA HARTSFIELD JACKSON INTERNATIONAL AIRPORT, GA US",
    "FLL - FORT LAUDERDALE INTERNATIONAL AIRPORT, FL US",
    "DTW - DETROIT METRO AIRPORT, MI US",
    "IAD - WASHINGTON DULLES INTERNATIONAL AIRPORT, VA US",
    "ORD - CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",
    "LAX - LOS ANGELES INTERNATIONAL AIRPORT, CA US",
    "CLT - CHARLOTTE DOUGLAS AIRPORT, NC US",
    "MIA - MIAMI INTERNATIONAL AIRPORT, FL US",
    "IAH - HOUSTON INTERCONTINENTAL AIRPORT, TX US",
    "SFO - SAN FRANCISCO INTERNATIONAL AIRPORT, CA US",
]
138
+
139
+ # %%
140
+
141
+ #Class definition needed due to the way pytorch neural networks are saved and loaded by python
142
+ # A solution, if needed, would be to save the state dict of the NN and load the model via load_state_dict
143
class NeuralNetwork(nn.Module):
    """Fully connected regression network: input -> 128 -> 64 -> 1.

    ReLU follows each hidden layer and dropout (p=0.2) is applied after the
    first one. The class has to be defined in this module under this name
    because the stored model is a pickled module object, which can only be
    loaded when its class is importable. Attribute names must therefore stay
    as they are.
    """

    def __init__(self, input_size: int) -> None:
        """Build the layers for feature vectors of length ``input_size``."""
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.fc2 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return one output value per input row."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        hidden = self.relu(self.fc2(hidden))
        return self.output(hidden)
160
+
161
# Load model from model registry
mr = project.get_model_registry()
registered_model = mr.get_model("flight_delay_model", version=2)
model_dir = registered_model.download()
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. This is acceptable only because the artifact comes from the project's
# own model registry; never point it at an untrusted file.
model = joblib.load(os.path.join(model_dir, "flight_delay_model.pkl"))
# The network contains a Dropout layer; switch to inference mode so that
# predictions are deterministic.
if isinstance(model, nn.Module):
    model.eval()

# Get the original train/test split used for training the model; only the
# training features are needed here, to fit the scaler.
feature_view = fs.get_feature_view(name="flight_data_v3", version=1)
X_train, X_test, y_train, y_test = feature_view.get_train_test_split(
    training_dataset_version=3
)

# Fit the scaler on the training features cast to float32, which is the
# precision the data previously had after its round-trip through a torch
# tensor. The scaled training matrix itself is not needed by the app.
scaler = StandardScaler()
scaler.fit(X_train.to_numpy(dtype=np.float32))

# Running table of all predictions made since the app started.
results = pd.DataFrame(
    columns=[
        "Origin Airport",
        "Destination Airport",
        "Scheduled Departure",
        "Scheduled Arrival",
        "Predicted Departure Delay",
    ]
)
178
+
179
+
180
+ # %%
181
def _weather_value(report, *path, default=None):
    """Walk nested dict keys in ``report``; return ``default`` on a missing or null step."""
    node = report
    for key in path:
        if not isinstance(node, dict) or node.get(key) is None:
            return default
        node = node[key]
    return node


def get_weather_data(selected_airports_iata):
    """Fetch the current METAR for each airport and return model weather features.

    Args:
        selected_airports_iata: iterable of IATA codes; each must be a key of
            ``iata_to_icao_map``.

    Returns:
        pandas.DataFrame with one row per airport and the columns ``airport``,
        ``HourlyDewPointTemperature``, ``HourlyRelativeHumidity``,
        ``HourlyPrecipitation``, ``HourlyDryBulbTemperature``,
        ``HourlyVisibility``, ``HourlyWindDirection``, ``HourlyWindGustSpeed``
        and ``HourlyWindSpeed``.

    Raises:
        KeyError: for an unknown IATA code or a payload missing a required
            top-level field.
        urllib.error.URLError: when the HTTP request fails or times out.
    """
    weather_rows = []
    for airport in selected_airports_iata:
        print(f"Getting weather for {airport}")
        request = Request(
            f"https://avwx.rest/api/metar/{iata_to_icao_map[airport]}",
            headers={"Authorization": weather_api_key},
        )
        # Close the connection deterministically and never hang the UI on a
        # stalled request.
        with urlopen(request, timeout=30) as response:
            report = json.loads(response.read())

        # Prefer the tenth-of-a-degree values from the remarks section; when
        # the report has none, fall back to the top-level readings.
        # NOTE(review): assumes the payload also exposes top-level
        # "dewpoint"/"temperature" objects with a "value" field - confirm
        # against the AVWX schema.
        dew_point = _weather_value(report, "remarks_info", "dewpoint_decimal", "value")
        if dew_point is None:
            dew_point = _weather_value(report, "dewpoint", "value")
        dry_bulb = _weather_value(report, "remarks_info", "temperature_decimal", "value")
        if dry_bulb is None:
            dry_bulb = _weather_value(report, "temperature", "value")

        weather_rows.append(
            {
                "airport": airport,
                "HourlyDewPointTemperature": dew_point,
                "HourlyRelativeHumidity": report["relative_humidity"],
                # No hourly precipitation group in the report means none fell.
                "HourlyPrecipitation": _weather_value(
                    report, "remarks_info", "precip_hourly", "value", default=0
                ),
                "HourlyDryBulbTemperature": dry_bulb,
                "HourlyVisibility": report["visibility"]["value"],
                # NOTE(review): a null direction is mapped to 0 so the feature
                # row stays numeric - confirm this matches how the training
                # data encoded a missing/variable direction.
                "HourlyWindDirection": _weather_value(
                    report, "wind_direction", "value", default=0
                ),
                # No gust group in the report means no gusts.
                "HourlyWindGustSpeed": _weather_value(
                    report, "wind_gust", "value", default=0
                ),
                "HourlyWindSpeed": report["wind_speed"]["value"],
            }
        )

    return pd.DataFrame(weather_rows)
226
+
227
+ # %%
228
def get_pressure_data(selected_airports_iata):
    """Fetch the current barometric pressure for each airport.

    Args:
        selected_airports_iata: iterable of IATA codes; each must be a key of
            ``iata_to_icao_map``.

    Returns:
        pandas.DataFrame with one row per airport and the columns ``airport``
        and ``HourlyStationPressure`` (the ``barometer.hpa`` reading of the
        first observation returned).

    Raises:
        KeyError: for an unknown IATA code or a payload without the expected
            keys.
        IndexError: when the service returns no observation for an airport.
        urllib.error.URLError: when the HTTP request fails or times out.
    """
    pressure_rows = []
    for airport in selected_airports_iata:
        print(f"Getting pressure for {airport}")
        request = Request(
            f"https://api.checkwx.com/metar/{iata_to_icao_map[airport]}/decoded",
            headers={"X-API-Key": pressure_api_key},
        )
        # Close the connection deterministically and never hang the UI on a
        # stalled request.
        with urlopen(request, timeout=30) as response:
            payload = json.loads(response.read())

        observations = payload["data"]
        if not observations:
            raise IndexError(f"No METAR observation returned for {airport}")

        # NOTE(review): the value is taken in hPa - confirm that the training
        # data's HourlyStationPressure uses the same unit.
        pressure_rows.append(
            {
                "airport": airport,
                "HourlyStationPressure": observations[0]["barometer"]["hpa"],
            }
        )

    return pd.DataFrame(pressure_rows)
255
+
256
+ # %%
257
def _clock_to_hhmm(clock):
    """Convert an ``H:M`` clock string to an HHMM integer (``"17:05"`` -> 1705)."""
    parts = clock.split(":")
    if len(parts) == 2:
        # Weight the hour explicitly so "17:5" becomes 1705 rather than 175.
        return int(parts[0]) * 100 + int(parts[1])
    # Any other shape keeps the historical behaviour: strip colons and parse.
    return int(clock.replace(":", ""))


def get_flight_data(origin, destination, scheduled_dep_time, scheduled_arr_time, now=None):
    """Build the single-row frame of schedule features for one flight.

    Args:
        origin: IATA code of the departure airport.
        destination: IATA code of the arrival airport.
        scheduled_dep_time: scheduled departure, 24-hour ``HH:MM``.
        scheduled_arr_time: scheduled arrival, 24-hour ``HH:MM``.
        now: datetime supplying the calendar features; defaults to the current
            local time.

    Returns:
        pandas.DataFrame with exactly one row and, in this order, the columns
        ``month``, ``Day_of_month``, ``Day_of_week``, ``origin``,
        ``origin_airport_id``, ``origin_WAC``, ``dest``, ``dest_airport_id``,
        ``dest_WAC``, ``CRS_DEP_TIME``, ``CRS_ARR_TIME`` and ``airport``.
        ``airport`` repeats the origin code so the row can be joined to the
        weather data.

    Raises:
        KeyError: when an airport code is missing from the lookup tables.
        ValueError: when a time string is not numeric.
    """
    current_datetime = datetime.now() if now is None else now

    # NOTE(review): weekday() counts Monday as 0; confirm the training data
    # numbers weekdays the same way (some sources count from 1).
    return pd.DataFrame(
        {
            "month": [current_datetime.month],
            "Day_of_month": [current_datetime.day],
            "Day_of_week": [current_datetime.weekday()],
            "origin": [origin],
            "origin_airport_id": [label_tranformed_airport_id_map[origin]],
            "origin_WAC": [wac_map[origin]],
            "dest": [destination],
            "dest_airport_id": [label_tranformed_airport_id_map[destination]],
            "dest_WAC": [wac_map[destination]],
            "CRS_DEP_TIME": [_clock_to_hhmm(scheduled_dep_time)],
            "CRS_ARR_TIME": [_clock_to_hhmm(scheduled_arr_time)],
            "airport": [origin],
        }
    )
297
+
298
+ # %%
299
+ # Define the function to predict flight delay based on user inputs
300
def predict_delay(origin, destination, scheduled_dep_time, scheduled_arr_time):
    """Predict the departure delay for one flight and add it to the results table.

    Args:
        origin: selected origin dropdown entry; its first token is the IATA code.
        destination: selected destination dropdown entry, same format.
        scheduled_dep_time: scheduled departure, 24-hour ``HH:MM``.
        scheduled_arr_time: scheduled arrival, 24-hour ``HH:MM``.

    Returns:
        The module-level ``results`` DataFrame with the new prediction
        appended as its last row.

    Raises:
        gr.Error: for invalid input or when live weather data cannot be
            fetched. The output component is a dataframe, so problems are
            raised for the UI to display rather than returned as strings.
    """
    global results

    if not origin or not destination:
        raise gr.Error("Error: Please select both an origin and a destination airport.")

    origin = origin.split()[0]
    destination = destination.split()[0]

    # Validate the times and normalise them to zero-padded HH:MM.
    try:
        scheduled_dep_time = datetime.strptime(
            (scheduled_dep_time or "").strip(), "%H:%M"
        ).strftime("%H:%M")
        scheduled_arr_time = datetime.strptime(
            (scheduled_arr_time or "").strip(), "%H:%M"
        ).strftime("%H:%M")
    except ValueError:
        raise gr.Error(
            "Error: Please enter scheduled departure and arrival times in 24-hour format (HH:MM)."
        ) from None
    if origin == destination:
        raise gr.Error(
            "Error: Origin and destination airports cannot be the same. Please select different airports."
        )

    # Get data from APIs. Network failures (URLError is an OSError) are turned
    # into a message the user can act on.
    selected_airports_iata = [origin, destination]
    try:
        weather_data = get_weather_data(selected_airports_iata)
        pressure_data = get_pressure_data(selected_airports_iata)
    except OSError as err:
        raise gr.Error(
            "Error: Could not fetch live weather data. Please try again later."
        ) from err
    flight_data = get_flight_data(origin, destination, scheduled_dep_time, scheduled_arr_time)

    # Merge pressure and weather, then sort the columns so their order is the
    # same as in training.
    weather_delay_data = pd.merge(pressure_data, weather_data, on="airport")
    weather_delay_data = weather_delay_data.reindex(
        sorted(weather_delay_data.columns), axis=1
    )

    # flight_data carries airport == origin, so this join keeps only the
    # origin airport's weather row.
    flight_weather_data = pd.merge(flight_data, weather_delay_data, on="airport")

    # Drop the string columns; the model only takes numeric features.
    flight_weather_data = flight_weather_data.drop(columns=["airport", "origin", "dest"])

    # These readings may arrive as integers; force float64.
    columns_to_float64 = [
        "HourlyPrecipitation",
        "HourlyVisibility",
        "HourlyWindGustSpeed",
        "HourlyWindSpeed",
    ]
    flight_weather_data[columns_to_float64] = flight_weather_data[
        columns_to_float64
    ].astype("float64")

    # Scale with the scaler fitted on the training data, in float32.
    scaled_features = scaler.transform(flight_weather_data.to_numpy(dtype=np.float32))
    flight_weather_data_tensor = torch.tensor(scaled_features, dtype=torch.float32)

    # Inference only: disable dropout and skip gradient tracking so the same
    # inputs always give the same prediction.
    if isinstance(model, nn.Module):
        model.eval()
    with torch.no_grad():
        output = model(flight_weather_data_tensor)

    new_prediction = {
        "Origin Airport": origin,
        "Destination Airport": destination,
        "Scheduled Departure": scheduled_dep_time,
        "Scheduled Arrival": scheduled_arr_time,
        "Predicted Departure Delay": int(output.item()),
    }
    # Append the new prediction to the existing DataFrame with a fresh
    # 0..n-1 index. NOTE: `results` is module-level state shared by every
    # session of the app.
    results = pd.concat([results, pd.DataFrame([new_prediction])], ignore_index=True)

    return results
382
+
383
+
384
+ # %%
385
+ # Create Gradio interface with dropdowns for airport selection
386
# Gradio front end: a results table next to the input widgets, plus a button
# that runs predict_delay.
# NOTE(review): the nesting of the Row/Column containers below is
# reconstructed; the original indentation was not available - confirm the
# layout.
with gr.Blocks() as demo:
    gr.Markdown("# Flight departure delay predictor using Flight data and Weather Data")
    gr.Markdown("Input origin airport and destination airport from the dropdown boxes. Also input the scheduled departure time and scheduled arrival time")
    gr.Markdown("The scheduled departure time should be within one hour from now since live weather data for the airports will be fetched")

    with gr.Row():
        # Table that shows the accumulated predictions.
        table_headers = [
            "Origin Airport",
            "Destination Airport",
            "Scheduled Departure",
            "Scheduled Arrival",
            "Predicted Departure Delay",
        ]
        output = gr.Dataframe(
            headers=table_headers,
            row_count=3,
            col_count=5,
            type="pandas",
            label="Predicted Departure Delay",
        )
        with gr.Column():
            origin_dropdown = gr.Dropdown(choices=airports, label="Origin Airport")
            destination_dropdown = gr.Dropdown(choices=airports, label="Destination Airport")
            scheduled_dep_time_text = gr.Textbox(
                type="text",
                label="Enter scheduled Departure time in 24-hour format HH:MM(eg. 17:59)",
            )
            scheduled_arr_time_text = gr.Textbox(
                type="text",
                label="Enter scheduled Arrival time in 24-hour format HH:MM (eg. 20:59)",
            )

            with gr.Row():
                submit_button = gr.Button("Predict Departure Delay")

    # Event listeners have to be registered inside the Blocks context.
    prediction_inputs = [
        origin_dropdown,
        destination_dropdown,
        scheduled_dep_time_text,
        scheduled_arr_time_text,
    ]
    submit_button.click(predict_delay, inputs=prediction_inputs, outputs=output)

demo.launch()
408
+
409
+