SebastianoMeneghin committed on
Commit
2775945
·
verified ·
1 Parent(s): 16f68c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +207 -21
app.py CHANGED
@@ -1,12 +1,179 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import hopsworks
 
 
 
4
  import math
5
  import os
6
 
7
  hopsworks_today_path = "Resources/today_timetable_prediction/today_timetable_prediction.csv"
8
  hopsworks_tomorrow_path = "Resources/tomorrow_timetable_prediction/tomorrow_timetable_prediction.csv"
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  def get_dataframe(online_dataframe_path):
12
  # Connect to Hopsworks File System
@@ -19,6 +186,8 @@ def get_dataframe(online_dataframe_path):
19
  # Read dataframe from local path, drop duplicates, return
20
  dataframe = pd.read_csv(dataframe_path)
21
  dataframe.drop_duplicates(inplace=True)
 
 
22
  return dataframe
23
 
24
  def get_tomorrow_dataframe():
@@ -27,18 +196,42 @@ def get_tomorrow_dataframe():
27
  def get_today_dataframe():
28
  return get_dataframe(hopsworks_tomorrow_path)
29
 
30
- def get_dataframe_of(day):
31
- if (day.lower() == 'today'):
32
- return get_today_dataframe()
33
- elif (day.lower() == 'tomorrow'):
34
- return get_tomorrow_dataframe()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  def get_possible_destinations():
37
- today_df, tomorrow_df = get_today_dataframe(), get_tomorrow_dataframe()
38
- total_df = pd.DataFrame({'airport': pd.concat([today_df['airport'], tomorrow_df['airport']]).drop_duplicates().reset_index(drop=True).sort_values()})
39
- total_dest = (total_df['airport']).tolist()
 
40
  return total_dest
41
 
 
 
 
 
 
 
42
 
43
  def get_specific_flights(day, max_delay, departure_hour, ampm, weather, destinations, yes):
44
  df = get_dataframe_of(day)
@@ -56,7 +249,7 @@ def get_specific_flights(day, max_delay, departure_hour, ampm, weather, destinat
56
 
57
  # Get flight with less delay than the given and from the destinations selected, of the right day
58
  df['delay'] = (df['delayed'] - df['ontime']).dt.total_seconds() / 60
59
- filtered_df = df.loc[(df['delay'] < max_delay) & (df['airport'].isin(destinations)), ['airport', 'flight_number', 'ontime', 'delayed']]
60
 
61
  # Convert the string to datetime, then the datetime column to HH:MM
62
  filtered_df['ontime'] = pd.to_datetime(filtered_df['ontime'])
@@ -69,16 +262,9 @@ def get_specific_flights(day, max_delay, departure_hour, ampm, weather, destinat
69
  def full_day_departure(day):
70
  return get_dataframe_of(day)
71
 
72
- def get_metrics():
73
- # Connect to Hopsworks File System
74
- dataframe = hopsworks.login(api_key_value = os.environ['HOPSWORKS_API_KEY'])
75
- dataframe = dataframe.get_feature_store()
76
- dataframe = dataframe.get_feature_group(name = 'model_performance', version = 1)
77
- dataframe = dataframe.read(dataframe_type = 'pandas')
78
- dataframe = dataframe.sort_values('timestamp')
79
- dataframe = dataframe[['timestamp', 'mae', 'dateset_size']].rename(columns={'dateset_size':'Dataset Size', 'mae':'Mean Absolute Error', 'timestamp':'Date'})
80
- dataframe = dataframe.sort_values(['Date'], ascending = False)
81
- return dataframe
82
 
83
  specific_flights = gr.Interface(
84
  get_specific_flights,
@@ -104,9 +290,9 @@ total_departure = gr.Interface(
104
  "dataframe",
105
  )
106
 
107
- metrics = gr.Interface(fn = get_metrics, inputs=None, outputs='dataframe', allow_flagging="never")
108
 
109
  #flights.launch()
110
 
111
- interface = gr.TabbedInterface([specific_flights, total_departure, metrics], {"Full Day Departure", "Specific Flights", "Model Performances"})
112
  interface.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
  import hopsworks
4
+ from datetime import datetime
5
+ import requests
6
+ import json
7
  import math
8
  import os
9
 
10
  hopsworks_today_path = "Resources/today_timetable_prediction/today_timetable_prediction.csv"
11
  hopsworks_tomorrow_path = "Resources/tomorrow_timetable_prediction/tomorrow_timetable_prediction.csv"
12
 
13
def one_day_forward(year, month, day):
    '''
    Return the "year", "month" and "day" numbers of the day after the inserted day

    Month lengths (including February in leap years) are taken from the
    standard library's Gregorian calendar, so every month rolls over correctly
    and century years such as 2100 are not treated as leap years.
    A "day" at or beyond the last day of its month rolls over to the 1st of
    the following month.
    '''
    import calendar

    days_in_month = calendar.monthrange(year, month)[1]

    # Ordinary case: still inside the same month
    if day < days_in_month:
        return year, month, day + 1

    # Last day of December: move to the 1st of January of the next year
    if month == 12:
        return year + 1, 1, 1

    # Last day of any other month: move to the 1st of the next month
    return year, month + 1, 1
50
+
51
+
52
def get_today_date():
    '''
    Return today's year, month and day numbers

    The current date is read from the WorldTimeAPI endpoint for the
    Europe/Stockholm timezone. A failed or non-2xx HTTP response raises the
    corresponding "requests" exception instead of failing later on a missing key.
    '''
    # Get today's date through TimeAPI; the timeout keeps start-up from hanging forever
    time_url = "https://worldtimeapi.org/api/timezone/Europe/Stockholm"
    time_response = requests.get(time_url, timeout=10)
    time_response.raise_for_status()

    # Extract datetime. The ISO string is parsed as-is (offset included), so the
    # year/month/day read below are the local wall-clock values reported by the API
    datetime_str = time_response.json()["datetime"]
    datetime_object = datetime.fromisoformat(datetime_str)

    return datetime_object.year, datetime_object.month, datetime_object.day
71
+
72
+
73
def get_year_month_label(year, month, mode):
    '''
    Return the year_month in the format wanted by the different APIs file structure, by passing
    the year, month and the mode. The month is zero-padded to two digits. The "mode" can be
    specified between "hyphen", "underscore" and "empty" and it determines which divider you
    will find in the year_month_label between the different input passed (e.g. 2024-01 or 202401).
    Any other "mode" returns an empty string.
    '''
    dividers = {'hyphen': '-', 'underscore': '_', 'empty': ''}

    # Unknown modes keep yielding an empty label
    if mode not in dividers:
        return ''

    return f"{year}{dividers[mode]}{month:02d}"
97
+
98
+
99
def get_date_label(year, month, day, mode):
    '''
    Return the date in the format wanted by the different APIs file structure, by passing
    the year, month, day and the mode. Month and day are zero-padded to two digits. The "mode"
    can be specified between "hyphen", "underscore" and "empty" and it determines which divider
    you will find in the date_label between the different input passed (e.g. 2024-01-05 or 20240105).
    Any other "mode" returns an empty string.
    '''
    dividers = {'hyphen': '-', 'underscore': '_', 'empty': ''}

    # Unknown modes keep yielding an empty label
    if mode not in dividers:
        return ''

    # The year-month prefix is built by the shared helper so both labels stay consistent
    year_month_label = get_year_month_label(year, month, mode)
    return year_month_label + dividers[mode] + f"{day:02d}"
124
+
125
+
126
def get_name_of_cities():
    '''
    Return a dataframe with the columns "ArrivalAirportEnglish" and "ArrivalAirportIata",
    holding one row per distinct (name, IATA code) pair found among the departures from ARN
    returned by the Swedavia flight-info API for today and for tomorrow, sorted by name.

    The API key is read from the SWEDAVIA_API_KEY environment variable.
    A failed or non-2xx HTTP response raises the corresponding "requests" exception.
    '''
    columns = ['ArrivalAirportEnglish', 'ArrivalAirportIata']

    # Get the subscription key for Swedavia API and set it in the header
    subscription_key = os.environ['SWEDAVIA_API_KEY']
    headers = {
        "Ocp-Apim-Subscription-Key": subscription_key,
        "Accept": "application/json",
        "Content-Type": 'application/json',
    }

    def fetch_arrival_airports(date_label):
        # Download the departures of one day and keep only the arrival airport name and code
        swedavia_url = 'https://api.swedavia.se/flightinfo/v2/ARN/departures/' + date_label
        response = requests.get(swedavia_url, headers = headers, timeout = 30)
        response.raise_for_status()
        flights = response.json().get('flights', [])
        arrival_airports_info = [{
            'ArrivalAirportEnglish': flight.get('arrivalAirportEnglish'),
            'ArrivalAirportIata': (flight.get('flightLegIdentifier') or {}).get('arrivalAirportIata')}
            for flight in flights]
        # Explicit columns keep the frame usable even when no flight is returned
        return pd.DataFrame(arrival_airports_info, columns = columns)

    yyyy, mm, dd = get_today_date()
    yyyy1, mm1, dd1 = one_day_forward(yyyy, mm, dd)

    # Each day is requested with its own date label
    df = fetch_arrival_airports(get_date_label(yyyy, mm, dd, 'hyphen'))
    df1 = fetch_arrival_airports(get_date_label(yyyy1, mm1, dd1, 'hyphen'))

    # De-duplicate whole rows so that each name stays attached to its own IATA code
    total_df = pd.concat([df1, df]).drop_duplicates().reset_index(drop=True)
    total_df.sort_values('ArrivalAirportEnglish', inplace=True)

    return total_df
167
+
168
+
169
def create_single_dataframe_from(dataframe):
    '''
    Return "dataframe" inner-joined with the airport names from get_name_of_cities()

    The join matches the lower-cased "ArrivalAirportIata" code against the "airport" column
    of "dataframe" (presumably lower-case IATA codes; verify against the timetable file).
    Rows of "dataframe" whose airport is not returned by get_name_of_cities() are dropped by
    the inner join. The result carries "ArrivalAirportEnglish" plus the columns of "dataframe".
    '''
    cities = get_name_of_cities()
    cities['ArrivalAirportIata'] = cities['ArrivalAirportIata'].str.lower()

    merged_df = pd.merge(cities, dataframe, left_on='ArrivalAirportIata', right_on='airport', how='inner')

    # Drop the duplicate 'ArrivalAirportIata' column: 'airport' already holds the same code
    return merged_df.drop('ArrivalAirportIata', axis=1)
177
 
178
  def get_dataframe(online_dataframe_path):
179
  # Connect to Hopsworks File System
 
186
  # Read dataframe from local path, drop duplicates, return
187
  dataframe = pd.read_csv(dataframe_path)
188
  dataframe.drop_duplicates(inplace=True)
189
+
190
+ dataframe = create_single_dataframe_from(dataframe)
191
  return dataframe
192
 
193
  def get_tomorrow_dataframe():
 
196
  def get_today_dataframe():
197
  return get_dataframe(hopsworks_tomorrow_path)
198
 
199
def get_metrics():
    '''
    Return the model performance history as a dataframe with the columns
    "Date", "Mean Absolute Error" and "Dataset Size", sorted by "Date" from newest to oldest.

    The data is read from version 1 of the 'model_performance' feature group of the Hopsworks
    feature store; the API key is read from the HOPSWORKS_API_KEY environment variable.
    '''
    # Connect to Hopsworks and walk down to the feature group, one named handle per step
    project = hopsworks.login(api_key_value = os.environ['HOPSWORKS_API_KEY'])
    feature_store = project.get_feature_store()
    feature_group = feature_store.get_feature_group(name = 'model_performance', version = 1)
    metrics_df = feature_group.read(dataframe_type = 'pandas')

    # 'dateset_size' is spelled exactly as the column key this code has always read
    # (presumably the feature group's real column name; do not "correct" it here)
    metrics_df = metrics_df[['timestamp', 'mae', 'dateset_size']].rename(columns={'dateset_size':'Dataset Size', 'mae':'Mean Absolute Error', 'timestamp':'Date'})

    # A single descending sort is enough: an earlier ascending sort would be overridden by it
    return metrics_df.sort_values(['Date'], ascending = False)
209
+
210
# Columns shown in the timetable tables, in display order
selected_columns = ['destination', 'airport code', 'flight number', 'ontime', 'delayed']

# Raw column name -> name displayed in the interface
_display_names = {'airport':'airport code', 'ArrivalAirportEnglish':'destination', 'flight_number':'flight number'}

# Everything below is loaded once, when the module is imported, and then served from memory
# by the interface callbacks through these module-level variables.

# NOTE(review): nothing visible in this file reads cities_datafram apart from a "global"
# statement; it is kept because removing it would change the module's names.
cities_datafram = get_name_of_cities()

today_dataframe = get_today_dataframe().rename(columns=_display_names)[selected_columns]
tomorrow_dataframe = get_tomorrow_dataframe().rename(columns=_display_names)[selected_columns]
performance_metric = get_metrics()
220
+
221
 
222
  def get_possible_destinations():
223
+ global today_dataframe, tomorrow_dataframe
224
+ today_df, tomorrow_df = today_dataframe, tomorrow_dataframe
225
+ total_df = pd.DataFrame({'destination': pd.concat([today_df['destination'], tomorrow_df['destination']]).drop_duplicates().reset_index(drop=True).sort_values()})
226
+ total_dest = (total_df['destination']).tolist()
227
  return total_dest
228
 
229
def get_dataframe_of(day):
    '''
    Return a copy of the dataframe loaded at start-up for the "day" passed, which can be
    'today' or 'tomorrow' (case-insensitive). Any other value returns None.

    A copy is returned because callers modify the result (get_specific_flights adds a
    'delay' column to it): handing out the shared dataframe would let those changes leak
    into later calls and into the table shown by full_day_departure.
    '''
    day = day.lower()

    if day == 'today':
        return today_dataframe.copy()
    if day == 'tomorrow':
        return tomorrow_dataframe.copy()

    return None
235
 
236
  def get_specific_flights(day, max_delay, departure_hour, ampm, weather, destinations, yes):
237
  df = get_dataframe_of(day)
 
249
 
250
  # Get flight with less delay than the given and from the destinations selected, of the right day
251
  df['delay'] = (df['delayed'] - df['ontime']).dt.total_seconds() / 60
252
+ filtered_df = df.loc[(df['delay'] < max_delay) & (df['destination'].isin(destinations)), ['destination', 'flight number', 'ontime', 'delayed']]
253
 
254
  # Convert the string to datetime, then the datetime column to HH:MM
255
  filtered_df['ontime'] = pd.to_datetime(filtered_df['ontime'])
 
262
  def full_day_departure(day):
263
  return get_dataframe_of(day)
264
 
265
def get_performance():
    '''
    Return the module-level performance_metric value, which is assigned once at start-up
    '''
    # The module-level variable is only read here, so no "global" statement is needed
    return performance_metric
 
 
 
 
 
 
 
268
 
269
  specific_flights = gr.Interface(
270
  get_specific_flights,
 
290
  "dataframe",
291
  )
292
 
293
# Read-only tab: takes no input and shows the dataframe returned by get_performance
metrics = gr.Interface(fn = get_performance, inputs=None, outputs='dataframe', allow_flagging="never")

# Tab names are given as a list, in the same order as the interfaces they label.
# A set literal has no guaranteed iteration order, so names could land on the wrong tab.
interface = gr.TabbedInterface(
    [specific_flights, total_departure, metrics],
    ["Specific Flights", "Full Day Departure", "Model Performances"],
)
interface.launch()