Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,179 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
import hopsworks
|
|
|
|
|
|
|
|
|
|
| 4 |
import math
|
| 5 |
import os
|
| 6 |
|
| 7 |
hopsworks_today_path = "Resources/today_timetable_prediction/today_timetable_prediction.csv"
|
| 8 |
hopsworks_tomorrow_path = "Resources/tomorrow_timetable_prediction/tomorrow_timetable_prediction.csv"
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
def get_dataframe(online_dataframe_path):
|
| 12 |
# Connect to Hopsworks File System
|
|
@@ -19,6 +186,8 @@ def get_dataframe(online_dataframe_path):
|
|
| 19 |
# Read dataframe from local path, drop duplicates, return
|
| 20 |
dataframe = pd.read_csv(dataframe_path)
|
| 21 |
dataframe.drop_duplicates(inplace=True)
|
|
|
|
|
|
|
| 22 |
return dataframe
|
| 23 |
|
| 24 |
def get_tomorrow_dataframe():
|
|
@@ -27,18 +196,42 @@ def get_tomorrow_dataframe():
|
|
| 27 |
def get_today_dataframe():
|
| 28 |
return get_dataframe(hopsworks_tomorrow_path)
|
| 29 |
|
| 30 |
-
def
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
def get_possible_destinations():
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
|
|
|
| 40 |
return total_dest
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
def get_specific_flights(day, max_delay, departure_hour, ampm, weather, destinations, yes):
|
| 44 |
df = get_dataframe_of(day)
|
|
@@ -56,7 +249,7 @@ def get_specific_flights(day, max_delay, departure_hour, ampm, weather, destinat
|
|
| 56 |
|
| 57 |
# Get flight with less delay than the given and from the destinations selected, of the right day
|
| 58 |
df['delay'] = (df['delayed'] - df['ontime']).dt.total_seconds() / 60
|
| 59 |
-
filtered_df = df.loc[(df['delay'] < max_delay) & (df['
|
| 60 |
|
| 61 |
# Convert the string to datetime, then the datetime column to HH:MM
|
| 62 |
filtered_df['ontime'] = pd.to_datetime(filtered_df['ontime'])
|
|
@@ -69,16 +262,9 @@ def get_specific_flights(day, max_delay, departure_hour, ampm, weather, destinat
|
|
| 69 |
def full_day_departure(day):
|
| 70 |
return get_dataframe_of(day)
|
| 71 |
|
| 72 |
-
def
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
dataframe = dataframe.get_feature_store()
|
| 76 |
-
dataframe = dataframe.get_feature_group(name = 'model_performance', version = 1)
|
| 77 |
-
dataframe = dataframe.read(dataframe_type = 'pandas')
|
| 78 |
-
dataframe = dataframe.sort_values('timestamp')
|
| 79 |
-
dataframe = dataframe[['timestamp', 'mae', 'dateset_size']].rename(columns={'dateset_size':'Dataset Size', 'mae':'Mean Absolute Error', 'timestamp':'Date'})
|
| 80 |
-
dataframe = dataframe.sort_values(['Date'], ascending = False)
|
| 81 |
-
return dataframe
|
| 82 |
|
| 83 |
specific_flights = gr.Interface(
|
| 84 |
get_specific_flights,
|
|
@@ -104,9 +290,9 @@ total_departure = gr.Interface(
|
|
| 104 |
"dataframe",
|
| 105 |
)
|
| 106 |
|
| 107 |
-
metrics = gr.Interface(fn =
|
| 108 |
|
| 109 |
#flights.launch()
|
| 110 |
|
| 111 |
-
|
| 112 |
interface.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
import hopsworks
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
import requests
|
| 6 |
+
import json
|
| 7 |
import math
|
| 8 |
import os
|
| 9 |
|
| 10 |
hopsworks_today_path = "Resources/today_timetable_prediction/today_timetable_prediction.csv"
|
| 11 |
hopsworks_tomorrow_path = "Resources/tomorrow_timetable_prediction/tomorrow_timetable_prediction.csv"
|
| 12 |
|
| 13 |
+
def one_day_forward(year, month, day):
    '''
    Return the "year", "month" and "day" numbers of the day after the given date.

    The calendar arithmetic is delegated to the standard library, so month
    lengths (including the 31-day months) and Gregorian leap years (including
    century years such as 1900 and 2100) are handled correctly.

    Raises ValueError if the given year/month/day is not a valid calendar date.
    '''
    # Local import: the file only imports the `datetime` class from this module
    from datetime import date, timedelta

    next_day = date(year, month, day) + timedelta(days=1)
    return next_day.year, next_day.month, next_day.day
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def get_today_date():
    '''
    Return today's year, month and day numbers for the Europe/Stockholm timezone.

    The date is read from the "datetime" field of the WorldTimeAPI response.

    Raises requests.exceptions.RequestException if the service cannot be reached
    in time or answers with an HTTP error status.
    '''
    # Get today's date through TimeAPI
    time_url = "https://worldtimeapi.org/api/timezone/Europe/Stockholm"
    # Bound the wait so a stalled time service cannot block the app forever
    time_response = requests.get(time_url, timeout=10)
    time_response.raise_for_status()
    time_responseJson = time_response.json()

    # Extract datetime
    datetime_str = time_responseJson["datetime"]
    # Only the leading YYYY-MM-DD part is needed; slicing it off avoids depending
    # on the exact shape of the fractional seconds and timezone offset suffix
    datetime_object = datetime.fromisoformat(datetime_str[:10])

    return datetime_object.year, datetime_object.month, datetime_object.day
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def get_year_month_label(year, month, mode):
    '''
    Build the "year + month" label used by the different APIs' file structure.

    The month is left-padded with a 0 unless it is 10, 11 or 12. "mode" selects
    the divider placed between year and month: "hyphen" (2024-01), "underscore"
    (2024_01) or "empty" (202401). Any other mode yields an empty string.
    '''
    dividers = {'hyphen': '-', 'underscore': '_', 'empty': ''}

    padded_month = str(month) if month in {10, 11, 12} else '0' + str(month)

    if mode not in dividers:
        return ''
    return str(year) + dividers[mode] + padded_month
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def get_date_label(year, month, day, mode):
    '''
    Build the full date label used by the different APIs' file structure.

    The year/month part comes from get_year_month_label; the day is left-padded
    with a 0 when it is below 10. "mode" selects the divider between the parts:
    "hyphen" (2024-01-05), "underscore" (2024_01_05) or "empty" (20240105).
    Any other mode yields an empty string.
    '''
    dividers = {'hyphen': '-', 'underscore': '_', 'empty': ''}

    prefix = get_year_month_label(year, month, mode)
    padded_day = '0' + str(day) if day < 10 else str(day)

    if mode not in dividers:
        return ''
    return prefix + dividers[mode] + padded_day
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def _fetch_arrival_airports(url, headers):
    '''
    Download one Swedavia departures listing and return its arrival airports
    as a dataframe with the columns ArrivalAirportIata and ArrivalAirportEnglish.
    '''
    # Bound the wait so a stalled API cannot block the app forever
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    flights = response.json().get('flights', [])

    rows = [{
        'ArrivalAirportIata': (flight.get('flightLegIdentifier') or {}).get('arrivalAirportIata'),
        'ArrivalAirportEnglish': flight.get('arrivalAirportEnglish'),
    } for flight in flights]

    # Explicit columns keep the frame well-formed even when no flight is returned
    return pd.DataFrame(rows, columns=['ArrivalAirportIata', 'ArrivalAirportEnglish'])


def get_name_of_cities():
    '''
    Return a dataframe of the destinations served from ARN today and tomorrow.

    The result has the columns ArrivalAirportEnglish and ArrivalAirportIata,
    one row per IATA code, sorted by the English airport name.

    Reads the Swedavia subscription key from the SWEDAVIA_API_KEY environment
    variable (KeyError if it is missing) and raises
    requests.exceptions.RequestException if a request fails.
    '''
    yyyy, mm, dd = get_today_date()
    yyyy1, mm1, dd1 = one_day_forward(yyyy, mm, dd)

    # One request URL per day
    departures_url = 'https://api.swedavia.se/flightinfo/v2/ARN/departures/'
    swedavia_url = departures_url + get_date_label(yyyy, mm, dd, 'hyphen')
    swedavia_url1 = departures_url + get_date_label(yyyy1, mm1, dd1, 'hyphen')

    # Get the subscription key for the Swedavia API and set it in the header
    subscription_key = os.environ['SWEDAVIA_API_KEY']
    headers = {
        "Ocp-Apim-Subscription-Key": subscription_key,
        "Accept": "application/json",
        "Content-Type": 'application/json',
    }

    df = _fetch_arrival_airports(swedavia_url, headers)
    # Tomorrow's listing must be requested with tomorrow's URL
    df1 = _fetch_arrival_airports(swedavia_url1, headers)

    # De-duplicate whole rows keyed on the IATA code so that every airport name
    # stays paired with its own code
    total_df = pd.concat([df1, df], ignore_index=True)
    total_df = total_df.drop_duplicates(subset='ArrivalAirportIata').reset_index(drop=True)
    total_df = total_df[['ArrivalAirportEnglish', 'ArrivalAirportIata']]
    total_df = total_df.sort_values('ArrivalAirportEnglish')

    return total_df
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def create_single_dataframe_from(dataframe):
    '''
    Attach the English destination name to every row of the given dataframe.

    The destination list from get_name_of_cities is inner-joined to "dataframe"
    by matching its lower-cased ArrivalAirportIata against the "airport" column;
    the ArrivalAirportIata column is dropped from the result.
    '''
    cities = get_name_of_cities()
    # The join key is compared in lower case
    cities['ArrivalAirportIata'] = cities['ArrivalAirportIata'].str.lower()

    joined = cities.merge(
        dataframe,
        how='inner',
        left_on='ArrivalAirportIata',
        right_on='airport',
    )
    # The IATA code is already available through the 'airport' column
    return joined.drop(columns='ArrivalAirportIata')
|
| 177 |
|
| 178 |
def get_dataframe(online_dataframe_path):
|
| 179 |
# Connect to Hopsworks File System
|
|
|
|
| 186 |
# Read dataframe from local path, drop duplicates, return
|
| 187 |
dataframe = pd.read_csv(dataframe_path)
|
| 188 |
dataframe.drop_duplicates(inplace=True)
|
| 189 |
+
|
| 190 |
+
dataframe = create_single_dataframe_from(dataframe)
|
| 191 |
return dataframe
|
| 192 |
|
| 193 |
def get_tomorrow_dataframe():
|
|
|
|
| 196 |
def get_today_dataframe():
    '''
    Return the prediction dataframe served as "today".
    '''
    # NOTE(review): this reads hopsworks_tomorrow_path although the function is
    # named "today" - confirm whether the today/tomorrow paths are swapped on purpose
    source_path = hopsworks_tomorrow_path
    return get_dataframe(source_path)
|
| 198 |
|
| 199 |
+
def get_metrics():
    '''
    Return the model performance history as a dataframe with the columns
    Date, Mean Absolute Error and Dataset Size, newest entry first.

    Logs in to Hopsworks with the key from the HOPSWORKS_API_KEY environment
    variable and reads version 1 of the 'model_performance' feature group.
    '''
    # Connect to Hopsworks and locate the feature group
    project = hopsworks.login(api_key_value = os.environ['HOPSWORKS_API_KEY'])
    feature_store = project.get_feature_store()
    performance_group = feature_store.get_feature_group(name = 'model_performance', version = 1)

    history = performance_group.read(dataframe_type = 'pandas').sort_values('timestamp')

    # Keep only the displayed columns and give them readable headers
    display_names = {'dateset_size':'Dataset Size', 'mae':'Mean Absolute Error', 'timestamp':'Date'}
    report = history[['timestamp', 'mae', 'dateset_size']].rename(columns=display_names)

    return report.sort_values(['Date'], ascending = False)
|
| 209 |
+
|
| 210 |
+
# Module-level data loading: everything below runs once, when the module is
# imported, and the results are read as globals by the functions further down.

# Columns kept in the today/tomorrow tables
selected_columns = ['destination', 'airport code', 'flight number', 'ontime', 'delayed']
# Placeholders; both names are reassigned below
ciccio, pasticcio = pd.DataFrame(), pd.DataFrame()
# Destination names and IATA codes fetched from the Swedavia API
cities_datafram = get_name_of_cities()
# "Today" table: rename the raw columns to the display names, then keep the selected ones
ciccio = get_today_dataframe()
ciccio = ciccio.rename(columns={'airport':'airport code' , 'ArrivalAirportEnglish':'destination', 'flight_number':'flight number'})
today_dataframe = ciccio[selected_columns]
# "Tomorrow" table: same treatment
pasticcio = get_tomorrow_dataframe()
pasticcio = pasticcio.rename(columns={'airport':'airport code','ArrivalAirportEnglish':'destination', 'flight_number':'flight number'})
tomorrow_dataframe = pasticcio[selected_columns]
# Model performance history returned by get_performance
performance_metric = get_metrics()
|
| 220 |
+
|
| 221 |
|
| 222 |
def get_possible_destinations():
    '''
    Return the sorted list of distinct destinations found in the today and
    tomorrow dataframes.
    '''
    all_destinations = pd.concat([today_dataframe['destination'], tomorrow_dataframe['destination']])
    unique_destinations = all_destinations.drop_duplicates().reset_index(drop=True)
    ordered = unique_destinations.sort_values()

    total_df = pd.DataFrame({'destination': ordered})
    return total_df['destination'].tolist()
|
| 228 |
|
| 229 |
+
def get_dataframe_of(day):
    '''
    Return the module-level dataframe for the requested day.

    "day" is matched case-insensitively against 'today' and 'tomorrow'; any
    other value yields None.
    '''
    requested = day.lower()
    if requested == 'today':
        return today_dataframe
    if requested == 'tomorrow':
        return tomorrow_dataframe
    return None
|
| 235 |
|
| 236 |
def get_specific_flights(day, max_delay, departure_hour, ampm, weather, destinations, yes):
|
| 237 |
df = get_dataframe_of(day)
|
|
|
|
| 249 |
|
| 250 |
# Get flight with less delay than the given and from the destinations selected, of the right day
|
| 251 |
df['delay'] = (df['delayed'] - df['ontime']).dt.total_seconds() / 60
|
| 252 |
+
filtered_df = df.loc[(df['delay'] < max_delay) & (df['destination'].isin(destinations)), ['destination', 'flight number', 'ontime', 'delayed']]
|
| 253 |
|
| 254 |
# Convert the string to datetime, then the datetime column to HH:MM
|
| 255 |
filtered_df['ontime'] = pd.to_datetime(filtered_df['ontime'])
|
|
|
|
| 262 |
def full_day_departure(day):
    '''
    Return the whole departure table for the requested day ('today' or 'tomorrow').
    '''
    departures = get_dataframe_of(day)
    return departures
|
| 264 |
|
| 265 |
+
def get_performance():
    '''
    Return the model performance dataframe stored in the module-level
    performance_metric variable.
    '''
    # Read-only access to a module-level name needs no `global` declaration
    return performance_metric
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
|
| 269 |
specific_flights = gr.Interface(
|
| 270 |
get_specific_flights,
|
|
|
|
| 290 |
"dataframe",
|
| 291 |
)
|
| 292 |
|
| 293 |
+
# Input-less tab that displays the dataframe returned by get_performance
metrics = gr.Interface(fn = get_performance, inputs=None, outputs='dataframe', allow_flagging="never")

#flights.launch()

# Tab names are given as a list in the same order as the interfaces: a set has
# no guaranteed iteration order, so the labels could land on the wrong tabs.
interface = gr.TabbedInterface(
    [specific_flights, total_departure, metrics],
    ["Specific Flights", "Full Day Departure", "Model Performances"],
)
interface.launch()
|