Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import pandas as pd | |
| import hopsworks | |
| from datetime import datetime | |
| import requests | |
| import json | |
| import math | |
| import os | |
# Locations, inside the Hopsworks project file system, of the CSV files that
# hold the timetable predictions for today and for tomorrow.
hopsworks_today_path = (
    "Resources/today_timetable_prediction/"
    "today_timetable_prediction.csv"
)
hopsworks_tomorrow_path = (
    "Resources/tomorrow_timetable_prediction/"
    "tomorrow_timetable_prediction.csv"
)
def one_day_forward(year, month, day):
    '''
    Return the "year", "month" and "day" numbers of the day after the given date.

    The computation is delegated to the standard library calendar (proleptic
    Gregorian), so every month length, the year roll-over and the complete
    leap-year rule (century years are leap only when divisible by 400) are
    handled.

    Raises ValueError if the three numbers do not form a valid date, and
    OverflowError when asked for the day after 9999-12-31.
    '''
    # Imported locally: the module itself only imports the "datetime" class.
    from datetime import date, timedelta

    next_day = date(year, month, day) + timedelta(days=1)
    return next_day.year, next_day.month, next_day.day
def get_today_date():
    '''
    Return today's year, month and day numbers, as reported for the
    Europe/Stockholm timezone by the WorldTimeAPI service.

    Raises requests.RequestException if the service does not answer within
    the timeout or answers with an HTTP error status.
    '''
    time_url = "https://worldtimeapi.org/api/timezone/Europe/Stockholm"
    # A timeout keeps an unresponsive service from blocking the caller forever,
    # and raise_for_status() reports HTTP errors directly instead of letting
    # them surface later as a JSON or KeyError failure.
    time_response = requests.get(time_url, timeout=10)
    time_response.raise_for_status()

    # The last 6 characters of the "datetime" field are the timezone offset;
    # they are removed so the value is parsed as a naive local timestamp.
    local_timestamp = time_response.json()["datetime"][:-6]
    today = datetime.fromisoformat(local_timestamp)
    return today.year, today.month, today.day
def get_year_month_label(year, month, mode):
    '''
    Build the year + month label used by the different APIs file structures.

    The month gets a leading 0 unless it is 10, 11 or 12, and it is joined to
    the year with the divider selected by "mode": "hyphen" (e.g. 2024-01),
    "underscore" (e.g. 2024_01) or "empty" (e.g. 202401). Any other mode
    gives back an empty string.
    '''
    year_label = str(year)
    # Only the two-digit months are left as they are
    month_label = str(month) if month in {10, 11, 12} else '0' + str(month)

    divider = {'hyphen': '-', 'underscore': '_', 'empty': ''}.get(mode)
    if divider is None:
        return ''
    return year_label + divider + month_label
def get_date_label(year, month, day, mode):
    '''
    Build the full date label used by the different APIs file structures.

    The year + month part comes from get_year_month_label(); the day gets a
    leading 0 when it is below 10 and is appended with the divider selected by
    "mode": "hyphen" (e.g. 2024-01-05), "underscore" (e.g. 2024_01_05) or
    "empty" (e.g. 20240105). Any other mode gives back an empty string.
    '''
    year_month_label = get_year_month_label(year, month, mode)
    day_label = '0' + str(day) if day < 10 else str(day)

    divider = {'hyphen': '-', 'underscore': '_', 'empty': ''}.get(mode)
    if divider is None:
        return ''
    return year_month_label + divider + day_label
def _fetch_arrival_airports(date_label, headers):
    '''
    Download the ARN departures of "date_label" from the Swedavia API and
    return a dataframe with one row per flight and the columns
    "ArrivalAirportIata" and "ArrivalAirportEnglish".
    '''
    swedavia_url = 'https://api.swedavia.se/flightinfo/v2/ARN/departures/' + date_label
    response = requests.get(swedavia_url, headers=headers, timeout=30)
    response.raise_for_status()
    arrival_airports_info = [
        {
            'ArrivalAirportIata': flight.get('flightLegIdentifier', {}).get('arrivalAirportIata'),
            'ArrivalAirportEnglish': flight.get('arrivalAirportEnglish'),
        }
        for flight in response.json().get('flights', [])
    ]
    # Naming the columns keeps them present even when no flight is returned
    return pd.DataFrame(arrival_airports_info, columns=['ArrivalAirportIata', 'ArrivalAirportEnglish'])


def get_name_of_cities():
    '''
    Return a dataframe with the destinations of the flights leaving ARN today
    and tomorrow, one row per IATA code, with the columns
    "ArrivalAirportEnglish" and "ArrivalAirportIata", sorted by
    "ArrivalAirportEnglish".

    The Swedavia subscription key is read from the SWEDAVIA_API_KEY environment
    variable (KeyError if it is not set). Raises requests.RequestException if
    a request times out or is answered with an HTTP error status.
    '''
    yyyy, mm, dd = get_today_date()
    yyyy1, mm1, dd1 = one_day_forward(yyyy, mm, dd)

    headers = {
        "Ocp-Apim-Subscription-Key": os.environ['SWEDAVIA_API_KEY'],
        "Accept": "application/json",
        "Content-Type": 'application/json',
    }

    # Tomorrow's flights are placed first, then today's
    frames = [
        _fetch_arrival_airports(get_date_label(yyyy1, mm1, dd1, 'hyphen'), headers),
        _fetch_arrival_airports(get_date_label(yyyy, mm, dd, 'hyphen'), headers),
    ]
    all_flights = pd.concat(frames, ignore_index=True)

    # Duplicates are removed on whole rows, keyed on the IATA code, so every
    # name stays attached to its own code. De-duplicating each column on its
    # own and re-pairing them by position mismatches names and codes as soon
    # as the two columns have a different number of distinct values.
    unique_airports = all_flights.drop_duplicates(subset='ArrivalAirportIata').reset_index(drop=True)
    total_df = unique_airports.loc[:, ['ArrivalAirportEnglish', 'ArrivalAirportIata']]
    return total_df.sort_values('ArrivalAirportEnglish')
def create_single_dataframe_from(dataframe):
    '''
    Add the destination names to "dataframe".

    The airports returned by get_name_of_cities() are inner-joined to
    "dataframe" by matching their lower-cased IATA code with the "airport"
    column. The IATA column is removed from the result, since it duplicates
    "airport".
    '''
    cities = get_name_of_cities()
    cities['ArrivalAirportIata'] = cities['ArrivalAirportIata'].str.lower()
    joined = pd.merge(
        cities,
        dataframe,
        how='inner',
        left_on='ArrivalAirportIata',
        right_on='airport',
    )
    return joined.drop(columns='ArrivalAirportIata')
def get_dataframe(online_dataframe_path):
    '''
    Download the CSV stored at "online_dataframe_path" in the Hopsworks file
    system and return it as a dataframe, without duplicated rows and enriched
    by create_single_dataframe_from().

    The Hopsworks API key is read from the HOPSWORKS_API_KEY environment
    variable.
    '''
    # Log in and download the file, overwriting any previous local copy
    project = hopsworks.login(api_key_value=os.environ['HOPSWORKS_API_KEY'])
    downloaded_file = project.get_dataset_api().download(online_dataframe_path, overwrite=True)
    local_path = os.path.abspath(downloaded_file)

    predictions = pd.read_csv(local_path).drop_duplicates()
    return create_single_dataframe_from(predictions)
def get_tomorrow_dataframe():
    '''
    Return the dataframe built by get_dataframe() from tomorrow's prediction file.
    '''
    # Each loader must read the file of its own day: with the two paths
    # swapped, the "today" data would be shown as tomorrow's and vice versa.
    return get_dataframe(hopsworks_tomorrow_path)


def get_today_dataframe():
    '''
    Return the dataframe built by get_dataframe() from today's prediction file.
    '''
    return get_dataframe(hopsworks_today_path)
def get_metrics():
    '''
    Read version 1 of the "model_performance" feature group from the Hopsworks
    feature store and return its "timestamp", "mae" and "dateset_size" columns,
    renamed to "Date", "Mean Absolute Error" and "Dataset Size" and sorted by
    "Date" in descending order.

    The Hopsworks API key is read from the HOPSWORKS_API_KEY environment
    variable.
    '''
    project = hopsworks.login(api_key_value=os.environ['HOPSWORKS_API_KEY'])
    feature_store = project.get_feature_store()
    feature_group = feature_store.get_feature_group(name='model_performance', version=1)

    history = feature_group.read(dataframe_type='pandas').sort_values('timestamp')

    # 'dateset_size' (sic) is the column name as read from the feature group
    display_names = {
        'dateset_size': 'Dataset Size',
        'mae': 'Mean Absolute Error',
        'timestamp': 'Date',
    }
    renamed = history[['timestamp', 'mae', 'dateset_size']].rename(columns=display_names)
    return renamed.sort_values(['Date'], ascending=False)
# Columns exposed to the interface, and the renaming that produces them from
# the columns of the loaded dataframes.
selected_columns = ['destination', 'airport code', 'flight number', 'ontime', 'delayed']
_display_names = {
    'airport': 'airport code',
    'ArrivalAirportEnglish': 'destination',
    'flight_number': 'flight number',
}

# Everything below is loaded once, when the module is executed, and is then
# read by the interface callbacks.
cities_datafram = get_name_of_cities()

ciccio = get_today_dataframe().rename(columns=_display_names)
today_dataframe = ciccio[selected_columns]

pasticcio = get_tomorrow_dataframe().rename(columns=_display_names)
tomorrow_dataframe = pasticcio[selected_columns]

performance_metric = get_metrics()
def get_possible_destinations():
    '''
    Return the sorted list of the distinct "destination" values found in the
    today and tomorrow dataframes.
    '''
    all_destinations = pd.concat([today_dataframe['destination'], tomorrow_dataframe['destination']])
    return all_destinations.drop_duplicates().sort_values().tolist()
def get_dataframe_of(day):
    '''
    Return the module-level dataframe of the requested "day": "today" or
    "tomorrow", compared case-insensitively. Any other value returns None.
    '''
    requested_day = day.lower()
    if requested_day == 'today':
        return today_dataframe
    if requested_day == 'tomorrow':
        return tomorrow_dataframe
    return None
def get_specific_flights(day, max_delay, departure_hour, ampm, weather, destinations, yes):
    '''
    Return the flights of "day" whose scheduled ("ontime") hour equals
    "departure_hour", whose destination is among "destinations" and whose
    delay (the "delayed" time minus the "ontime" time) is below "max_delay"
    minutes.

    "Select all" among the destinations stands for every known destination.
    "ampm", "weather" and "yes" are received from the interface but do not
    influence the result.

    The returned dataframe has the columns "destination", "flight number",
    "ontime" and "delayed", the last two formatted as HH:MM.
    '''
    # Work on a copy: get_dataframe_of() hands out the dataframe shared by the
    # whole app, which must not be modified by a single request.
    df = get_dataframe_of(day).copy()

    # An emptied destination selector may arrive as None
    destinations = list(destinations or [])
    if 'Select all' in destinations:
        destinations = get_possible_destinations()
    # Remove the entries of the selector that are not real destinations
    placeholders = {"That's a reason why I travel alone...", "I prefer not to say", 'Select all'}
    destinations = [dest for dest in destinations if dest not in placeholders]

    # Keep only the flights scheduled during the requested hour
    scheduled_hour = df['ontime'].str.split(':').str[0].astype(int)
    df = df[scheduled_hour == departure_hour]

    # Delay in minutes between the two HH:MM times
    ontime = pd.to_datetime(df['ontime'], format='%H:%M')
    delayed = pd.to_datetime(df['delayed'], format='%H:%M')
    delay = (delayed - ontime).dt.total_seconds() / 60
    # Times carry no date, so a "delayed" time past midnight parses as earlier
    # than "ontime" and gives a large negative difference; add a day back so
    # such flights are not treated as having no delay at all.
    delay = delay.where(delay > -720, delay + 1440)

    selected = (delay < max_delay) & df['destination'].isin(destinations)
    filtered_df = df.loc[selected, ['destination', 'flight number']].copy()
    filtered_df['ontime'] = ontime[selected].dt.strftime('%H:%M')
    filtered_df['delayed'] = delayed[selected].dt.strftime('%H:%M')
    return filtered_df
def full_day_departure(day):
    '''
    Return all the flights of "day" without the "airport code" column, sorted
    by "ontime" in descending order.
    '''
    flights = get_dataframe_of(day)
    without_code = flights.drop(columns=['airport code'])
    return without_code.sort_values(['ontime'], ascending=False)
def get_performance():
    '''
    Return the module-level "performance_metric" dataframe.
    '''
    return performance_metric
# Manual checks of the callbacks, kept for reference:
# print(get_specific_flights('today', 100, 8, 'am', 'weather', ['Select all'], 'yes'))
# print(full_day_departure('tomorrow'))
# print(get_performance())

# The inputs are listed in the same order as the parameters of
# get_specific_flights().
specific_flights = gr.Interface(
    get_specific_flights,
    [
        gr.Radio(["today", "tomorrow"], type="value", label="Day", info="When do you have the plane?"),
        gr.Slider(0, 50, value=20, label="Possible Delay", info="How unfortunate do you wanna be?"),
        gr.Number(precision=0, minimum=0, maximum=23, label="Departure Time"),
        gr.Radio(["am", "pm"], type="index", info="It's the same, no worries!", label = "Am or Pm?"),
        gr.CheckboxGroup(["Yes, it's cloudy", "I am not in Stockholm"], label="Weather", info="Is it a typical Stockholm day?"),
        gr.Dropdown(
            get_possible_destinations() + ["That's a reason why I travel alone...", "I prefer not to say", "Select all"],
            type = "value", multiselect=True, label="Destination", value=["That's a reason why I travel alone..."],
            info="Are you just curious or you are actually going somewhere? Where? With who?",
        ),
        gr.Radio(["Yes", "Yes", "Yes"], type="index", label="Let's guess?", info="We know that you'll say yes!"),
    ],
    "dataframe",
)

total_departure = gr.Interface(
    full_day_departure,
    [
        gr.Radio(["Today", "Tomorrow"], type="value", label="Departure", info="When are you departing?"),
    ],
    "dataframe",
)

metrics = gr.Interface(fn = get_performance, inputs=None, outputs='dataframe', allow_flagging="never")

# Tab titles are given as a list, in the same order as the interfaces they
# label. A set has no defined order, so each title could land on any tab.
interface = gr.TabbedInterface(
    [specific_flights, total_departure, metrics],
    ["Specific Flights", "Full Day Departure", "Model Performances"],
)
interface.launch()