Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import pandas as pd | |
| import joblib | |
| import numpy as np | |
| from sklearn.impute import SimpleImputer | |
| # ----------------------------- | |
| # Load new tuned classification model package | |
| # ----------------------------- | |
# The package below is expected to come from the training script, e.g.:
# joblib.dump({"model": ensemble, "threshold": best_threshold, "columns": list(X_train.columns)}, "main/final_delay_model.pkl")
model_package = joblib.load("main/final_delay_model.pkl")

# Unpack the three entries of the package (classifier, decision threshold,
# feature column order) in a single statement.
ensemble_model, best_threshold, reference_columns = (
    model_package[key] for key in ("model", "threshold", "columns")
)
| # ----------------------------- | |
| # Load regression models and training columns | |
| # ----------------------------- | |
# Load the three regressors and the regression feature column list, in the
# same order as before, and bind them to their module-level names.
ridge_model, xgb_reg_model, gbr_reg_model, training_columns_reg = map(
    joblib.load,
    (
        "main/ridge_model.pkl",
        "main/xgb_model.pkl",
        "main/gbr_model.pkl",
        "main/training_columns.pkl",
    ),
)
| # ----------------------------- | |
| # Preprocessing for classification | |
| # ----------------------------- | |
def preprocess_classification(df):
    """One-hot encode raw features and align them to the classifier's columns.

    Args:
        df: DataFrame of raw features. It must contain every column listed in
            ``categorical_cols`` below, otherwise ``pd.get_dummies`` raises.

    Returns:
        A float DataFrame whose columns are exactly ``reference_columns`` (in
        that order) with a fresh 0..n-1 index and no missing values.
    """
    categorical_cols = ['UNIQUE_CARRIER', 'CARRIER', 'ORIGIN', 'DEST',
                        'ORIGIN_STATE_ABR', 'DEST_STATE_ABR',
                        'DEP_TIME_BLK', 'ARR_TIME_BLK']
    df_encoded = pd.get_dummies(df, columns=categorical_cols)

    # Align to the training layout in one step (same approach as
    # preprocess_regression): columns missing from this request are created
    # with 0, columns not in reference_columns are dropped, and the order
    # follows reference_columns. This replaces a per-column insertion loop.
    df_encoded = df_encoded.reindex(columns=reference_columns, fill_value=0)

    # Dummies may be bool and padded columns are int; produce one float frame
    # with a default index, as the previous imputer-based version did.
    df_encoded = df_encoded.astype(float).reset_index(drop=True)

    # Median-fill missing values from the rows at hand. A column that is
    # entirely missing has no median, so it falls back to 0 instead of being
    # discarded (a median SimpleImputer fitted on such input drops the column,
    # which made rebuilding the frame with the full column list fail).
    # NOTE(review): medians come from the request, not from training data --
    # persist training medians with the model package if real imputation is
    # needed.
    return df_encoded.fillna(df_encoded.median()).fillna(0.0)
| # ----------------------------- | |
| # Preprocessing for regression | |
| # ----------------------------- | |
def preprocess_regression(df):
    """One-hot encode the categorical regression features and align columns.

    Args:
        df: DataFrame of raw regression features containing the
            ``time_of_day`` and ``wind_dir_bucket`` columns.

    Returns:
        A DataFrame whose columns are exactly ``training_columns_reg``.
    """
    dummy_source_cols = ['time_of_day', 'wind_dir_bucket']
    one_hot = pd.get_dummies(df, columns=dummy_source_cols)
    # Columns absent from this input are filled with 0; columns that are not
    # in training_columns_reg are dropped by the reindex.
    return one_hot.reindex(columns=training_columns_reg, fill_value=0)
| # ----------------------------- | |
| # Delay category helper | |
| # ----------------------------- | |
def categorize_delay(minutes):
    """Return the descriptive label for a delay expressed in minutes.

    Args:
        minutes: Delay length in minutes.

    Returns:
        The label of the first band whose exclusive upper bound is greater
        than ``minutes``; the final label when no band matches.
    """
    # (exclusive upper bound, label), in ascending order of the bound.
    bands = (
        (15, "Delay not considered less than 15 mins"),
        (20, "Delay is Minimum"),
        (30, "Flight is moderately delayed"),
        (60, "Flight is highly delayed"),
    )
    for upper_bound, label in bands:
        if minutes < upper_bound:
            return label
    return "Flight is delayed too much"
| # ----------------------------- | |
| # Classification prediction function | |
| # ----------------------------- | |
def predict_classification(YEAR, MONTH, DAY_OF_MONTH, DAY_OF_WEEK,
                           ORIGIN, DEST, CARRIER,
                           ORIGIN_STATE_ABR, DEST_STATE_ABR,
                           DEP_TIME_BLK, ARR_TIME_BLK,
                           temp, prcp, wspd, wdir, route_delay_rate):
    """Classify one flight as delayed or on time with the tuned ensemble.

    The date parts are converted with ``int`` and the weather / route values
    with ``float``; the remaining arguments are categorical text values.
    ``CARRIER`` is used for both the ``UNIQUE_CARRIER`` and ``CARRIER``
    features.

    Returns:
        A dict with ``"Prediction"`` (``"Delayed"`` when the positive-class
        probability is at least ``best_threshold``, else ``"On Time"``),
        ``"Confidence"`` (that probability, 3 decimals) and ``"Threshold"``
        (``best_threshold``, 3 decimals), all as plain Python values.
    """
    def _clean(text):
        # Surrounding whitespace would produce a one-hot column name that does
        # not match any training column, silently zeroing the whole feature.
        return text.strip() if isinstance(text, str) else text

    carrier = _clean(CARRIER)
    data = {
        'YEAR': int(YEAR),
        'MONTH': int(MONTH),
        'DAY_OF_MONTH': int(DAY_OF_MONTH),
        'DAY_OF_WEEK': int(DAY_OF_WEEK),
        'UNIQUE_CARRIER': carrier,
        'CARRIER': carrier,
        'ORIGIN': _clean(ORIGIN),
        'DEST': _clean(DEST),
        'ORIGIN_STATE_ABR': _clean(ORIGIN_STATE_ABR),
        'DEST_STATE_ABR': _clean(DEST_STATE_ABR),
        'DEP_TIME_BLK': _clean(DEP_TIME_BLK),
        'ARR_TIME_BLK': _clean(ARR_TIME_BLK),
        'temp': float(temp),
        'prcp': float(prcp),
        'wspd': float(wspd),
        'wdir': float(wdir),
        'route_delay_rate': float(route_delay_rate),
    }
    df_input = pd.DataFrame([data])
    X = preprocess_classification(df_input)

    # Convert to built-in floats before rounding: round() on a numpy scalar
    # returns a numpy scalar (e.g. float32), which can carry representation
    # noise and is not reliably JSON-serialisable.
    proba = float(ensemble_model.predict_proba(X)[0][1])
    threshold = float(best_threshold)

    return {
        "Prediction": "Delayed" if proba >= threshold else "On Time",
        "Confidence": round(proba, 3),
        "Threshold": round(threshold, 3),
    }
| # ----------------------------- | |
| # Regression prediction function (unchanged) | |
| # ----------------------------- | |
def predict_regression_with_check(DEP_DELAY, DEP_DELAY_NEW, DEP_DEL15, DEP_DELAY_GROUP,
                                  temp, prcp, wspd, wdir, bad_weather, wind_dir_bucket,
                                  time_of_day, is_weekend):
    """Predict the delay in minutes, but only when ``DEP_DEL15`` is non-zero.

    When ``int(DEP_DEL15)`` is 0 no model is called and a "no delay" result is
    returned. Otherwise the three regression models are run on the same
    preprocessed row and the largest prediction is categorised with
    ``categorize_delay``.

    Returns:
        Either ``{"Status": "No delay predicted", "Delay Category": None}``,
        or a dict with the three individual predictions, ``"Max Prediction"``
        (each rounded to 2 decimals, as plain Python floats) and
        ``"Delay Category"``.
    """
    # Short-circuit before touching any model.
    if int(DEP_DEL15) == 0:
        return {
            "Status": "No delay predicted",
            "Delay Category": None
        }

    data = {
        'DEP_DELAY': float(DEP_DELAY),
        'DEP_DELAY_NEW': float(DEP_DELAY_NEW),
        'DEP_DEL15': int(DEP_DEL15),
        'DEP_DELAY_GROUP': int(DEP_DELAY_GROUP),
        'temp': float(temp),
        'prcp': float(prcp),
        'wspd': float(wspd),
        'wdir': float(wdir),
        'bad_weather': int(bad_weather),
        'wind_dir_bucket': wind_dir_bucket,
        'time_of_day': time_of_day,
        'is_weekend': int(is_weekend)
    }
    df_input = pd.DataFrame([data])
    X = preprocess_regression(df_input)

    # Convert each prediction to a built-in float: the models may return numpy
    # scalars of different widths, and round() keeps the numpy type, which can
    # show representation noise and is not reliably JSON-serialisable.
    pred_ridge = float(ridge_model.predict(X)[0])
    pred_xgb = float(xgb_reg_model.predict(X)[0])
    pred_gbr = float(gbr_reg_model.predict(X)[0])

    # The largest of the three estimates drives the category.
    max_pred = max(pred_ridge, pred_xgb, pred_gbr)
    category = categorize_delay(max_pred)

    return {
        "Ridge Prediction": round(pred_ridge, 2),
        "XGBoost Prediction": round(pred_xgb, 2),
        "Gradient Boosting Prediction": round(pred_gbr, 2),
        "Max Prediction": round(max_pred, 2),
        "Delay Category": category
    }
| # ----------------------------- | |
| # Gradio Interface | |
| # ----------------------------- | |
# Input widgets for the classification tab, in the positional order expected
# by predict_classification: four date numbers, seven text codes, then five
# numeric weather / route values.
classification_inputs = [
    *(gr.Number(label=caption) for caption in (
        "YEAR",
        "MONTH",
        "DAY_OF_MONTH",
        "DAY_OF_WEEK (1=Mon ... 7=Sun)",
    )),
    *(gr.Textbox(label=caption) for caption in (
        "Origin Airport Code",
        "Destination Airport Code",
        "Carrier Code",
        "Origin State Abbreviation",
        "Destination State Abbreviation",
        "Departure Time Block (e.g., 0600-0659)",
        "Arrival Time Block (e.g., 0900-0959)",
    )),
    *(gr.Number(label=caption) for caption in (
        "Temperature",
        "Precipitation",
        "Wind Speed",
        "Wind Direction",
        "Route Delay Rate (historical)",
    )),
]
# Input widgets for the regression tab, in the positional order expected by
# predict_regression_with_check: nine numbers, two text fields, one number.
regression_inputs = [
    *(gr.Number(label=caption) for caption in (
        "DEP_DELAY",
        "DEP_DELAY_NEW",
        "DEP_DEL15 (0 or 1)",
        "DEP_DELAY_GROUP",
        "Temperature",
        "Precipitation",
        "Wind Speed",
        "Wind Direction",
        "Bad Weather (0 or 1)",
    )),
    *(gr.Textbox(label=caption) for caption in (
        "Wind Dir Bucket (North/South/East/West/etc.)",
        "Time of Day (Morning/Afternoon/Evening/Night)",
    )),
    gr.Number(label="Is Weekend (0 or 1)"),
]
# One Interface per prediction function; each renders the returned dict as JSON.
classification_tab = gr.Interface(
    predict_classification,
    classification_inputs,
    "json",
    title="Flight Delay Classification (Tuned Ensemble)",
    description="Predict delay classification using the tuned ensemble model with threshold optimization.",
)

regression_tab = gr.Interface(
    predict_regression_with_check,
    regression_inputs,
    "json",
    title="Flight Delay Regression (Conditional)",
    description="Predict arrival delay in minutes only if DEP_DEL15=1, with categorized output.",
)

# Combine both interfaces into a single app with one tab each.
demo = gr.TabbedInterface(
    interface_list=[classification_tab, regression_tab],
    tab_names=["Classification", "Regression"],
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()