# vansh0003's picture
# Update app.py
# fa57b58 verified
import gradio as gr
import pandas as pd
import joblib
import numpy as np
from sklearn.impute import SimpleImputer
# -----------------------------
# Tuned classification model bundle
# -----------------------------
# The bundle is expected to have been written by the training script with:
# joblib.dump({"model": ensemble, "threshold": best_threshold, "columns": list(X_train.columns)}, "main/final_delay_model.pkl")
# NOTE: joblib.load unpickles the file, so it must come from a trusted source.
model_package = joblib.load("main/final_delay_model.pkl")
# Pull the three entries out in one step: the estimator, its decision
# threshold, and the ordered feature names used for alignment at inference.
ensemble_model, best_threshold, reference_columns = (
    model_package[entry] for entry in ("model", "threshold", "columns")
)
# -----------------------------
# Regression models and their training columns
# -----------------------------
# Loaded in a fixed order: Ridge, XGBoost, Gradient Boosting, then the
# column list that preprocess_regression aligns incoming rows to.
ridge_model, xgb_reg_model, gbr_reg_model, training_columns_reg = (
    joblib.load(artifact_path)
    for artifact_path in (
        "main/ridge_model.pkl",
        "main/xgb_model.pkl",
        "main/gbr_model.pkl",
        "main/training_columns.pkl",
    )
)
# -----------------------------
# Preprocessing for classification
# -----------------------------
def preprocess_classification(df, columns=None):
    """Encode raw classification inputs into the model's feature layout.

    One-hot encodes the categorical fields, then aligns the frame to the
    reference column list: columns absent from ``df`` are added as 0 and
    columns not in the list are dropped.

    Args:
        df: DataFrame holding the raw input row(s). It must contain every
            categorical column listed in ``categorical_cols`` below.
        columns: Ordered feature names to align to. Defaults to the
            module-level ``reference_columns``.

    Returns:
        A float DataFrame with exactly ``columns`` in that order and a
        fresh 0..n-1 index.

    Raises:
        KeyError: If ``df`` lacks one of the categorical columns.
    """
    if columns is None:
        columns = reference_columns

    categorical_cols = ['UNIQUE_CARRIER', 'CARRIER', 'ORIGIN', 'DEST',
                        'ORIGIN_STATE_ABR', 'DEST_STATE_ABR',
                        'DEP_TIME_BLK', 'ARR_TIME_BLK']
    df_encoded = pd.get_dummies(df, columns=categorical_cols)

    # A single reindex adds the missing columns (as 0), drops unknown ones
    # and fixes the order, matching what preprocess_regression does.
    df_encoded = df_encoded.reindex(columns=list(columns), fill_value=0)

    # Previously a median SimpleImputer was re-fit on the request itself.
    # On a single row that learns nothing from training, and an all-NaN
    # column is dropped by the imputer, which broke the column alignment.
    # Missing values now fall back to 0.0 instead.
    # NOTE(review): if NaN inputs should receive training medians, persist the
    # fitted imputer in the model package and apply it here.
    df_encoded = df_encoded.astype(float).fillna(0.0)
    return df_encoded.reset_index(drop=True)
# -----------------------------
# Preprocessing for regression
# -----------------------------
def preprocess_regression(df):
    """Build the regression feature frame for the given raw input rows.

    The two categorical fields are one-hot encoded, after which the frame
    is aligned to ``training_columns_reg``: columns missing from the input
    are created with 0 and columns outside that list are discarded.
    """
    dummy_fields = ['time_of_day', 'wind_dir_bucket']
    expanded = pd.get_dummies(df, columns=dummy_fields)
    return expanded.reindex(columns=training_columns_reg, fill_value=0)
# -----------------------------
# Delay category helper
# -----------------------------
def categorize_delay(minutes):
    """Map a delay in minutes to its descriptive category string.

    Bands are half-open: below 15, [15, 20), [20, 30), [30, 60), and
    everything else (60 or more, or a value that compares below nothing).
    """
    # (exclusive upper bound, label), checked in ascending order so the
    # first bound the value falls under decides the category.
    bands = (
        (15, "Delay not considered less than 15 mins"),
        (20, "Delay is Minimum"),
        (30, "Flight is moderately delayed"),
        (60, "Flight is highly delayed"),
    )
    for upper_bound, label in bands:
        if minutes < upper_bound:
            return label
    return "Flight is delayed too much"
# -----------------------------
# Classification prediction function
# -----------------------------
def predict_classification(YEAR, MONTH, DAY_OF_MONTH, DAY_OF_WEEK,
                           ORIGIN, DEST, CARRIER,
                           ORIGIN_STATE_ABR, DEST_STATE_ABR,
                           DEP_TIME_BLK, ARR_TIME_BLK,
                           temp, prcp, wspd, wdir, route_delay_rate):
    """Classify one flight as "Delayed" or "On Time".

    Builds a single-row DataFrame from the form values, encodes it with
    ``preprocess_classification`` and compares the ensemble's probability
    for class index 1 against ``best_threshold``.

    Args:
        YEAR, MONTH, DAY_OF_MONTH, DAY_OF_WEEK: Calendar fields, cast to int.
        ORIGIN, DEST, CARRIER, ORIGIN_STATE_ABR, DEST_STATE_ABR,
        DEP_TIME_BLK, ARR_TIME_BLK: Categorical text fields. ``CARRIER``
            feeds both the 'UNIQUE_CARRIER' and 'CARRIER' features.
        temp, prcp, wspd, wdir, route_delay_rate: Numeric fields, cast to
            float.

    Returns:
        dict with "Prediction" (str), "Confidence" (probability of class
        index 1, rounded to 3 places) and "Threshold" (rounded to 3 places).
        Both numbers are built-in floats so the JSON output can serialize
        them.

    Raises:
        TypeError, ValueError: If a numeric field cannot be converted.
    """
    def _clean(value):
        # Stray whitespace from the text box would stop the value from
        # matching its one-hot column and silently zero the feature.
        return value.strip() if isinstance(value, str) else value

    carrier = _clean(CARRIER)
    data = {
        'YEAR': int(YEAR),
        'MONTH': int(MONTH),
        'DAY_OF_MONTH': int(DAY_OF_MONTH),
        'DAY_OF_WEEK': int(DAY_OF_WEEK),
        'UNIQUE_CARRIER': carrier,
        'CARRIER': carrier,
        'ORIGIN': _clean(ORIGIN),
        'DEST': _clean(DEST),
        'ORIGIN_STATE_ABR': _clean(ORIGIN_STATE_ABR),
        'DEST_STATE_ABR': _clean(DEST_STATE_ABR),
        'DEP_TIME_BLK': _clean(DEP_TIME_BLK),
        'ARR_TIME_BLK': _clean(ARR_TIME_BLK),
        'temp': float(temp),
        'prcp': float(prcp),
        'wspd': float(wspd),
        'wdir': float(wdir),
        'route_delay_rate': float(route_delay_rate),
    }
    df_input = pd.DataFrame([data])
    X = preprocess_classification(df_input)

    # predict_proba and the stored threshold may be NumPy scalars; convert
    # them once so the comparison and the returned values use plain floats.
    proba = float(ensemble_model.predict_proba(X)[0][1])
    threshold = float(best_threshold)
    is_delayed = proba >= threshold

    return {
        "Prediction": "Delayed" if is_delayed else "On Time",
        "Confidence": round(proba, 3),
        "Threshold": round(threshold, 3),
    }
# -----------------------------
# Regression prediction function (runs the regressors only when DEP_DEL15 is non-zero)
# -----------------------------
def predict_regression_with_check(DEP_DELAY, DEP_DELAY_NEW, DEP_DEL15, DEP_DELAY_GROUP,
                                  temp, prcp, wspd, wdir, bad_weather, wind_dir_bucket,
                                  time_of_day, is_weekend):
    """Estimate the delay with three regressors, but only for delayed departures.

    When ``DEP_DEL15`` is 0 no model is run and a "No delay predicted"
    status is returned. Otherwise the row is encoded with
    ``preprocess_regression``, each regressor predicts once, and the
    largest of the three predictions is categorized with
    ``categorize_delay``.

    Args:
        DEP_DELAY, DEP_DELAY_NEW: Departure delay fields, cast to float.
        DEP_DEL15: Flag deciding whether the regressors run, cast to int.
        DEP_DELAY_GROUP: Cast to int.
        temp, prcp, wspd, wdir: Weather fields, cast to float.
        bad_weather, is_weekend: Flags, cast to int.
        wind_dir_bucket, time_of_day: Categorical text fields, one-hot
            encoded during preprocessing.

    Returns:
        dict. Either {"Status", "Delay Category": None} when DEP_DEL15 is
        0, or the three per-model predictions, "Max Prediction" (all
        rounded to 2 places, as built-in floats) and "Delay Category".

    Raises:
        TypeError, ValueError: If a numeric field cannot be converted.
    """
    if int(DEP_DEL15) == 0:
        return {
            "Status": "No delay predicted",
            "Delay Category": None
        }

    data = {
        'DEP_DELAY': float(DEP_DELAY),
        'DEP_DELAY_NEW': float(DEP_DELAY_NEW),
        'DEP_DEL15': int(DEP_DEL15),
        'DEP_DELAY_GROUP': int(DEP_DELAY_GROUP),
        'temp': float(temp),
        'prcp': float(prcp),
        'wspd': float(wspd),
        'wdir': float(wdir),
        'bad_weather': int(bad_weather),
        'wind_dir_bucket': wind_dir_bucket,
        'time_of_day': time_of_day,
        'is_weekend': int(is_weekend)
    }
    df_input = pd.DataFrame([data])
    X = preprocess_regression(df_input)

    # Convert each model output to a built-in float before rounding: NumPy
    # scalars (float32 in particular) round in their own precision and may
    # not serialize cleanly in the JSON output.
    predictions = {
        "Ridge Prediction": float(ridge_model.predict(X)[0]),
        "XGBoost Prediction": float(xgb_reg_model.predict(X)[0]),
        "Gradient Boosting Prediction": float(gbr_reg_model.predict(X)[0]),
    }
    # The category is driven by the largest (unrounded) estimate.
    max_pred = max(predictions.values())

    result = {label: round(value, 2) for label, value in predictions.items()}
    result["Max Prediction"] = round(max_pred, 2)
    result["Delay Category"] = categorize_delay(max_pred)
    return result
# -----------------------------
# Gradio Interface
# -----------------------------
# Components are listed in the positional order of predict_classification's
# parameters; Gradio passes their values to the function in this order.
classification_inputs = [
    # Calendar fields
    *(gr.Number(label=caption) for caption in (
        "YEAR",
        "MONTH",
        "DAY_OF_MONTH",
        "DAY_OF_WEEK (1=Mon ... 7=Sun)",
    )),
    # Route, carrier and schedule fields
    *(gr.Textbox(label=caption) for caption in (
        "Origin Airport Code",
        "Destination Airport Code",
        "Carrier Code",
        "Origin State Abbreviation",
        "Destination State Abbreviation",
        "Departure Time Block (e.g., 0600-0659)",
        "Arrival Time Block (e.g., 0900-0959)",
    )),
    # Weather and route-history fields
    *(gr.Number(label=caption) for caption in (
        "Temperature",
        "Precipitation",
        "Wind Speed",
        "Wind Direction",
        "Route Delay Rate (historical)",
    )),
]
# Components are listed in the positional order of
# predict_regression_with_check's parameters.
regression_inputs = [
    # Departure-delay and weather fields
    *(gr.Number(label=caption) for caption in (
        "DEP_DELAY",
        "DEP_DELAY_NEW",
        "DEP_DEL15 (0 or 1)",
        "DEP_DELAY_GROUP",
        "Temperature",
        "Precipitation",
        "Wind Speed",
        "Wind Direction",
        "Bad Weather (0 or 1)",
    )),
    # Categorical fields that get one-hot encoded
    *(gr.Textbox(label=caption) for caption in (
        "Wind Dir Bucket (North/South/East/West/etc.)",
        "Time of Day (Morning/Afternoon/Evening/Night)",
    )),
    gr.Number(label="Is Weekend (0 or 1)"),
]
# One Interface per prediction function; both render the returned dict as JSON.
classification_tab = gr.Interface(
    title="Flight Delay Classification (Tuned Ensemble)",
    description="Predict delay classification using the tuned ensemble model with threshold optimization.",
    fn=predict_classification,
    inputs=classification_inputs,
    outputs="json",
)
regression_tab = gr.Interface(
    title="Flight Delay Regression (Conditional)",
    description="Predict arrival delay in minutes only if DEP_DEL15=1, with categorized output.",
    fn=predict_regression_with_check,
    inputs=regression_inputs,
    outputs="json",
)

# Combine both interfaces into a single two-tab app.
demo = gr.TabbedInterface(
    interface_list=[classification_tab, regression_tab],
    tab_names=["Classification", "Regression"],
)

# Start the server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()