vansh0003 committed on
Commit
4d27f6a
·
verified ·
1 Parent(s): f24a3f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +193 -0
app.py CHANGED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import joblib
4
+ import numpy as np
5
+
6
+ # -----------------------------
7
+ # Load models and training columns
8
+ # -----------------------------
9
# NOTE: joblib.load unpickles its input, so every artifact below must be a
# trusted file; all paths are relative to the working directory ("main/...").

# Classification artifacts: three model objects followed by the column list
# that preprocess_classification() reindexes against. Loaded in this order.
rf_model, xgb_clf_model, gbr_clf_model, training_columns_clf = (
    joblib.load(artifact)
    for artifact in (
        "main/random_forest_model.pkl",
        "main/xgboost_model.pkl",
        "main/gradient_boosting_model.pkl",
        "main/training_clm.pkl",
    )
)

# Regression artifacts: three model objects followed by the column list
# that preprocess_regression() reindexes against. Loaded in this order.
ridge_model, xgb_reg_model, gbr_reg_model, training_columns_reg = (
    joblib.load(artifact)
    for artifact in (
        "main/ridge_model.pkl",
        "main/xgb_model.pkl",
        "main/gbr_model.pkl",
        "main/training_columns.pkl",
    )
)
20
+
21
+ # -----------------------------
22
+ # Preprocessing functions
23
+ # -----------------------------
24
def preprocess_classification(df):
    """One-hot encode a classification input frame and align it to the training layout.

    The nine categorical columns are expanded with ``pd.get_dummies``. The
    result is then reindexed to the module-level ``training_columns_clf``:
    columns missing from the encoded frame are created and filled with 0, and
    columns not present in that list are dropped.

    Args:
        df: DataFrame containing the categorical columns listed below
            (``pd.get_dummies`` raises if one of them is absent).

    Returns:
        A new DataFrame whose columns are exactly ``training_columns_clf``.
    """
    one_hot_fields = [
        'ORIGIN', 'DEST', 'CARRIER', 'TAIL_NUM',
        'DEP_TIME_BLK', 'DEST_STATE_ABR', 'ORIGIN_CITY_NAME',
        'DEST_CITY_NAME', 'route',
    ]
    return pd.get_dummies(df, columns=one_hot_fields).reindex(
        columns=training_columns_clf, fill_value=0
    )
31
+
32
def preprocess_regression(df):
    """One-hot encode a regression input frame and align it to the training layout.

    ``time_of_day`` and ``wind_dir_bucket`` are expanded with
    ``pd.get_dummies``. The result is then reindexed to the module-level
    ``training_columns_reg``: missing columns are created and filled with 0,
    and columns not present in that list are dropped.

    Args:
        df: DataFrame containing ``time_of_day`` and ``wind_dir_bucket``
            alongside the other feature columns.

    Returns:
        A new DataFrame whose columns are exactly ``training_columns_reg``.
    """
    one_hot_fields = ['time_of_day', 'wind_dir_bucket']
    return pd.get_dummies(df, columns=one_hot_fields).reindex(
        columns=training_columns_reg, fill_value=0
    )
36
+
37
+ # -----------------------------
38
+ # Delay category helper
39
+ # -----------------------------
40
def categorize_delay(minutes):
    """Map a delay length in minutes to a severity message.

    Buckets (lower bound inclusive, upper bound exclusive): below 15,
    [15, 20), [20, 30), [30, 60), and everything else.

    Args:
        minutes: Delay length; any value that can be compared with numbers.

    Returns:
        The message of the first bucket whose upper bound is greater than
        ``minutes``, or the final "too much" message when no bucket matches.
    """
    # (exclusive upper bound, message) pairs in ascending order. Walking them
    # in order makes separate lower-bound checks unnecessary.
    buckets = (
        (15, "Delay not considered less then 15mins"),
        (20, "Delay is Minimum"),
        (30, "Flight is moderately delayed"),
        (60, "Flight is highly delayed"),
    )
    for upper_bound, message in buckets:
        if minutes < upper_bound:
            return message
    return "Flight is delayed too much"
51
+
52
+ # -----------------------------
53
+ # Prediction functions
54
+ # -----------------------------
55
def predict_classification(YEAR, MONTH, DAY_OF_MONTH, DAY_OF_WEEK,
                           ORIGIN, DEST, CARRIER, TAIL_NUM, DEP_TIME_BLK,
                           DEST_STATE_ABR, ORIGIN_CITY_NAME, DEST_CITY_NAME):
    """Run the three classifiers on a single flight and report their outputs.

    The four date fields are converted with ``int``; the remaining arguments
    are used as given. A ``route`` feature is derived as ``"<ORIGIN>_<DEST>"``.
    The one-row frame is encoded by ``preprocess_classification`` and passed
    to ``rf_model``, ``xgb_clf_model`` and ``gbr_clf_model``.

    Returns:
        dict with, for each model, its prediction (as ``int``) and a
        probability rounded to 3 places (``None`` if the model has no
        ``predict_proba``), plus ``"Majority Vote"``: the rounded mean of the
        three raw predictions, converted to ``int``.
    """
    record = {
        'YEAR': int(YEAR),
        'MONTH': int(MONTH),
        'DAY_OF_MONTH': int(DAY_OF_MONTH),
        'DAY_OF_WEEK': int(DAY_OF_WEEK),
        'ORIGIN': ORIGIN,
        'DEST': DEST,
        'CARRIER': CARRIER,
        'TAIL_NUM': TAIL_NUM,
        'DEP_TIME_BLK': DEP_TIME_BLK,
        'DEST_STATE_ABR': DEST_STATE_ABR,
        'ORIGIN_CITY_NAME': ORIGIN_CITY_NAME,
        'DEST_CITY_NAME': DEST_CITY_NAME,
        # Derived feature, not supplied by the caller.
        'route': f"{ORIGIN}_{DEST}",
    }
    features = preprocess_classification(pd.DataFrame([record]))

    # (model, prediction key, probability key) in output order.
    classifiers = (
        (rf_model, "Random Forest Prediction", "Random Forest Prob"),
        (xgb_clf_model, "XGBoost Prediction", "XGBoost Prob"),
        (gbr_clf_model, "Gradient Boosting Prediction", "Gradient Boosting Prob"),
    )

    report = {}
    votes = []
    for model, prediction_key, probability_key in classifiers:
        vote = model.predict(features)[0]
        votes.append(vote)
        report[prediction_key] = int(vote)
        if hasattr(model, "predict_proba"):
            # Second probability column of the single row; presumably the
            # "delayed" class -- confirm against the training labels.
            report[probability_key] = round(model.predict_proba(features)[0][1], 3)
        else:
            report[probability_key] = None

    # With three votes the rounded mean picks the majority label
    # (assumes the labels are 0/1 -- TODO confirm).
    report["Majority Vote"] = int(np.round(np.mean(votes)))
    return report
97
+
98
def predict_regression_with_check(DEP_DELAY, DEP_DELAY_NEW, DEP_DEL15, DEP_DELAY_GROUP,
                                  temp, prcp, wspd, wdir, bad_weather, wind_dir_bucket,
                                  time_of_day, is_weekend):
    """Predict a delay with three regressors unless the flight is flagged as not delayed.

    When ``int(DEP_DEL15)`` is 0 the regressors are skipped and a short
    "No delay predicted" result is returned. Otherwise the inputs are
    converted (``float`` for measurements, ``int`` for flags/groups, the two
    bucket fields as given), encoded by ``preprocess_regression`` and passed
    to ``ridge_model``, ``xgb_reg_model`` and ``gbr_reg_model``.

    Returns:
        Either ``{"Status": ..., "Delay Category": None}`` for the skipped
        case, or a dict with each model's prediction rounded to 2 places, the
        largest of the three (also rounded), and the ``categorize_delay``
        message for that largest value.
    """
    delayed_flag = int(DEP_DEL15)

    # Guard clause: nothing to estimate for a flight flagged as not delayed.
    if delayed_flag == 0:
        return {
            "Status": "No delay predicted",
            "Delay Category": None,
        }

    record = {
        'DEP_DELAY': float(DEP_DELAY),
        'DEP_DELAY_NEW': float(DEP_DELAY_NEW),
        'DEP_DEL15': delayed_flag,
        'DEP_DELAY_GROUP': int(DEP_DELAY_GROUP),
        'temp': float(temp),
        'prcp': float(prcp),
        'wspd': float(wspd),
        'wdir': float(wdir),
        'bad_weather': int(bad_weather),
        'wind_dir_bucket': wind_dir_bucket,
        'time_of_day': time_of_day,
        'is_weekend': int(is_weekend),
    }
    features = preprocess_regression(pd.DataFrame([record]))

    ridge_estimate, xgb_estimate, gbr_estimate = (
        model.predict(features)[0]
        for model in (ridge_model, xgb_reg_model, gbr_reg_model)
    )
    # The category is based on the largest (most pessimistic) estimate.
    worst_case = max(ridge_estimate, xgb_estimate, gbr_estimate)

    return {
        "Ridge Prediction": round(ridge_estimate, 2),
        "XGBoost Prediction": round(xgb_estimate, 2),
        "Gradient Boosting Prediction": round(gbr_estimate, 2),
        "Max Prediction": round(worst_case, 2),
        "Delay Category": categorize_delay(worst_case),
    }
139
+
140
+ # -----------------------------
141
+ # Gradio Interface
142
+ # -----------------------------
143
# Widgets for the classification tab, in the positional order of
# predict_classification()'s parameters: four numeric fields, then eight text fields.
classification_inputs = [
    *(gr.Number(label=caption) for caption in (
        "YEAR",
        "MONTH",
        "DAY_OF_MONTH",
        "DAY_OF_WEEK (1=Mon ... 7=Sun)",
    )),
    *(gr.Textbox(label=caption) for caption in (
        "Origin Airport Code",
        "Destination Airport Code",
        "Carrier Code",
        "Tail Number",
        "Departure Time Block (e.g., 0600-0659)",
        "Destination State Abbreviation",
        "Origin City Name",
        "Destination City Name",
    )),
]

# Widgets for the regression tab, in the positional order of
# predict_regression_with_check()'s parameters.
regression_inputs = [
    *(gr.Number(label=caption) for caption in (
        "DEP_DELAY",
        "DEP_DELAY_NEW",
        "DEP_DEL15 (0 or 1)",
        "DEP_DELAY_GROUP",
        "Temperature",
        "Precipitation",
        "Wind Speed",
        "Wind Direction",
        "Bad Weather (0 or 1)",
    )),
    *(gr.Textbox(label=caption) for caption in (
        "Wind Dir Bucket (North/South/East/West/etc.)",
        "Time of Day (Morning/Afternoon/Evening/Night)",
    )),
    gr.Number(label="Is Weekend (0 or 1)"),
]

# One Interface per task; both render the returned dict as JSON.
classification_tab = gr.Interface(
    title="Flight Delay Classification",
    description="Predict delay classification using Random Forest, XGBoost, and Gradient Boosting.",
    fn=predict_classification,
    inputs=classification_inputs,
    outputs="json",
)

regression_tab = gr.Interface(
    title="Flight Delay Regression (Conditional)",
    description="Predict arrival delay in minutes only if DEP_DEL15=1, with categorized output.",
    fn=predict_regression_with_check,
    inputs=regression_inputs,
    outputs="json",
)

# Combine both interfaces into a single two-tab app.
demo = gr.TabbedInterface(
    [classification_tab, regression_tab],
    ["Classification", "Regression"],
)

# Start the app only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()