Spaces:
Sleeping
Sleeping
File size: 12,218 Bytes
bef09da | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 | """
Real-time crew sequence risk predictor.
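Usage:
python predict.py ORD LAX # predict risk for ORD→DFW→LAX right now
python predict.py ORD LAX --month 7 # predict for July (historical patterns only)
python predict.py ORD LAX --no-live # current month, historical patterns only (skips the live weather fetch)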
How it works:
1. Base risk — XGBoost model trained on 2018–2024 historical delay patterns
2. Live weather adjustment — fetches current METAR for airport_A, DFW, airport_B
via AWC API and adjusts the base score upward if
current conditions are severe.
3. Returns a combined risk score [0, 1] with full explanation.
"""
import argparse
import datetime
import os
import sys
import numpy as np
import pandas as pd
import xgboost as xgb
sys.path.insert(0, os.path.dirname(__file__))
from weather import fetch_live_metar, _empty_weather
# Directory holding the processed artifacts (model file and parquet tables),
# resolved relative to this source file.
PROC = os.path.join(os.path.dirname(__file__), "..", "data", "processed")

# Feature column names, grouped by origin.
FEATURE_COLS = [
    # Inbound airport (A)
    "A_weather_delay_rate",
    "A_weather_cancel_rate",
    "A_avg_weather_delay_min",
    "A_p75_weather_delay_min",
    "A_p95_weather_delay_min",
    "A_nas_delay_rate",
    "A_overall_weather_delay_rate",
    "A_overall_avg_weather_delay_min",
    # Outbound airport (B)
    "B_weather_delay_rate",
    "B_weather_cancel_rate",
    "B_avg_weather_delay_min",
    "B_p75_weather_delay_min",
    "B_p95_weather_delay_min",
    "B_nas_delay_rate",
    "B_overall_weather_delay_rate",
    "B_overall_avg_weather_delay_min",
    # Pair-level interactions
    "pair_combined_weather_rate",
    "pair_max_weather_rate",
    "pair_min_weather_rate",
    "pair_weather_rate_sum",
    "pair_avg_weather_delay_min",
    "both_high_risk",
    # Calendar / schedule
    "Month",
    "is_spring_summer",
    "median_turnaround_min",
]
# Half-open score bands [lo, hi) mapped to (label, advice).
# The last band ends at 1.01 so that a score of exactly 1.0 is still matched.
RISK_LABELS = {
    (0.0, 0.3): ("LOW", "✓ Safe to sequence"),
    (0.3, 0.55): ("MODERATE", "⚠ Use caution — review turnaround buffer"),
    (0.55, 0.75): ("HIGH", "✗ Avoid if possible — weather-prone pair"),
    (0.75, 1.01): ("CRITICAL", "✗✗ Do not sequence — high cascade risk"),
}


def risk_label(score: float) -> tuple[str, str]:
    """Translate a risk score into a ``(label, advice)`` pair.

    The first band of ``RISK_LABELS`` whose half-open interval contains
    ``score`` wins. A score that matches no band (below 0, at or above 1.01,
    or NaN) falls back to ``("CRITICAL", "Do not sequence")``.
    """
    matching_bands = (
        verdict
        for (lower, upper), verdict in RISK_LABELS.items()
        if lower <= score < upper
    )
    return next(matching_bands, ("CRITICAL", "Do not sequence"))
class PairRiskPredictor:
    """Score the weather-delay risk of an airport_A → DFW → airport_B sequence.

    Wraps an XGBoost classifier loaded from disk together with two parquet
    tables (per-airport monthly features and per-pair observed scores), and
    can optionally raise the model score using live METAR severity.
    """

    # Per-airport columns copied into the feature vector under the "A_" and
    # "B_" prefixes, in this order.
    _AIRPORT_FEATURE_KEYS = (
        "weather_delay_rate",
        "weather_cancel_rate",
        "avg_weather_delay_min",
        "p75_weather_delay_min",
        "p95_weather_delay_min",
        "nas_delay_rate",
        "overall_weather_delay_rate",
        "overall_avg_weather_delay_min",
    )

    def __init__(self, device: str = "cuda"):
        """Load the model and feature tables from the ``PROC`` directory.

        Args:
            device: Device string handed to ``xgb.XGBClassifier``. Defaults
                to ``"cuda"`` (the previously hard-coded value); pass
                ``"cpu"`` on hosts without a GPU.
        """
        self.model = xgb.XGBClassifier(device=device, tree_method="hist")
        self.model.load_model(os.path.join(PROC, "xgb_model.json"))
        self.airport_features = pd.read_parquet(os.path.join(PROC, "airport_features.parquet"))
        self.pair_scores = pd.read_parquet(os.path.join(PROC, "pair_risk_scores.parquet"))

    @staticmethod
    def _zero_if_missing(value) -> float:
        """Return *value* as a float, mapping ``None`` and NaN to ``0.0``."""
        if value is None or pd.isna(value):
            return 0.0
        return float(value)

    def _get_airport_features(self, airport: str, month: int) -> dict:
        """Return the numeric feature means for *airport* in *month*.

        If the airport has no row for that month, all of its rows are
        averaged instead. An airport that does not appear in the table at
        all yields an empty dict.
        """
        row = self.airport_features[
            (self.airport_features["airport"] == airport)
            & (self.airport_features["Month"] == month)
        ]
        if row.empty:
            # Fall back to annual average for that airport
            row = self.airport_features[self.airport_features["airport"] == airport]
        if row.empty:
            return {}
        return row.mean(numeric_only=True).to_dict()

    def _build_feature_vector(self, airport_a: str, airport_b: str, month: int) -> pd.DataFrame:
        """Assemble a one-row feature frame for the (A, B, month) combination.

        Per-airport columns keep NaN when a value is unavailable. The derived
        pair-level columns treat an unavailable rate or delay as 0.

        Raises:
            KeyError: if *month* is not an integer in 1–12 (season lookup).
        """
        fa = self._get_airport_features(airport_a, month)
        fb = self._get_airport_features(airport_b, month)

        feat = {}
        for prefix, source in (("A", fa), ("B", fb)):
            for key in self._AIRPORT_FEATURE_KEYS:
                feat[f"{prefix}_{key}"] = source.get(key, np.nan)
        feat["Month"] = month
        feat["is_spring_summer"] = int(month in (3, 4, 5, 6, 7, 8))
        feat["median_turnaround_min"] = 90.0  # default 90-min turnaround at DFW

        # NaN is truthy, so ``value or 0`` would let NaN through and poison
        # every derived pair feature; convert missing values explicitly.
        a_rate = self._zero_if_missing(feat["A_weather_delay_rate"])
        b_rate = self._zero_if_missing(feat["B_weather_delay_rate"])
        feat["pair_combined_weather_rate"] = a_rate * b_rate
        feat["pair_max_weather_rate"] = max(a_rate, b_rate)
        feat["pair_min_weather_rate"] = min(a_rate, b_rate)
        feat["pair_weather_rate_sum"] = a_rate + b_rate
        feat["pair_avg_weather_delay_min"] = (
            self._zero_if_missing(feat["A_avg_weather_delay_min"])
            + self._zero_if_missing(feat["B_avg_weather_delay_min"])
        ) / 2

        # both_high_risk: both airports above 75th percentile of delay rate
        all_rates = self.airport_features["weather_delay_rate"].dropna()
        p75 = all_rates.quantile(0.75)
        feat["both_high_risk"] = int(a_rate > p75 and b_rate > p75)

        # Season dummies
        season = {
            3: "spring", 4: "spring", 5: "spring",
            6: "summer", 7: "summer", 8: "summer",
            9: "fall", 10: "fall", 11: "fall",
            12: "winter", 1: "winter", 2: "winter",
        }[month]
        for s in ("fall", "spring", "summer", "winter"):
            feat[f"season_{s}"] = int(season == s)

        return pd.DataFrame([feat])

    def predict_historical(self, airport_a: str, airport_b: str, month: int = None) -> dict:
        """Predict using historical patterns only (no live weather).

        Args:
            airport_a: Inbound airport code.
            airport_b: Outbound airport code.
            month: Month number; defaults to the current month when ``None``.

        Returns:
            Dict with keys ``airport_a``, ``airport_b``, ``hub``, ``month``,
            ``base_risk_score``, ``risk_label``, ``advice`` and
            ``observed_bad_rate`` (``None`` when ``pair_scores`` has no row
            for this exact pair and month).
        """
        if month is None:
            month = datetime.date.today().month

        fv = self._build_feature_vector(airport_a, airport_b, month)

        # Align the frame to the columns the model reports; any column the
        # feature builder did not produce is filled with 0.0.
        model_cols = self.model.get_booster().feature_names
        for col in model_cols:
            if col not in fv.columns:
                fv[col] = 0.0
        fv = fv[model_cols].astype(float)

        base_score = float(self.model.predict_proba(fv)[0, 1])

        # Look up observed rate from historical data if available
        hist = self.pair_scores[
            (self.pair_scores["airport_A"] == airport_a)
            & (self.pair_scores["airport_B"] == airport_b)
            & (self.pair_scores["Month"] == month)
        ]
        observed = float(hist["observed_bad_rate"].iloc[0]) if not hist.empty else None

        label, advice = risk_label(base_score)
        return {
            "airport_a": airport_a,
            "airport_b": airport_b,
            "hub": "DFW",
            "month": month,
            "base_risk_score": base_score,
            "risk_label": label,
            "advice": advice,
            "observed_bad_rate": observed,
        }

    def predict_live(self, airport_a: str, airport_b: str) -> dict:
        """Full real-time prediction: historical model + live weather adjustment.

        Fetches current METAR for airport_A, DFW, and airport_B, then extends
        the historical result with ``live_risk_score``, ``weather_penalty``
        and the three weather dicts; ``risk_label`` and ``advice`` are
        overwritten to reflect the live score.
        """
        month = datetime.date.today().month
        result = self.predict_historical(airport_a, airport_b, month)

        print(f" Fetching live METAR for {airport_a}, DFW, {airport_b}...")
        weather = fetch_live_metar([airport_a, "DFW", airport_b])
        wa = weather.get(airport_a, _empty_weather())
        wdfw = weather.get("DFW", _empty_weather())
        wb = weather.get(airport_b, _empty_weather())

        # DFW weather affects both legs — weight it higher.
        # NOTE(review): assumes "weather_severity" is a number in [0, 1] —
        # confirm against weather.py.
        weather_penalty = max(
            wdfw["weather_severity"] * 0.6,  # DFW: hub, both legs affected
            wa["weather_severity"] * 0.4,    # leg 1: A→DFW
            wb["weather_severity"] * 0.4,    # leg 2: DFW→B
        )

        base = result["base_risk_score"]
        # Blend: bad weather pushes score toward 1; clear weather doesn't reduce below base
        live_score = base + (1.0 - base) * weather_penalty
        live_score = float(np.clip(live_score, 0, 1))

        label, advice = risk_label(live_score)
        result.update({
            "live_risk_score": live_score,
            "weather_penalty": weather_penalty,
            "risk_label": label,
            "advice": advice,
            "weather_airport_a": wa,
            "weather_dfw": wdfw,
            "weather_airport_b": wb,
        })
        return result
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def _fmt_weather(label: str, w: dict) -> str:
raw = w.get("raw", "")
sev = w.get("weather_severity", 0)
flags = []
if w.get("has_thunderstorm"): flags.append("THUNDERSTORM")
if w.get("has_fog"): flags.append("FOG/MIST")
if w.get("has_snow_ice"): flags.append("SNOW/ICE")
if w.get("has_low_ceiling"): flags.append(f"LOW CEILING ({w.get('ceiling_ft',0):.0f}ft)")
vis = w.get("min_visibility_mi", 10)
if vis < 3: flags.append(f"LOW VIS ({vis:.1f}mi)")
wind = w.get("max_wind_kt", 0)
if wind > 20: flags.append(f"HIGH WIND ({wind:.0f}kt)")
flag_str = ", ".join(flags) if flags else "Clear"
return (
f" {label:12s} severity={sev:.2f} [{flag_str}]\n"
f" METAR: {raw[:80] if raw else 'unavailable'}"
)
def main():
    """Command-line entry point: parse arguments, run a prediction, print a report.

    Runs the historical-only prediction when ``--no-live`` or ``--month`` is
    given; otherwise runs the live prediction and also prints the current
    conditions for airport A, DFW and airport B.
    """
    parser = argparse.ArgumentParser(description="Predict crew sequence risk for airport_A → DFW → airport_B")
    parser.add_argument("airport_a", help="Inbound airport IATA code (e.g. ORD)")
    parser.add_argument("airport_b", help="Outbound airport IATA code (e.g. LAX)")
    # ``choices`` rejects out-of-range months up front; previously 0 was
    # silently ignored and values above 12 crashed inside the season lookup.
    parser.add_argument(
        "--month",
        type=int,
        default=None,
        choices=range(1, 13),
        metavar="MONTH",
        help="Month 1-12 (default: current month)",
    )
    parser.add_argument("--no-live", action="store_true", help="Skip live weather fetch")
    args = parser.parse_args()

    airport_a = args.airport_a.upper()
    airport_b = args.airport_b.upper()

    predictor = PairRiskPredictor()

    print(f"\n{'='*60}")
    print(f" Crew Sequence Risk: {airport_a} → DFW → {airport_b}")
    print(f"{'='*60}")

    if args.no_live or args.month is not None:
        month = args.month if args.month is not None else datetime.date.today().month
        result = predictor.predict_historical(airport_a, airport_b, month)
        score = result["base_risk_score"]
        label = result["risk_label"]
        print(f"\n Historical risk score : {score:.3f}")
        print(f" Risk level : {label}")
        print(f" Advice : {result['advice']}")
        if result["observed_bad_rate"] is not None:
            print(f" Observed bad rate : {result['observed_bad_rate']:.1%} (historical month {month})")
    else:
        result = predictor.predict_live(airport_a, airport_b)
        base = result["base_risk_score"]
        live = result["live_risk_score"]
        print(f"\n Historical base score : {base:.3f}")
        print(f" Live weather penalty : +{result['weather_penalty']:.3f}")
        print(f" FINAL risk score : {live:.3f}")
        print(f" Risk level : {result['risk_label']}")
        print(f" Advice : {result['advice']}")
        # Compare against None so a genuine observed rate of 0.0 is still shown.
        if result.get("observed_bad_rate") is not None:
            print(f" Historical bad rate : {result['observed_bad_rate']:.1%}")
        print("\n Current conditions:")
        print(_fmt_weather(airport_a, result["weather_airport_a"]))
        print(_fmt_weather("DFW (hub)", result["weather_dfw"]))
        print(_fmt_weather(airport_b, result["weather_airport_b"]))

    print(f"\n{'='*60}\n")


if __name__ == "__main__":
    main()
|