""" Real-time crew sequence risk predictor. Usage: python predict.py ORD LAX # predict risk for ORD→DFW→LAX right now python predict.py ORD LAX --month 7 # predict for July (historical patterns only) How it works: 1. Base risk — XGBoost model trained on 2018–2024 historical delay patterns 2. Live weather adjustment — fetches current METAR for airport_A, DFW, airport_B via AWC API and adjusts the base score upward if current conditions are severe. 3. Returns a combined risk score [0, 1] with full explanation. """ import argparse import datetime import os import sys import numpy as np import pandas as pd import xgboost as xgb sys.path.insert(0, os.path.dirname(__file__)) from weather import fetch_live_metar, _empty_weather PROC = os.path.join(os.path.dirname(__file__), "..", "data", "processed") FEATURE_COLS = [ "A_weather_delay_rate", "A_weather_cancel_rate", "A_avg_weather_delay_min", "A_p75_weather_delay_min", "A_p95_weather_delay_min", "A_nas_delay_rate", "A_overall_weather_delay_rate", "A_overall_avg_weather_delay_min", "B_weather_delay_rate", "B_weather_cancel_rate", "B_avg_weather_delay_min", "B_p75_weather_delay_min", "B_p95_weather_delay_min", "B_nas_delay_rate", "B_overall_weather_delay_rate", "B_overall_avg_weather_delay_min", "pair_combined_weather_rate", "pair_max_weather_rate", "pair_min_weather_rate", "pair_weather_rate_sum", "pair_avg_weather_delay_min", "both_high_risk", "Month", "is_spring_summer", "median_turnaround_min", ] RISK_LABELS = { (0.0, 0.3): ("LOW", "✓ Safe to sequence"), (0.3, 0.55): ("MODERATE", "⚠ Use caution — review turnaround buffer"), (0.55, 0.75): ("HIGH", "✗ Avoid if possible — weather-prone pair"), (0.75, 1.01): ("CRITICAL","✗✗ Do not sequence — high cascade risk"), } def risk_label(score: float) -> tuple[str, str]: for (lo, hi), (label, advice) in RISK_LABELS.items(): if lo <= score < hi: return label, advice return "CRITICAL", "Do not sequence" class PairRiskPredictor: def __init__(self): self.model = xgb.XGBClassifier(device="cuda", tree_method="hist") self.model.load_model(os.path.join(PROC, "xgb_model.json")) self.airport_features = pd.read_parquet(os.path.join(PROC, "airport_features.parquet")) self.pair_scores = pd.read_parquet(os.path.join(PROC, "pair_risk_scores.parquet")) def _get_airport_features(self, airport: str, month: int) -> dict: row = self.airport_features[ (self.airport_features["airport"] == airport) & (self.airport_features["Month"] == month) ] if row.empty: # Fall back to annual average for that airport row = self.airport_features[self.airport_features["airport"] == airport] if row.empty: return {} return row.mean(numeric_only=True).to_dict() def _build_feature_vector(self, airport_a: str, airport_b: str, month: int) -> pd.DataFrame: fa = self._get_airport_features(airport_a, month) fb = self._get_airport_features(airport_b, month) feat = { "A_weather_delay_rate": fa.get("weather_delay_rate", np.nan), "A_weather_cancel_rate": fa.get("weather_cancel_rate", np.nan), "A_avg_weather_delay_min": fa.get("avg_weather_delay_min", np.nan), "A_p75_weather_delay_min": fa.get("p75_weather_delay_min", np.nan), "A_p95_weather_delay_min": fa.get("p95_weather_delay_min", np.nan), "A_nas_delay_rate": fa.get("nas_delay_rate", np.nan), "A_overall_weather_delay_rate": fa.get("overall_weather_delay_rate",np.nan), "A_overall_avg_weather_delay_min":fa.get("overall_avg_weather_delay_min",np.nan), "B_weather_delay_rate": fb.get("weather_delay_rate", np.nan), "B_weather_cancel_rate": fb.get("weather_cancel_rate", np.nan), "B_avg_weather_delay_min": fb.get("avg_weather_delay_min", np.nan), "B_p75_weather_delay_min": fb.get("p75_weather_delay_min", np.nan), "B_p95_weather_delay_min": fb.get("p95_weather_delay_min", np.nan), "B_nas_delay_rate": fb.get("nas_delay_rate", np.nan), "B_overall_weather_delay_rate": fb.get("overall_weather_delay_rate",np.nan), "B_overall_avg_weather_delay_min":fb.get("overall_avg_weather_delay_min",np.nan), "Month": month, "is_spring_summer": int(month in (3,4,5,6,7,8)), "median_turnaround_min": 90.0, # default 90-min turnaround at DFW } a_rate = feat["A_weather_delay_rate"] or 0 b_rate = feat["B_weather_delay_rate"] or 0 feat["pair_combined_weather_rate"] = a_rate * b_rate feat["pair_max_weather_rate"] = max(a_rate, b_rate) feat["pair_min_weather_rate"] = min(a_rate, b_rate) feat["pair_weather_rate_sum"] = a_rate + b_rate feat["pair_avg_weather_delay_min"] = ( (feat["A_avg_weather_delay_min"] or 0) + (feat["B_avg_weather_delay_min"] or 0) ) / 2 # both_high_risk: both airports above 75th percentile of delay rate all_rates = self.airport_features["weather_delay_rate"].dropna() p75 = all_rates.quantile(0.75) feat["both_high_risk"] = int(a_rate > p75 and b_rate > p75) # Season dummies season = {3:"spring",4:"spring",5:"spring",6:"summer",7:"summer",8:"summer", 9:"fall",10:"fall",11:"fall",12:"winter",1:"winter",2:"winter"}[month] for s in ("fall","spring","summer","winter"): feat[f"season_{s}"] = int(season == s) return pd.DataFrame([feat]) def predict_historical(self, airport_a: str, airport_b: str, month: int = None) -> dict: """Predict using historical patterns only (no live weather).""" if month is None: month = datetime.date.today().month fv = self._build_feature_vector(airport_a, airport_b, month) model_cols = self.model.get_booster().feature_names for col in model_cols: if col not in fv.columns: fv[col] = 0.0 fv = fv[model_cols].astype(float) base_score = float(self.model.predict_proba(fv)[0, 1]) # Look up observed rate from historical data if available hist = self.pair_scores[ (self.pair_scores["airport_A"] == airport_a) & (self.pair_scores["airport_B"] == airport_b) & (self.pair_scores["Month"] == month) ] observed = float(hist["observed_bad_rate"].iloc[0]) if not hist.empty else None label, advice = risk_label(base_score) return { "airport_a": airport_a, "airport_b": airport_b, "hub": "DFW", "month": month, "base_risk_score": base_score, "risk_label": label, "advice": advice, "observed_bad_rate": observed, } def predict_live(self, airport_a: str, airport_b: str) -> dict: """ Full real-time prediction: historical model + live weather adjustment. Fetches current METAR for airport_A, DFW, and airport_B. """ month = datetime.date.today().month result = self.predict_historical(airport_a, airport_b, month) print(f" Fetching live METAR for {airport_a}, DFW, {airport_b}...") weather = fetch_live_metar([airport_a, "DFW", airport_b]) wa = weather.get(airport_a, _empty_weather()) wdfw= weather.get("DFW", _empty_weather()) wb = weather.get(airport_b, _empty_weather()) # DFW weather affects both legs — weight it higher weather_penalty = max( wdfw["weather_severity"] * 0.6, # DFW: hub, both legs affected wa["weather_severity"] * 0.4, # leg 1: A→DFW wb["weather_severity"] * 0.4, # leg 2: DFW→B ) base = result["base_risk_score"] # Blend: bad weather pushes score toward 1; clear weather doesn't reduce below base live_score = base + (1.0 - base) * weather_penalty live_score = float(np.clip(live_score, 0, 1)) label, advice = risk_label(live_score) result.update({ "live_risk_score": live_score, "weather_penalty": weather_penalty, "risk_label": label, "advice": advice, "weather_airport_a": wa, "weather_dfw": wdfw, "weather_airport_b": wb, }) return result # --------------------------------------------------------------------------- # CLI # --------------------------------------------------------------------------- def _fmt_weather(label: str, w: dict) -> str: raw = w.get("raw", "") sev = w.get("weather_severity", 0) flags = [] if w.get("has_thunderstorm"): flags.append("THUNDERSTORM") if w.get("has_fog"): flags.append("FOG/MIST") if w.get("has_snow_ice"): flags.append("SNOW/ICE") if w.get("has_low_ceiling"): flags.append(f"LOW CEILING ({w.get('ceiling_ft',0):.0f}ft)") vis = w.get("min_visibility_mi", 10) if vis < 3: flags.append(f"LOW VIS ({vis:.1f}mi)") wind = w.get("max_wind_kt", 0) if wind > 20: flags.append(f"HIGH WIND ({wind:.0f}kt)") flag_str = ", ".join(flags) if flags else "Clear" return ( f" {label:12s} severity={sev:.2f} [{flag_str}]\n" f" METAR: {raw[:80] if raw else 'unavailable'}" ) def main(): parser = argparse.ArgumentParser(description="Predict crew sequence risk for airport_A → DFW → airport_B") parser.add_argument("airport_a", help="Inbound airport IATA code (e.g. ORD)") parser.add_argument("airport_b", help="Outbound airport IATA code (e.g. LAX)") parser.add_argument("--month", type=int, default=None, help="Month 1-12 (default: current month)") parser.add_argument("--no-live", action="store_true", help="Skip live weather fetch") args = parser.parse_args() predictor = PairRiskPredictor() print(f"\n{'='*60}") print(f" Crew Sequence Risk: {args.airport_a.upper()} → DFW → {args.airport_b.upper()}") print(f"{'='*60}") if args.no_live or args.month: month = args.month or datetime.date.today().month result = predictor.predict_historical(args.airport_a.upper(), args.airport_b.upper(), month) score = result["base_risk_score"] label = result["risk_label"] print(f"\n Historical risk score : {score:.3f}") print(f" Risk level : {label}") print(f" Advice : {result['advice']}") if result["observed_bad_rate"] is not None: print(f" Observed bad rate : {result['observed_bad_rate']:.1%} (historical month {month})") else: result = predictor.predict_live(args.airport_a.upper(), args.airport_b.upper()) base = result["base_risk_score"] live = result["live_risk_score"] print(f"\n Historical base score : {base:.3f}") print(f" Live weather penalty : +{result['weather_penalty']:.3f}") print(f" FINAL risk score : {live:.3f}") print(f" Risk level : {result['risk_label']}") print(f" Advice : {result['advice']}") if result.get("observed_bad_rate"): print(f" Historical bad rate : {result['observed_bad_rate']:.1%}") print(f"\n Current conditions:") print(_fmt_weather(args.airport_a.upper(), result["weather_airport_a"])) print(_fmt_weather("DFW (hub)", result["weather_dfw"])) print(_fmt_weather(args.airport_b.upper(), result["weather_airport_b"])) print(f"\n{'='*60}\n") if __name__ == "__main__": main()