Spaces:
Sleeping
Sleeping
| """ | |
| Real-time crew sequence risk predictor. | |
| Usage: | |
| python predict.py ORD LAX # predict risk for ORD→DFW→LAX right now | |
| python predict.py ORD LAX --month 7 # predict for July (historical patterns only) | |
| How it works: | |
| 1. Base risk — XGBoost model trained on 2018–2024 historical delay patterns | |
| 2. Live weather adjustment — fetches current METAR for airport_A, DFW, airport_B | |
| via AWC API and adjusts the base score upward if | |
| current conditions are severe. | |
| 3. Returns a combined risk score [0, 1] with full explanation. | |
| """ | |
| import argparse | |
| import datetime | |
| import os | |
| import sys | |
| import numpy as np | |
| import pandas as pd | |
| import xgboost as xgb | |
| sys.path.insert(0, os.path.dirname(__file__)) | |
| from weather import fetch_live_metar, _empty_weather | |
# Directory holding the processed artifacts, resolved relative to this file.
_HERE = os.path.dirname(__file__)
PROC = os.path.join(_HERE, os.pardir, "data", "processed")

# Per-airport statistics; each one appears twice in FEATURE_COLS, once with an
# "A_" prefix (inbound airport) and once with a "B_" prefix (outbound airport).
_PER_AIRPORT_STATS = (
    "weather_delay_rate",
    "weather_cancel_rate",
    "avg_weather_delay_min",
    "p75_weather_delay_min",
    "p95_weather_delay_min",
    "nas_delay_rate",
    "overall_weather_delay_rate",
    "overall_avg_weather_delay_min",
)

# Column names in this order: all A_* stats, all B_* stats, the pair-level
# aggregates, then the calendar / turnaround columns.
FEATURE_COLS = [
    f"{side}_{stat}" for side in ("A", "B") for stat in _PER_AIRPORT_STATS
] + [
    "pair_combined_weather_rate",
    "pair_max_weather_rate",
    "pair_min_weather_rate",
    "pair_weather_rate_sum",
    "pair_avg_weather_delay_min",
    "both_high_risk",
    "Month",
    "is_spring_summer",
    "median_turnaround_min",
]
# Half-open score bands [lo, hi) mapped to (risk label, advice text).
# The last band ends at 1.01 so that a score of exactly 1.0 is still matched.
RISK_LABELS = {
    (0.0, 0.3): ("LOW", "✓ Safe to sequence"),
    (0.3, 0.55): ("MODERATE", "⚠ Use caution — review turnaround buffer"),
    (0.55, 0.75): ("HIGH", "✗ Avoid if possible — weather-prone pair"),
    (0.75, 1.01): ("CRITICAL", "✗✗ Do not sequence — high cascade risk"),
}


def risk_label(score: float) -> tuple[str, str]:
    """Map a risk score to its ``(label, advice)`` pair.

    The first band of ``RISK_LABELS`` whose half-open interval contains
    ``score`` wins. A score that falls in no band (below 0.0, at or above
    1.01, or NaN) gets the conservative ``("CRITICAL", "Do not sequence")``.
    """
    matching = (
        verdict
        for (lower, upper), verdict in RISK_LABELS.items()
        if lower <= score < upper
    )
    return next(matching, ("CRITICAL", "Do not sequence"))
class PairRiskPredictor:
    """Score the risk of an airport_A → DFW → airport_B crew sequence.

    Loads a saved XGBoost classifier plus two parquet tables from ``PROC``:
    per-airport/per-month weather statistics and historical pair scores.
    ``predict_historical`` uses those alone; ``predict_live`` additionally
    raises the score according to current METAR conditions.
    """

    # Per-airport statistics copied into the feature vector under an
    # "A_" / "B_" prefix.
    _AIRPORT_STATS = (
        "weather_delay_rate",
        "weather_cancel_rate",
        "avg_weather_delay_min",
        "p75_weather_delay_min",
        "p95_weather_delay_min",
        "nas_delay_rate",
        "overall_weather_delay_rate",
        "overall_avg_weather_delay_min",
    )

    # Calendar month -> season used for the one-hot ``season_*`` columns.
    _SEASON_BY_MONTH = {
        3: "spring", 4: "spring", 5: "spring",
        6: "summer", 7: "summer", 8: "summer",
        9: "fall", 10: "fall", 11: "fall",
        12: "winter", 1: "winter", 2: "winter",
    }

    def __init__(self, device: str = "cuda"):
        """Load the model and lookup tables from ``PROC``.

        Args:
            device: Value passed to ``XGBClassifier(device=...)``. Defaults
                to ``"cuda"`` as before; pass ``"cpu"`` on hosts without a
                GPU.
        """
        self.model = xgb.XGBClassifier(device=device, tree_method="hist")
        self.model.load_model(os.path.join(PROC, "xgb_model.json"))
        self.airport_features = pd.read_parquet(os.path.join(PROC, "airport_features.parquet"))
        self.pair_scores = pd.read_parquet(os.path.join(PROC, "pair_risk_scores.parquet"))

    @staticmethod
    def _zero_if_missing(value) -> float:
        """Return ``value`` as a float, or 0.0 when it is ``None`` or NaN.

        ``value or 0`` cannot be used for this: NaN is truthy, so it would
        pass straight through.
        """
        if value is None or pd.isna(value):
            return 0.0
        return float(value)

    def _get_airport_features(self, airport: str, month: int) -> dict:
        """Return the numeric statistics for ``airport`` in ``month``.

        If there is no row for that month, the mean over all of the
        airport's rows is used instead. An airport that does not appear in
        the table at all yields an empty dict.
        """
        table = self.airport_features
        for_airport = table["airport"] == airport
        row = table[for_airport & (table["Month"] == month)]
        if row.empty:
            # Fall back to the average over every month for that airport.
            row = table[for_airport]
        if row.empty:
            return {}
        return row.mean(numeric_only=True).to_dict()

    def _build_feature_vector(self, airport_a: str, airport_b: str, month: int) -> pd.DataFrame:
        """Assemble the single-row feature frame for one airport pair.

        Per-airport statistics that are unavailable stay NaN in the
        ``A_*`` / ``B_*`` columns. The derived ``pair_*`` columns treat a
        missing rate or delay as 0 so that they are always finite.

        Raises:
            KeyError: if ``month`` is not an integer in 1..12.
        """
        fa = self._get_airport_features(airport_a, month)
        fb = self._get_airport_features(airport_b, month)

        feat = {}
        for side, stats in (("A", fa), ("B", fb)):
            for stat in self._AIRPORT_STATS:
                feat[f"{side}_{stat}"] = stats.get(stat, np.nan)
        feat["Month"] = month
        feat["is_spring_summer"] = int(month in (3, 4, 5, 6, 7, 8))
        feat["median_turnaround_min"] = 90.0  # default 90-min turnaround at DFW

        a_rate = self._zero_if_missing(feat["A_weather_delay_rate"])
        b_rate = self._zero_if_missing(feat["B_weather_delay_rate"])
        feat["pair_combined_weather_rate"] = a_rate * b_rate
        feat["pair_max_weather_rate"] = max(a_rate, b_rate)
        feat["pair_min_weather_rate"] = min(a_rate, b_rate)
        feat["pair_weather_rate_sum"] = a_rate + b_rate
        feat["pair_avg_weather_delay_min"] = (
            self._zero_if_missing(feat["A_avg_weather_delay_min"])
            + self._zero_if_missing(feat["B_avg_weather_delay_min"])
        ) / 2

        # both_high_risk: both airports above the 75th percentile of the
        # delay rate across every row of the airport table.
        p75 = self.airport_features["weather_delay_rate"].dropna().quantile(0.75)
        feat["both_high_risk"] = int(a_rate > p75 and b_rate > p75)

        # One-hot season dummies.
        season = self._SEASON_BY_MONTH[month]
        for s in ("fall", "spring", "summer", "winter"):
            feat[f"season_{s}"] = int(season == s)
        return pd.DataFrame([feat])

    def predict_historical(self, airport_a: str, airport_b: str, month: int = None) -> dict:
        """Predict using historical patterns only (no live weather).

        Args:
            airport_a: Inbound airport code.
            airport_b: Outbound airport code.
            month: Month 1-12; defaults to the current month.

        Returns:
            Dict with keys ``airport_a``, ``airport_b``, ``hub``, ``month``,
            ``base_risk_score``, ``risk_label``, ``advice`` and
            ``observed_bad_rate`` (``None`` when the pair/month has no
            usable historical entry).
        """
        if month is None:
            month = datetime.date.today().month
        fv = self._build_feature_vector(airport_a, airport_b, month)

        # Present exactly the columns the booster was trained on, in its
        # order; columns the vector lacks are filled with 0.0.
        model_cols = self.model.get_booster().feature_names
        fv = fv.reindex(columns=model_cols, fill_value=0.0).astype(float)
        base_score = float(self.model.predict_proba(fv)[0, 1])

        # Look up the observed rate from historical data if available.
        scores = self.pair_scores
        hist = scores[
            (scores["airport_A"] == airport_a)
            & (scores["airport_B"] == airport_b)
            & (scores["Month"] == month)
        ]
        observed = None
        if not hist.empty:
            value = hist["observed_bad_rate"].iloc[0]
            # A stored NaN means "no data"; report it as None rather than
            # letting it be formatted as "nan%".
            if not pd.isna(value):
                observed = float(value)

        label, advice = risk_label(base_score)
        return {
            "airport_a": airport_a,
            "airport_b": airport_b,
            "hub": "DFW",
            "month": month,
            "base_risk_score": base_score,
            "risk_label": label,
            "advice": advice,
            "observed_bad_rate": observed,
        }

    def predict_live(self, airport_a: str, airport_b: str) -> dict:
        """Full real-time prediction: historical model + live weather adjustment.

        Fetches current METAR for airport_A, DFW, and airport_B, then moves
        the historical score toward 1 in proportion to the worst weighted
        severity. Clear weather never lowers the score below the base.

        Returns:
            The ``predict_historical`` dict for the current month, with
            ``risk_label`` / ``advice`` recomputed from the live score and
            the extra keys ``live_risk_score``, ``weather_penalty``,
            ``weather_airport_a``, ``weather_dfw``, ``weather_airport_b``.
        """
        month = datetime.date.today().month
        result = self.predict_historical(airport_a, airport_b, month)

        print(f" Fetching live METAR for {airport_a}, DFW, {airport_b}...")
        weather = fetch_live_metar([airport_a, "DFW", airport_b])
        wa = weather.get(airport_a, _empty_weather())
        wdfw = weather.get("DFW", _empty_weather())
        wb = weather.get(airport_b, _empty_weather())

        # DFW weather affects both legs — weight it higher.
        # NOTE(review): assumes weather_severity is in [0, 1] — confirm in weather.py.
        weather_penalty = max(
            wdfw["weather_severity"] * 0.6,  # DFW: hub, both legs affected
            wa["weather_severity"] * 0.4,    # leg 1: A→DFW
            wb["weather_severity"] * 0.4,    # leg 2: DFW→B
        )

        base = result["base_risk_score"]
        # Blend: bad weather pushes the score toward 1; clear weather does
        # not reduce it below base.
        live_score = base + (1.0 - base) * weather_penalty
        live_score = float(np.clip(live_score, 0, 1))

        label, advice = risk_label(live_score)
        result.update({
            "live_risk_score": live_score,
            "weather_penalty": weather_penalty,
            "risk_label": label,
            "advice": advice,
            "weather_airport_a": wa,
            "weather_dfw": wdfw,
            "weather_airport_b": wb,
        })
        return result
| # --------------------------------------------------------------------------- | |
| # CLI | |
| # --------------------------------------------------------------------------- | |
| def _fmt_weather(label: str, w: dict) -> str: | |
| raw = w.get("raw", "") | |
| sev = w.get("weather_severity", 0) | |
| flags = [] | |
| if w.get("has_thunderstorm"): flags.append("THUNDERSTORM") | |
| if w.get("has_fog"): flags.append("FOG/MIST") | |
| if w.get("has_snow_ice"): flags.append("SNOW/ICE") | |
| if w.get("has_low_ceiling"): flags.append(f"LOW CEILING ({w.get('ceiling_ft',0):.0f}ft)") | |
| vis = w.get("min_visibility_mi", 10) | |
| if vis < 3: flags.append(f"LOW VIS ({vis:.1f}mi)") | |
| wind = w.get("max_wind_kt", 0) | |
| if wind > 20: flags.append(f"HIGH WIND ({wind:.0f}kt)") | |
| flag_str = ", ".join(flags) if flags else "Clear" | |
| return ( | |
| f" {label:12s} severity={sev:.2f} [{flag_str}]\n" | |
| f" METAR: {raw[:80] if raw else 'unavailable'}" | |
| ) | |
def main():
    """CLI entry point: parse arguments, run a prediction, print the report.

    A historical-only prediction is made when ``--no-live`` is given or a
    ``--month`` is supplied; otherwise the live-weather prediction is used.
    An out-of-range ``--month`` is rejected with a usage error before any
    model or data is loaded.
    """
    parser = argparse.ArgumentParser(description="Predict crew sequence risk for airport_A → DFW → airport_B")
    parser.add_argument("airport_a", help="Inbound airport IATA code (e.g. ORD)")
    parser.add_argument("airport_b", help="Outbound airport IATA code (e.g. LAX)")
    parser.add_argument("--month", type=int, default=None, help="Month 1-12 (default: current month)")
    parser.add_argument("--no-live", action="store_true", help="Skip live weather fetch")
    args = parser.parse_args()

    # Validate explicitly: a month of 0 is falsy and would otherwise be
    # treated as "not given", and other bad values would surface as a bare
    # KeyError from the season lookup.
    if args.month is not None and not 1 <= args.month <= 12:
        parser.error(f"--month must be between 1 and 12 (got {args.month})")

    airport_a = args.airport_a.upper()
    airport_b = args.airport_b.upper()

    predictor = PairRiskPredictor()
    print(f"\n{'='*60}")
    print(f" Crew Sequence Risk: {airport_a} → DFW → {airport_b}")
    print(f"{'='*60}")

    if args.no_live or args.month is not None:
        month = args.month if args.month is not None else datetime.date.today().month
        result = predictor.predict_historical(airport_a, airport_b, month)
        score = result["base_risk_score"]
        label = result["risk_label"]
        print(f"\n Historical risk score : {score:.3f}")
        print(f" Risk level : {label}")
        print(f" Advice : {result['advice']}")
        if result["observed_bad_rate"] is not None:
            print(f" Observed bad rate : {result['observed_bad_rate']:.1%} (historical month {month})")
    else:
        result = predictor.predict_live(airport_a, airport_b)
        base = result["base_risk_score"]
        live = result["live_risk_score"]
        print(f"\n Historical base score : {base:.3f}")
        print(f" Live weather penalty : +{result['weather_penalty']:.3f}")
        print(f" FINAL risk score : {live:.3f}")
        print(f" Risk level : {result['risk_label']}")
        print(f" Advice : {result['advice']}")
        # Compare against None so that a genuine 0.0% rate is still shown,
        # matching the historical branch.
        if result.get("observed_bad_rate") is not None:
            print(f" Historical bad rate : {result['observed_bad_rate']:.1%}")
        print(f"\n Current conditions:")
        print(_fmt_weather(airport_a, result["weather_airport_a"]))
        print(_fmt_weather("DFW (hub)", result["weather_dfw"]))
        print(_fmt_weather(airport_b, result["weather_airport_b"]))
    print(f"\n{'='*60}\n")


if __name__ == "__main__":
    main()