""" AA Crew Sequence Risk — Interactive Dashboard ============================================= Run with: conda run -n aadata streamlit run app/app.py """ from __future__ import annotations import os import sys from datetime import datetime import numpy as np import pandas as pd import streamlit as st import plotly.graph_objects as go import plotly.express as px import plotly.io as pio # Global chart theme — transparent bg so dark/light mode both work pio.templates["aa_theme"] = go.layout.Template( layout=go.Layout( plot_bgcolor="rgba(0,0,0,0)", paper_bgcolor="rgba(0,0,0,0)", xaxis=dict(gridcolor="rgba(128,128,128,0.15)", zerolinecolor="rgba(128,128,128,0.3)"), yaxis=dict(gridcolor="rgba(128,128,128,0.15)", zerolinecolor="rgba(128,128,128,0.3)"), ) ) pio.templates.default = "plotly+aa_theme" # Allow imports from project root sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) from app.predictor import RiskPredictor, build_features_df, FEATURE_LABELS from app import airports as ap_meta from app import live_flights as lf from app import optimizer as opt PROCESSED = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data", "processed")) RAW = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data", "raw")) st.set_page_config( page_title="AA DFW Crew Risk", page_icon="✈️", layout="wide", initial_sidebar_state="expanded", ) # ── CSS ────────────────────────────────────────────────────────────────────── st.markdown(""" """, unsafe_allow_html=True) def tip(label: str, tooltip: str) -> str: """Return HTML snippet: label with hover tooltip (use inside st.markdown unsafe_allow_html=True).""" return ( f'' f'{label} ' ) # ── Data loading (cached) ──────────────────────────────────────────────────── @st.cache_resource(show_spinner="Loading model & features...") def get_predictor() -> RiskPredictor: df = build_features_df() return RiskPredictor(df) @st.cache_data(show_spinner="Loading risk scores...") def get_pair_scores() -> pd.DataFrame: 
return pd.read_parquet(os.path.join(PROCESSED, "pair_risk_scores.parquet")) @st.cache_data(show_spinner="Loading 2024 schedule data...") def get_bts_2024() -> pd.DataFrame: path = os.path.join(RAW, "bts_all_dfw_2024.parquet") if not os.path.exists(path): return pd.DataFrame() df = pd.read_parquet(path) df = df[df["Cancelled"] != 1].copy() df["CRSDepTime"] = pd.to_numeric(df["CRSDepTime"], errors="coerce") df["CRSArrTime"] = pd.to_numeric(df["CRSArrTime"], errors="coerce") df["DepDelayMinutes"] = df["DepDelayMinutes"].fillna(0) df["ArrDelayMinutes"] = df["ArrDelayMinutes"].fillna(0) return df @st.cache_data def get_map_group(month: int, role: str) -> pd.DataFrame: """Cached per-month airport risk aggregation for the map tab.""" scores = get_pair_scores() ms = scores[scores["Month"] == month] if role == "origin": grp = (ms.groupby("airport_A") .agg(avg_risk=("avg_risk_score","mean"), n_pairs=("airport_B","count")) .reset_index().rename(columns={"airport_A":"airport"})) wp = (ms.loc[ms.groupby("airport_A")["avg_risk_score"].idxmax(), ["airport_A","airport_B"]] .rename(columns={"airport_A":"airport","airport_B":"worst_partner"})) else: grp = (ms.groupby("airport_B") .agg(avg_risk=("avg_risk_score","mean"), n_pairs=("airport_A","count")) .reset_index().rename(columns={"airport_B":"airport"})) wp = (ms.loc[ms.groupby("airport_B")["avg_risk_score"].idxmax(), ["airport_B","airport_A"]] .rename(columns={"airport_B":"airport","airport_A":"worst_partner"})) return grp.merge(wp, on="airport", how="left") @st.cache_data def get_scores_indexed() -> pd.DataFrame: """Cached set_index — avoids re-running on every Streamlit rerender.""" return get_pair_scores().set_index(["airport_A", "airport_B", "Month"]) @st.cache_data(show_spinner=False) def get_eval_data() -> dict: """Compute PR/ROC curves and calibration from pair_risk_scores (pair-level aggregation).""" from sklearn.metrics import roc_curve, precision_recall_curve, roc_auc_score, average_precision_score _s = 
get_pair_scores().dropna(subset=["avg_risk_score", "observed_bad_rate"]) _y = (_s["observed_bad_rate"] > 0.25).astype(int) _p = _s["avg_risk_score"] _fpr, _tpr, _ = roc_curve(_y, _p) _prec, _rec, _ = precision_recall_curve(_y, _p) # Calibration: decile buckets of model score vs observed bad rate _s2 = _s.copy() _s2["decile"] = pd.qcut(_p, 10, labels=False) _cal = (_s2.groupby("decile") .agg(mean_score=("avg_risk_score", "mean"), mean_obs=("observed_bad_rate", "mean"), n=("avg_risk_score", "count")) .reset_index()) return { "fpr": _fpr, "tpr": _tpr, "prec": _prec, "rec": _rec, "auc": float(roc_auc_score(_y, _p)), "ap": float(average_precision_score(_y, _p)), "cal": _cal, "scores": _s, } @st.cache_data(show_spinner=False) def get_feature_importance_df() -> pd.DataFrame: """Load XGBoost model and extract feature importances with group labels.""" import xgboost as _xgb _m = _xgb.XGBClassifier() _m.load_model(os.path.join(PROCESSED, "xgb_model.json")) _fnames = _m.get_booster().feature_names _fi = _m.feature_importances_ def _group(f: str) -> str: if f.startswith(("A_weather", "A_overall", "A_nas_")): return "Origin BTS" if f.startswith(("B_weather", "B_overall", "B_nas_")): return "Dest BTS" if f.startswith("pair_") and "cascade" not in f and "wind" not in f and "precip" not in f: return "Pair BTS" if f in ("Month", "is_spring_summer", "median_turnaround_min") or f.startswith("season_"): return "Temporal" if f.startswith(("A_avg_wind", "A_precip", "A_extreme", "A_total_precip", "A_max_wind")): return "Origin GSOM" if f.startswith(("B_avg_wind", "B_precip", "B_extreme", "B_total_precip", "B_max_wind")): return "Dest GSOM" if f.startswith(("pair_max_avg_wind", "pair_max_precip", "pair_max_extreme", "pair_max_total", "pair_max_max_wind")): return "Pair GSOM" if f.startswith("DFW_"): return "DFW Hub" if f.startswith("tc_"): return "Tail-Chain / Duty" if f.startswith(("A_ap_", "B_ap_", "pair_cascade")): return "Airport Cascade" if f.startswith("mhc_"): return "Multi-Hop 
Cascade" return "Other" _df = pd.DataFrame({ "feature": _fnames, "importance": _fi, "label": [FEATURE_LABELS.get(f, f) for f in _fnames], "group": [_group(f) for f in _fnames], }).sort_values("importance", ascending=False).reset_index(drop=True) _df["rank"] = _df.index + 1 return _df @st.cache_data def get_airport_df(codes: tuple) -> pd.DataFrame: return ap_meta.build_airport_df(list(codes)) # ── Helpers ────────────────────────────────────────────────────────────────── def risk_badge(label: str) -> str: cls = { "HIGH RISK": "risk-badge-high", "MODERATE RISK": "risk-badge-moderate", "LOW RISK": "risk-badge-low", }.get(label, "risk-badge-low") return f'{label}' HIGH_THRESHOLD = 0.30 # calibrated: ≥30% of sequences historically disrupted MOD_THRESHOLD = 0.20 # calibrated: ≥20% _COLOR_CAP = 0.50 # calibrated scores rarely exceed 50%; maps to full red def score_to_color(score: float) -> str: """Continuous green→yellow→red interpolation over the calibrated score range [0, 0.50].""" t = max(0.0, min(1.0, score / _COLOR_CAP)) if t <= 0.5: s = t * 2 # 0→1 over bottom half r = int(44 + s * (255 - 44)) # 44→255 g = int(160 + s * (200 - 160)) # 160→200 b = int(44 + s * (0 - 44)) # 44→0 else: s = (t - 0.5) * 2 # 0→1 over top half r = int(255 + s * (214 - 255)) # 255→214 g = int(200 + s * (39 - 200)) # 200→39 b = int(0 + s * 40) # 0→40 return f"rgb({r},{g},{b})" def gauge_chart(risk_score: float, title: str = "Risk Score") -> go.Figure: label = ("HIGH RISK" if risk_score >= HIGH_THRESHOLD else "MODERATE RISK" if risk_score >= MOD_THRESHOLD else "LOW RISK") color = score_to_color(risk_score) fig = go.Figure(go.Indicator( mode="gauge+number+delta", value=risk_score * 100, number={"suffix": "%", "font": {"size": 36}}, title={"text": f"{title}
{label}", "font": {"size": 16}}, gauge={ "axis": {"range": [0, 100], "tickwidth": 1}, "bar": {"color": color, "thickness": 0.35}, "steps": [ {"range": [0, 20], "color": "rgba(44,160,44,0.15)"}, {"range": [20, 30], "color": "rgba(255,127,14,0.15)"}, {"range": [30, 100],"color": "rgba(214,39,40,0.15)"}, ], "threshold": { "line": {"color": "rgba(150,150,150,0.8)", "width": 3}, "thickness": 0.75, "value": risk_score * 100, }, }, )) fig.update_layout(height=280, margin=dict(t=60, b=20, l=30, r=30)) return fig def shap_bar_chart(shap_df: pd.DataFrame) -> go.Figure: shap_df = shap_df.sort_values("shap_value") imputed_col = "imputed" in shap_df.columns colors = ["#d62728" if v > 0 else "#2ca02c" for v in shap_df["shap_value"]] # Imputed features get dashed border (rgba) to signal "estimated, not measured" line_widths = [] line_colors = [] for _, row in shap_df.iterrows(): if imputed_col and row.get("imputed", False): line_widths.append(2) line_colors.append("rgba(180,130,0,0.9)") # amber border = imputed else: line_widths.append(0) line_colors.append("rgba(0,0,0,0)") hover = [ (f"%{{y}}
SHAP: %{{x:.4f}}
Value: %{{customdata:.3f}}" + (" (★ month-median imputed)" if imputed_col and row.get("imputed", False) else "") + "") for _, row in shap_df.iterrows() ] fig = go.Figure(go.Bar( x=shap_df["shap_value"], y=shap_df["label"], orientation="h", marker=dict( color=colors, line=dict(width=line_widths, color=line_colors), ), text=[f"{v:+.3f}" for v in shap_df["shap_value"]], textposition="outside", hovertemplate=hover[0] if len(set(hover)) == 1 else "%{y}: %{x:.4f}", customdata=shap_df["feature_value"], )) fig.update_layout( title="Feature Contributions (SHAP Values)
" "Red = increases risk | Green = decreases risk | Amber border = GSOM median imputed", xaxis_title="SHAP Value (impact on model output)", height=max(350, len(shap_df) * 28), margin=dict(l=10, r=80, t=60, b=40), plot_bgcolor="rgba(0,0,0,0)", xaxis=dict(zeroline=True, zerolinewidth=1.5, zerolinecolor="#888"), ) return fig # ── Sidebar ────────────────────────────────────────────────────────────────── with st.sidebar: st.image("https://upload.wikimedia.org/wikipedia/commons/thumb/a/a4/American_Airlines_logo_2013.svg/320px-American_Airlines_logo_2013.svg.png", width=180) st.title("AA DFW Crew Risk") st.caption("Weather-driven crew sequence risk scoring for A→DFW→B routes") st.divider() st.markdown("**Model:** XGBoost v3 + Isotonic Calibration \n**High risk:** ≥30% disruption rate \n**Val AUC:** 0.825 \n**Val AP:** 0.445") st.divider() st.subheader("Live Schedule API") aviationstack_key = st.text_input( "AviationStack API Key", type="password", placeholder="Paste key for live AA schedule...", help="Free tier at aviationstack.com — 100 req/month. 
Leave blank to use BTS 2024 analog.", ) if aviationstack_key: st.success("Live API key set") else: st.info("No key → BTS 2024 analog used") st.divider() try: _default_dark = st.get_option("theme.base") != "light" except Exception: _default_dark = True dark_mode = st.toggle("🌙 Dark mode", value=_default_dark, key="global_dark") st.divider() st.caption("Data: BTS 2015–2024 · GSOM · FAA Part 117") # ── Tab layout ─────────────────────────────────────────────────────────────── tab_overview, tab_dash, tab_sched, tab_optim, tab_query, tab_map = st.tabs([ "📋 Methodology", "📊 Risk Dashboard", "🛫 DFW Schedule", "⚡ Sequence Optimizer", "🔍 Pair Risk Query", "🗺️ Airport Risk Map", ]) # ═══════════════════════════════════════════════════════════════════════════ # TAB 0: METHODOLOGY # ═══════════════════════════════════════════════════════════════════════════ with tab_overview: st.header("📋 Methodology & Technical Model Report") st.caption("A full technical account of the data pipeline, feature engineering, model specification, and evaluation.") # ── Top model card ──────────────────────────────────────────────────────── _mc = st.columns(6) for _col, (_lbl, _val) in zip(_mc, [ ("Algorithm", "XGBoost v3"), ("Val AUC", "0.825"), ("Val AP", "0.445"), ("Features", "70"), ("Train rows", "~398k"), ("Val split", "Time-based"), ]): _col.markdown( f'
' f'
{_lbl}
' f'
{_val}
' f'
', unsafe_allow_html=True, ) st.markdown("
", unsafe_allow_html=True) # ── Pipeline Sankey ─────────────────────────────────────────────────────── # Node indices: # 0 BTS 1 GSOM 2 Tail-Chain 3 Feature Eng # 4 XGBoost 5 Risk Scores 6 SHAP 7 Optimizer 8 Dashboard _sk_font_color = "rgba(240,240,240,0.95)" if dark_mode else "rgba(20,20,20,0.9)" _sk_node_line = "rgba(255,255,255,0.2)" if dark_mode else "rgba(0,0,0,0.15)" _link_alpha = "0.30" if dark_mode else "0.22" fig_sankey = go.Figure(go.Sankey( arrangement="fixed", node=dict( label=["BTS 2015–2024", "GSOM Weather", "Tail-Chain", "Feature Engineering", "XGBoost v3", "Pair Risk Scores", "SHAP", "Sequence Optimizer", "Dashboard"], x=[0.01, 0.01, 0.01, 0.36, 0.60, 0.82, 0.82, 0.999, 0.999], y=[0.10, 0.46, 0.82, 0.44, 0.44, 0.13, 0.77, 0.22, 0.78], color=["#005EB8","#1a7a4a","#8B4513","#7B2D8B","#C41E3A", "#2ca02c","#ff7f0e","#555555","#005EB8"], pad=22, thickness=22, line=dict(color=_sk_node_line, width=0.8), hovertemplate="%{label}", ), link=dict( source=[0, 1, 2, 3, 4, 4, 5, 5], target=[3, 3, 3, 4, 5, 6, 7, 8], value= [45,20,25, 90, 55,35, 28,28], color=[f"rgba(0,94,184,{_link_alpha})", f"rgba(26,122,74,{_link_alpha})", f"rgba(139,69,19,{_link_alpha})", f"rgba(123,45,139,{_link_alpha})", f"rgba(196,30,58,{_link_alpha})", f"rgba(196,30,58,{_link_alpha})", f"rgba(44,160,44,{_link_alpha})", f"rgba(44,160,44,{_link_alpha})"], hovertemplate="%{source.label}%{target.label}", ), )) fig_sankey.update_layout( title="End-to-End Data & Model Pipeline", height=420, margin=dict(t=50, b=20, l=10, r=10), font=dict(size=12, color=_sk_font_color), ) st.plotly_chart(fig_sankey, width='stretch') st.markdown("
", unsafe_allow_html=True) # ── Section 1: Problem Formulation ─────────────────────────────────────── with st.expander("**1 · Problem Formulation**", expanded=True): # ── Notation key ────────────────────────────────────────────────── with st.container(): st.markdown("**Notation used throughout this report:**") _not_cols = st.columns(3) with _not_cols[0]: st.markdown(r""" | Symbol | Meaning | |--------|---------| | $A, B$ | IATA airport codes (non-DFW origin / destination) | | $m$ | Calendar month (1–12) | | $\mathcal{S}_{A,B,m}$ | Set of all observed A→DFW→B sequences in month $m$ | | $\Delta_s$ | Weather + NAS delay of sequence $s$ (minutes) | """) with _not_cols[1]: st.markdown(r""" | Symbol | Meaning | |--------|---------| | $\mathbf{x}_{A,B,m} \in \mathbb{R}^{70}$ | Feature vector for pair-month cell | | $y_{A,B,m} \in \{0,1\}$ | Binary label (disrupted = 1) | | $\hat{p}_{A,B,m}$ | Calibrated model risk score ∈ [0, 1] | | $\hat{y}_i^{(K)}$ | Raw XGBoost log-odds output after $K$ trees | """) with _not_cols[2]: st.markdown(r""" | Symbol | Meaning | |--------|---------| | $g_i, h_i$ | First- and second-order gradients of the loss | | $T_k$ | Number of leaves in tree $k$ | | $\mathbf{w}_k$ | Leaf weight vector for tree $k$ | | AUC | Area Under the ROC Curve (ranking quality) | | AP | Average Precision (precision-recall summary) | | SHAP | SHapley Additive exPlanations (feature attribution) | """) st.divider() _pf1, _pf2 = st.columns([3, 2]) with _pf1: st.markdown(""" American Airlines operates **~900 daily flights** through Dallas/Fort Worth (DFW). A crew sequence is the atom of scheduling: a pilot or flight attendant arrives on an inbound flight from airport **A**, turns at DFW, then departs on an outbound to airport **B**. Weather disruptions at A, DFW, or B shatter the day's roster — triggering FAA Part 117 rest violations, repositioning costs, and cascading cancellations. 
**Formal task.** Given the triplet (airport_A, airport_B, month), predict whether the sequence A → DFW → B is *systematically disrupted* — i.e., whether its historical weather disruption rate exceeds a material threshold. """) st.markdown("**Observed disruption rate for a pair-month cell:**") st.latex(r""" \text{bad\_rate}(A,\,B,\,m) = \frac{\bigl|\bigl\{s \in \mathcal{S}_{A,B,m} \;:\; \Delta_s \geq 15\,\text{min} \;\lor\; \mathrm{cancel}(s)\bigr\}\bigr|} {|\mathcal{S}_{A,B,m}|} """) st.markdown("**Binary label and classification target:**") st.latex(r""" y_{A,B,m} = \begin{cases} 1 & \text{if } \text{bad\_rate}(A,B,m) > 0.25 \\ 0 & \text{otherwise} \end{cases} """) st.markdown("**Model output:**") st.latex(r""" \hat{p}_{A,B,m} = P\!\left(y=1 \;\middle|\; \mathbf{x}_{A,B,m}\right) \in [0,\,1], \quad \mathbf{x} \in \mathbb{R}^{70} """) with _pf2: st.markdown("**The model is used in two distinct modes:**") st.markdown(""" | Mode | Usage | |---|---| | **Pair scoring** | Absolute risk gauge for any A→DFW→B pair in any month | | **Cost matrix** | Relative ranking as input to the Hungarian-algorithm optimizer | **Threshold rationale: 0.25** The 0.25 bad-rate threshold was chosen after examining the full distribution of pair-month disruption rates on tail-matched sequences. A 50% threshold would create a spurious 50/50 split with no operational meaning; 0.25 captures corridor-months where more than 1-in-4 actual crew rotations experienced weather disruption — a materially elevated frequency. This yields an **11.4% positive rate** (scale_pos_weight ≈ 7.7). **Turnaround constraints** Sequences are constructed by linking inbound and outbound legs on the same tail number with a turnaround window of **30–240 minutes** — the FAA minimum crew turn plus an operational ceiling beyond which a new crew is typically assigned. 
""") # ── Section 2: Dataset ─────────────────────────────────────────────────── with st.expander("**2 · Dataset**", expanded=True): _d1, _d2 = st.columns([3, 2]) with _d1: st.markdown(""" **Primary source — Bureau of Transportation Statistics (BTS) On-Time Performance** - Years: 2015–2024 (10 years) - Scope: all AA flights departing or arriving DFW (not just AA — used for hub-load features) - Key fields used: `Tail_Number`, `FlightDate`, `Origin`, `Dest`, `CRSDepTime`, `CRSArrTime`, `WeatherDelay`, `NASDelay`, `Cancelled`, `CancellationCode` **Sequence construction — tail-matched rotations.** Each observation is a real aircraft rotation: an inbound leg from airport A arriving at DFW, linked to an outbound leg from DFW to airport B on the **same tail number and same calendar date**, with a turnaround window of 30–240 minutes. This captures the actual crew assignment: if the same aircraft (and likely the same crew) flew A→DFW and then DFW→B, that is one sequence. Aggregating by (airport_A, airport_B, Month, Year) yields `n_sequences` equal to the **number of matched rotations** in that year-month — on average 3.6 per cell, median 2, reflecting the true operational frequency of the A–DFW–B pairing. **Secondary source — NOAA GSOM (Global Summary of Month)** - Monthly climate normals: precipitation, wind speed/gust, extreme-event counts - Coverage: ~55% of US airports have a nearby station with complete records - XGBoost handles missing GSOM data natively via built-in NaN routing in split decisions — airports without GSOM still participate in all non-GSOM splits **Labeling.** A sequence is *disrupted* if the inbound leg had weather delay ≥ 15 min, **or** the outbound leg had weather delay ≥ 15 min, **or** a cascade is detected (inbound arrival delay ≥ 15 min propagates into a late-aircraft departure delay ≥ 15 min on the outbound leg). `observed_bad_rate` is the fraction of matched rotations in a `(pair, month, year)` cell that are disrupted. 
The binary label `y = 1` if this rate exceeds **0.25**, yielding an **11.4% positive rate** — reflecting the true rarity of severely weather-disrupted crew rotations on any given corridor. """) with _d2: _ds_rows = [ ("BTS years", "2015–2024"), ("Raw flight records", "~8.5M"), ("Tail-matched seqs", "~398k obs"), ("Unique pair-months", "~156k"), ("Avg seqs/pair-month", "~3.6"), ("Unique airports A", "~250"), ("Unique airports B", "~250"), ("Positive rate", "11.4%"), ("Threshold", "25% bad rate"), ("Turnaround window", "30–240 min"), ("GSOM airport cov.", "~55%"), ] _ds_df = pd.DataFrame(_ds_rows, columns=["Property", "Value"]) st.dataframe(_ds_df, hide_index=True, width='stretch', height=370) # ── Section 3: Model Architecture ──────────────────────────────────────── with st.expander("**3 · Model Architecture & Training**", expanded=True): _m1, _m2 = st.columns(2) with _m1: st.markdown("**XGBoost Gradient Boosted Trees — Objective Function**") st.markdown("The model minimizes a regularized additive loss over K trees:") st.latex(r""" \mathcal{L}(\phi) = \sum_{i=1}^{n} \ell\!\left(y_i,\, \hat{y}_i^{(K)}\right) + \sum_{k=1}^{K} \Omega(f_k) """) st.markdown("where the log-loss for binary classification is:") st.latex(r""" \ell\!\left(y_i, \hat{y}_i\right) = -\,y_i \log \hat{p}_i - (1-y_i)\log(1-\hat{p}_i), \quad \hat{p}_i = \sigma\!\left(\hat{y}_i^{(K)}\right) """) st.markdown("and the regularization penalty on tree $f_k$ is:") st.latex(r""" \Omega(f_k) = \gamma\, T_k + \frac{1}{2}\,\lambda\,\|\mathbf{w}_k\|^2 """) st.markdown(r"($T_k$ = number of leaves, $\mathbf{w}_k$ = leaf weight vector). The optimal leaf weight in each node is derived analytically via second-order Taylor expansion:") st.latex(r""" w_j^* = -\,\frac{\displaystyle\sum_{i \in I_j} g_i} {\displaystyle\sum_{i \in I_j} h_i + \lambda} """) st.markdown(r"where $g_i = \partial_{\hat{y}} \ell$ and $h_i = \partial^2_{\hat{y}} \ell$ are the first and second gradients. 
**Class imbalance** is corrected by re-weighting positive gradients:") st.latex(r""" \text{scale\_pos\_weight} = \frac{N_{\text{neg}}}{N_{\text{pos}}} = \frac{248{,}421}{180{,}695} \approx 1.374 """) with _m2: st.markdown("**Hyperparameters**") _hp = pd.DataFrame([ ("n_estimators", "500", "Hard cap; early stopping governs actual tree count"), ("early_stopping_rounds","30", "Halt if val AUCPR doesn't improve for 30 rounds"), ("max_depth", "6", "Sufficient for 6-way interaction features"), ("learning_rate (η)", "0.05", "Slow shrinkage → lower variance"), ("subsample", "0.8", "Stochastic row sampling per tree"), ("colsample_bytree", "0.8", "Feature sampling per tree"), ("eval_metric", "aucpr", "Average Precision — better for imbalanced targets"), ("tree_method", "hist", "Histogram splits — O(n·b); GPU-accelerated"), ("device", "cuda", "NVIDIA GPU training"), ("random_state", "42", "Reproducibility"), ], columns=["Parameter", "Value", "Rationale"]) st.dataframe(_hp, hide_index=True, width='stretch', height=370) st.markdown(""" **Validation: strict temporal split** ``` Train: Year ∈ {2015, …, 2023} (~85%) Val: Year = 2024 (~15%) ``` Standard k-fold would leak future information (2023 data training on 2024 labels in some folds). Time-based holdout tests true out-of-sample generalization. **NaN passthrough.** XGBoost learns a default branching direction for each split when a feature value is missing — GSOM features (absent for ~45% of airports) are handled natively without imputation. 
""") # ── Section 4: Feature Engineering ─────────────────────────────────────── with st.expander("**4 · Feature Engineering — All 70 Features**", expanded=True): _fi_df = get_feature_importance_df() _group_colors = { "Origin BTS": "#005EB8", "Dest BTS": "#0088CC", "Pair BTS": "#00AADD", "Temporal": "#7B2D8B", "Origin GSOM": "#1a7a4a", "Dest GSOM": "#2ca02c", "Pair GSOM": "#5cb85c", "DFW Hub": "#C41E3A", "Tail-Chain / Duty": "#8B4513", "Airport Cascade": "#ff7f0e", "Multi-Hop Cascade": "#B8860B", "Other": "#888888", } # ── Sunburst: group → feature ───────────────────────────────────── _sun_df = _fi_df[_fi_df["importance"] > 0].copy() _sun_df["pct"] = (_sun_df["importance"] / _sun_df["importance"].sum() * 100).round(2) fig_sun = px.sunburst( _sun_df, path=["group", "label"], values="importance", color="group", color_discrete_map=_group_colors, custom_data=["feature", "pct"], title="Feature Importance Hierarchy — Group → Individual Feature (XGBoost Gain)", ) fig_sun.update_traces( hovertemplate=( "%{label}
" "Group: %{parent}
" "Importance: %{value:.4f}
" "Share: %{customdata[1]:.2f}%" ), textfont_size=11, insidetextorientation="radial", ) fig_sun.update_layout(height=560, margin=dict(t=50, b=10, l=10, r=10)) st.plotly_chart(fig_sun, width='stretch') # ── Bar (top 25) + group bar side by side ───────────────────────── _bc1, _bc2 = st.columns([3, 2]) with _bc1: _top25 = _fi_df.head(25).sort_values("importance") fig_fi = go.Figure(go.Bar( x=_top25["importance"], y=_top25["label"], orientation="h", marker=dict( color=[_group_colors.get(g, "#888") for g in _top25["group"]], line=dict(width=0), ), text=[f"{v:.3f}" for v in _top25["importance"]], textposition="outside", hovertemplate="%{y}
Importance: %{x:.4f}
Feature: %{customdata}", customdata=_top25["feature"], )) fig_fi.update_layout( title="Top 25 Features by Gain", xaxis=dict(title="Normalized gain", range=[0, _top25["importance"].max() * 1.22]), height=640, margin=dict(l=10, r=90, t=50, b=40), plot_bgcolor="rgba(0,0,0,0)", ) st.plotly_chart(fig_fi, width='stretch') with _bc2: _grp_sum = (_fi_df.groupby("group")["importance"].sum() .reset_index().sort_values("importance", ascending=False)) fig_grp = go.Figure(go.Bar( x=_grp_sum["importance"], y=_grp_sum["group"], orientation="h", marker=dict(color=[_group_colors.get(g, "#888") for g in _grp_sum["group"]]), text=[f"{v:.3f}" for v in _grp_sum["importance"]], textposition="outside", hovertemplate="%{y}
Total gain: %{x:.4f}", )) fig_grp.update_layout( title="Total Importance by Group", xaxis=dict(title="Sum of gain", range=[0, _grp_sum["importance"].max() * 1.25]), height=640, margin=dict(l=10, r=90, t=50, b=40), plot_bgcolor="rgba(0,0,0,0)", ) st.plotly_chart(fig_grp, width='stretch') # ── Full feature table ──────────────────────────────────────────── with st.expander("Show all 70 features"): _tbl_cols = _fi_df[["rank", "group", "label", "feature", "importance"]].copy() _tbl_cols.columns = ["Rank", "Group", "Description", "Raw Name", "Importance (gain)"] _tbl_cols["Importance (gain)"] = _tbl_cols["Importance (gain)"].map("{:.5f}".format) st.dataframe(_tbl_cols, hide_index=True, width='stretch', height=500) # ── Section 5: Evaluation ───────────────────────────────────────────────── with st.expander("**5 · Model Evaluation**", expanded=True): st.markdown(""" **Validation set:** BTS 2024 held out entirely from training (time-based split). Pair-level metrics below are computed on all aggregated pair-month scores vs. observed bad rates. """) # Metric cards row _ev_cols = st.columns(5) for _col, (_lbl, _val, _note) in zip(_ev_cols, [ ("Val AUC", "0.825", "sequence-level (2024)"), ("Val AP", "0.445", "sequence-level (2024)"), ("Pair AUC", "0.803", "pair-level aggregation"), ("Pair AP", "0.349", "pair-level aggregation"), ("F1 @ 0.11", "0.388", "optimized pair threshold"), ]): _col.markdown( f'
' f'
{_lbl}
' f'
{_val}
' f'
{_note}
' f'
', unsafe_allow_html=True, ) st.markdown("
", unsafe_allow_html=True) _eval = get_eval_data() _txt = "rgba(220,220,220,0.9)" if dark_mode else "rgba(30,30,30,0.9)" _grid = "rgba(255,255,255,0.08)" if dark_mode else "rgba(0,0,0,0.08)" # Row 1: ROC + PR _r1a, _r1b = st.columns(2) with _r1a: # ROC curve _fpr_s = _eval["fpr"][::max(1, len(_eval["fpr"])//500)] _tpr_s = _eval["tpr"][::max(1, len(_eval["tpr"])//500)] fig_roc = go.Figure() fig_roc.add_trace(go.Scatter( x=[0, 1], y=[0, 1], mode="lines", line=dict(dash="dash", color="rgba(150,150,150,0.5)", width=1.5), name="Random (AUC=0.50)", showlegend=True, )) fig_roc.add_trace(go.Scatter( x=_fpr_s, y=_tpr_s, mode="lines", line=dict(color="#005EB8", width=2.5), fill="tozeroy", fillcolor="rgba(0,94,184,0.08)", name=f"XGBoost (AUC = {_eval['auc']:.3f})", )) fig_roc.update_layout( title="ROC Curve (pair-level)", xaxis=dict(title="False Positive Rate", range=[0,1]), yaxis=dict(title="True Positive Rate", range=[0,1.02]), height=380, margin=dict(t=50, b=50, l=50, r=20), plot_bgcolor="rgba(0,0,0,0)", legend=dict(x=0.55, y=0.08), ) fig_roc.add_annotation( x=0.65, y=0.35, text=f"AUC = {_eval['auc']:.3f}", font=dict(size=15, color=_txt), showarrow=False, ) st.plotly_chart(fig_roc, width='stretch') with _r1b: # Precision-Recall curve _step = max(1, len(_eval["prec"]) // 500) _pr_p = _eval["prec"][::_step] _pr_r = _eval["rec"][::_step] _baseline = float((_eval["scores"]["observed_bad_rate"] > 0.25).mean()) fig_pr = go.Figure() fig_pr.add_trace(go.Scatter( x=[0, 1], y=[_baseline, _baseline], mode="lines", line=dict(dash="dash", color="rgba(150,150,150,0.5)", width=1.5), name=f"Baseline (AP={_baseline:.2f})", showlegend=True, )) fig_pr.add_trace(go.Scatter( x=_pr_r, y=_pr_p, mode="lines", line=dict(color="#C41E3A", width=2.5), fill="tozeroy", fillcolor="rgba(196,30,58,0.08)", name=f"XGBoost (AP = {_eval['ap']:.3f})", )) fig_pr.update_layout( title="Precision-Recall Curve (pair-level)", xaxis=dict(title="Recall", range=[0,1]), yaxis=dict(title="Precision", range=[0,1.02]), 
height=380, margin=dict(t=50, b=50, l=50, r=20), plot_bgcolor="rgba(0,0,0,0)", legend=dict(x=0.02, y=0.08), ) fig_pr.add_annotation( x=0.35, y=0.35, text=f"AP = {_eval['ap']:.3f}", font=dict(size=15, color=_txt), showarrow=False, ) st.plotly_chart(fig_pr, width='stretch') # Row 2: Calibration + Confusion matrix _r2a, _r2b = st.columns(2) with _r2a: # Calibration scatter _cal = _eval["cal"] _cal_colors = [score_to_color(float(s)) for s in _cal["mean_score"]] _diag_color = "rgba(200,200,200,0.55)" if dark_mode else "rgba(80,80,80,0.45)" fig_cal = go.Figure() fig_cal.add_trace(go.Scatter( x=[0, 1], y=[0, 1], mode="lines", line=dict(dash="dot", color=_diag_color, width=1.5), name="Perfect calibration", showlegend=True, )) fig_cal.add_trace(go.Scatter( x=_cal["mean_score"], y=_cal["mean_obs"], mode="markers+lines", marker=dict( size=_cal["n"] / _cal["n"].max() * 28 + 10, color=_cal_colors, line=dict(width=1.5, color="white"), ), line=dict(color="rgba(128,128,128,0.4)", width=1.5), text=[f"Decile {i}
Score: {s:.3f}
Obs bad rate: {o:.3f}
n={n:,}" for i, (s, o, n) in enumerate(zip(_cal["mean_score"], _cal["mean_obs"], _cal["n"]))], hovertemplate="%{text}", name="Model (decile means)", )) fig_cal.update_layout( title="Calibration Plot — Isotonic-Calibrated Score vs. Observed Bad Rate
" "Dot size ∝ pair-months in decile · Near diagonal = well-calibrated (score ≈ disruption rate)", xaxis=dict(title="Mean Model Risk Score (decile)", range=[0, 1], tickformat=".0%", tickvals=[0, 0.25, 0.5, 0.75, 1.0], gridcolor=_grid), yaxis=dict(title="Mean Observed Bad Rate (decile)", range=[0, 1], tickformat=".0%", tickvals=[0, 0.25, 0.5, 0.75, 1.0], gridcolor=_grid), height=400, margin=dict(t=70, b=50, l=60, r=20), plot_bgcolor="rgba(0,0,0,0)", legend=dict(x=0.02, y=0.92), ) st.plotly_chart(fig_cal, width='stretch') with _r2b: # Confusion matrix _cm = np.array([[220253, 28168], [71170, 109525]]) _cm_pct = _cm / _cm.sum() _ann = [[f"{_cm[i,j]:,}
{_cm_pct[i,j]:.1%}" for j in range(2)] for i in range(2)] _cm_colors = [["rgba(0,94,184,0.55)", "rgba(196,30,58,0.25)"], ["rgba(196,30,58,0.25)", "rgba(0,94,184,0.55)"]] fig_cm = go.Figure() for _ri in range(2): for _ci in range(2): fig_cm.add_shape(type="rect", x0=_ci-0.5, y0=_ri-0.5, x1=_ci+0.5, y1=_ri+0.5, fillcolor=_cm_colors[_ri][_ci], line=dict(color="rgba(128,128,128,0.3)", width=1)) fig_cm.add_annotation( x=_ci, y=_ri, text=_ann[_ri][_ci], font=dict(size=15, color=_txt), showarrow=False, align="center") fig_cm.update_layout( title="Confusion Matrix (raw score threshold = 0.50 ≈ calibrated 0.30, full dataset)", xaxis=dict(tickvals=[0,1], ticktext=["Pred Low", "Pred High"], side="top", range=[-0.5, 1.5]), yaxis=dict(tickvals=[0,1], ticktext=["Actual Low", "Actual High"], range=[-0.5, 1.5], autorange="reversed"), height=400, margin=dict(t=80, b=20, l=100, r=20), plot_bgcolor="rgba(0,0,0,0)", ) st.plotly_chart(fig_cm, width='stretch') # Score distribution histogram (calibrated thresholds) _bands = ["LOW\n(<20%)", "MODERATE\n(20–30%)", "HIGH\n(≥30%)"] _pcts = [0.630, 0.228, 0.142] _cnts = [int(p * 429116) for p in _pcts] fig_dist = go.Figure(go.Bar( x=_bands, y=_pcts, marker=dict(color=["#2ca02c","#ff7f0e","#d62728"], line=dict(width=0)), text=[f"{p:.1%}
({c:,} pairs)" for p, c in zip(_pcts, _cnts)], textposition="outside", )) fig_dist.update_layout( title="Model Score Distribution — All 429k Pair-Months", yaxis=dict(tickformat=".0%", range=[0, 0.72], title="Fraction of pair-months"), xaxis=dict(title="Risk Band"), height=310, margin=dict(t=50, b=50, l=60, r=20), plot_bgcolor="rgba(0,0,0,0)", ) st.plotly_chart(fig_dist, width='stretch') st.markdown(""" **Interpreting the calibration plot** Points lie **near the diagonal** — after isotonic regression calibration, model scores directly approximate observed corridor bad rates. A score of **0.30** means *"approximately 30% of tail-matched crew rotations on this corridor in this month historically experienced a weather disruption."* The calibration procedure fits a monotone step function (isotonic regression) at the pair-month level, mapping raw XGBoost log-odds → observed bad rate scale. Ranking is fully preserved (monotone transform), so AUCPR and AP are unaffected. The decile dots confirm the calibration is working: each bucket's mean score tracks its mean observed bad rate closely, with deviation < 0.03 on average. **Known limitations** 1. **Tail-number matching as crew proxy.** The model links flights by shared tail number (same aircraft = likely same crew), but crew scheduling can deviate from aircraft routing. Scores approximate crew exposure, not guaranteed crew assignment. 2. **AA-only training.** Tail-chain and cascade features reflect AA operational patterns; scores for non-AA carriers on the same routes may differ. 3. **Climate stationarity.** Features derived from 2015–2024 GSOM climatology; structural climate shifts would require retraining. 4. **No real-time weather.** Captures climatological risk only — overlay live NWS products for day-of decisions. 5. **Calibration holdout.** Isotonic calibration was fitted on the full pair-month dataset (not a held-out split), so calibration error on truly new route-months may be slightly higher. 
""") # ── Section 6: Feature Group Deep Dive ─────────────────────────────────── with st.expander("**6 · Feature Group Deep Dive**"): _group_details = [ ("Origin & Dest BTS Weather", "#005EB8", """ **Source:** BTS On-Time Performance database (FAA Form 41). **Computed per airport × month** over 2015–2024 AA flights at DFW. Each airport has 8 features split into *AA-specific* (when that airport appears in an AA DFW sequence) and *overall* (all carriers, all routes): | Feature | Definition | |---|---| | `weather_delay_rate` | Fraction of flights with `WeatherDelay ≥ 15 min` | | `weather_cancel_rate` | Fraction of flights cancelled with code "B" (weather) | | `avg_weather_delay_min` | Mean `WeatherDelay` across delayed flights | | `p75_weather_delay_min` | 75th percentile of weather delay distribution | | `p95_weather_delay_min` | 95th percentile — captures tail risk | | `nas_delay_rate` | Fraction with `NASDelay ≥ 15 min` (ATC/system, correlated with weather) | | `overall_weather_delay_rate` | All-carrier version of `weather_delay_rate` | | `overall_avg_weather_delay_min` | All-carrier average weather delay | Pair-level features (`pair_*`) are computed as max, min, sum, or product across A and B, capturing compounding effects (both airports simultaneously bad → highest risk). """), ("GSOM Weather (NOAA)", "#1a7a4a", """ **Source:** NOAA Global Summary of Month, downloaded via IEM API. **Coverage:** ~35% of unique origin airports (73/204) have a nearby GSOM station with complete data; ~55% of sequence rows lack A-side GSOM data. **Fairness fix — month-level median imputation.** Early versions left GSOM values as NaN, relying on XGBoost's NaN default branches. This created a subtle bias: the model learned that "no GSOM station" correlates with lower disruption rates (because GSOM-less airports tend to be smaller), causing airports like ANC (Anchorage) or ALB (Albany) to receive artificially low risk scores despite genuinely severe weather. 
The fix: at inference time, NaN GSOM features are replaced with the month-level population median computed from all airports that do have data. This gives uncovered airports a *neutral, seasonal* weather signal rather than conflating "no station" with "good weather." SHAP charts flag imputed features with ★ and an amber border. | Feature | Definition | |---|---| | `avg_wind_speed` | Monthly mean surface wind speed (knots) | | `max_wind_gust` | Maximum recorded wind gust in month (knots) | | `precip_days` | Number of days with measurable precipitation | | `total_precip` | Total monthly precipitation (inches) | | `extreme_precip` | Days with precipitation ≥ 1 inch | These five features exist for both A and B, plus pair-level max-aggregations (`pair_max_*`). GSOM captures the *climatological* pattern — e.g., Boston in January has high `precip_days` and elevated `max_wind_gust` regardless of BTS delay attribution. """), ("DFW Hub Weather", "#C41E3A", """ Every A→DFW→B sequence transits DFW — so DFW weather is a **universal covariate** shared across all pairs. We compute it separately from airport-level features because it is not specific to A or B. DFW weather is computed from all flights in the BTS files (both departing and arriving DFW), aggregated by month. Four features: | Feature | Definition | |---|---| | `DFW_weather_delay_rate` | Fraction of DFW flights delayed by weather ≥ 15 min | | `DFW_weather_cancel_rate` | Fraction of DFW flights cancelled (weather) | | `DFW_avg_weather_delay_min` | Mean weather delay at DFW | | `DFW_p95_weather_delay_min` | 95th-percentile weather delay — captures severe weather events | DFW hub weather ranks ~15th in feature importance, suggesting that pair-specific factors dominate over hub-wide conditions — which makes sense, since DFW weather is a constant backdrop, not a differentiator between pairs. 
"""), ("Tail-Chain & FAA Part 117 Duty", "#8B4513", """ **Motivation.** A crew sequence A→DFW→B is not isolated: the aircraft (tail number) arrives at DFW having already flown earlier that day (e.g., LGA→DFW). Each prior leg adds fatigue and reduces buffer. FAA Part 117 limits Flight Duty Period (FDP) to typically 9–13 hours depending on report time and number of legs. **Construction.** For each tail number we reconstruct the full day's rotation from BTS data. The DFW sequence is the focal leg; we look at preceding and succeeding legs on the same tail. | Feature | Definition | |---|---| | `tc_legs_before_mean` | Avg number of legs the aircraft flew before the DFW arrival leg | | `tc_block_before_mean` | Avg total block time (min) before DFW arrival | | `tc_duty_start_hour` | Avg local hour of the crew's first departure of the day | | `tc_total_duty_mean/p75` | Total duty period (first departure → last arrival + ground time) | | `tc_fdp_util_mean/p75` | FDP utilization: duty period / FAA Part 117 legal FDP limit | | `tc_fdp_overrun_rate` | Fraction of sequences where FDP utilization > 0.95 (near-limit) | | `tc_wocl_rate` | Fraction of sequences where duty period overlaps 02:00–05:59 local (Window of Circadian Low — highest fatigue risk) | | `tc_legs_after_mean` | Avg legs the aircraft flies after the DFW departure leg | | `tc_legs_in_day_mean` | Total legs in the full rotation day | | `tc_downstream_rate` | Fraction of sequences where the leg after B is late (propagation) | | `tc_cascade_late_rate` | Fraction of sequences where B→DFW arrival is late due to A→DFW delay | | `tc_cascade_late_min` | Avg minutes the cascade adds to B→DFW arrival | | `tc_cascade_amplif_mean` | Delay amplification factor: late minutes out / late minutes in | `tc_cascade_amplif_mean` is the **4th most important feature overall** — sequences where a small inbound delay reliably amplifies into a large outbound delay are systematically risky. 
"""), ("Airport Cascade Propagation", "#ff7f0e", """ **Motivation.** Some airports are network hubs where delays propagate outward more aggressively than others. A delay at ORD ripples through dozens of downstream AA sequences; a delay at SBA (Santa Barbara) is largely contained. **Construction.** For each airport and month we compute the probability that a late inbound at that airport causes a late outbound on the next leg. | Feature | Definition | |---|---| | `A_ap_cascade_rate` | P(outbound late \| airport A appears in the sequence) | | `A_ap_cascade_given_late` | P(outbound late \| airport A's inbound is late) | | `B_ap_cascade_rate` | Same for airport B | | `B_ap_cascade_given_late` | Same for airport B | | `pair_cascade_product` | A_rate × B_rate — joint cascade exposure | | `pair_max_cascade_rate` | max(A_rate, B_rate) — worst single endpoint | """), ("Multi-Hop DFW Cascade", "#B8860B", """ **Motivation.** The A→DFW→B sequence is embedded in a longer chain. If the crew then operates B→DFW→C→DFW→D, a delay on the focal leg propagates downstream. These features capture how deeply a delay on A→DFW→B reverberates. **Construction.** We trace downstream rotations from BTS data: after B departs DFW, where does the next leg go, and does it too connect through DFW? We follow up to 3 downstream hops. 
| Feature | Definition | |---|---| | `mhc_n_hops_mean/max` | Number of downstream DFW hops after the focal B departure | | `mhc_total_late_min_mean/p75` | Total accumulated late minutes across all downstream hops | | `mhc_cascade_hop_rate` | Fraction of downstream hops that are late | | `mhc_cascade_depth_mean` | Avg depth at which disruption first appears downstream | | `mhc_unique_airports_mean` | Number of distinct airports affected by a cascading delay | | `mhc_recovery_rate` | Fraction of downstream chains that recover (no more late hops after 1st) | `mhc_n_hops_mean` is the **6th most important feature** in the model — pairs with more downstream rotations passing through DFW are inherently riskier because a single delay has higher blast radius. """), ] for _gname, _gcolor, _gdesc in _group_details: st.markdown( f'
' f'{_gname}
', unsafe_allow_html=True, ) st.markdown(_gdesc) st.markdown("---") # ── Section 7: Key Findings ─────────────────────────────────────────────── with st.expander("**7 · Key Findings & Operational Implications**"): st.markdown(""" **Finding 1: Seasonality dominates all other signals (26% of total importance)** The top 3 features are all temporal: `is_spring_summer` (15.0%), `season_summer` (9.1%), `season_spring` (1.9%). This reflects a non-obvious result: spring/summer, not winter, is the riskiest season for DFW crew sequences. DFW is a convective storm hub — afternoon thunderstorm activity peaks June–August, generating rapid-onset ground stops that freeze both inbound and outbound operations simultaneously. Winter snow/ice events at DFW are relatively rare; the real risk is summer convection. **Finding 2: Destination-side weather drives more risk than origin-side** `B_avg_wind_speed` (11.1%) outranks `A_avg_wind_speed` (2.5%). BTS features also show B-side dominance. Hypothesis: the outbound (DFW→B) leg is more operationally constrained — the crew has already absorbed the inbound leg's delays, has a shorter buffer, and faces regulatory FDP limits. A weather event at B that closes the airport or causes long ground delays has no recovery valve. **Finding 3: Cascade amplification is the highest-signal non-seasonal feature** `tc_cascade_amplif_mean` — the ratio of outbound delay minutes to inbound delay minutes — is the 4th most important feature (4.2%). Sequences where a 20-minute inbound delay routinely becomes a 45-minute outbound delay are structurally risky regardless of season. This identifies aircraft rotations with tight turns and no slack. **Finding 4: Multi-hop depth matters more than multi-hop rate** `mhc_n_hops_mean` (3.5%) ranks above `mhc_cascade_hop_rate` (0.7%). The number of downstream DFW connections is more predictive than whether those connections are late. 
High-degree nodes in the DFW rotation network carry systemic risk even in good weather — any disruption propagates to many flights. **Finding 5: FDP overrun is a leading indicator, not a lagging one** `tc_fdp_overrun_rate` (1.2%) predicts disruption *before* it happens. Sequences where crews are routinely flying near their legal FDP limits have elevated bad rates — consistent with fatigue-induced error under weather pressure. This validates the regulatory basis of Part 117 limits as a risk proxy. **Optimization uplift:** Running the Hungarian algorithm on a representative daily schedule (n=120 arrivals, n=140 departures) reduces total risk score by 15–25% vs. random assignment, and by 8–12% vs. greedy (highest-priority-first) assignment. The gains concentrate in the moderate-risk band: the optimizer systematically avoids creating HIGH-risk sequences and distributes unavoidable risk across pairs more evenly. """) # ═══════════════════════════════════════════════════════════════════════════ # TAB 1: RISK DASHBOARD # ═══════════════════════════════════════════════════════════════════════════ with tab_dash: st.header("Airport Pair Risk Dashboard") scores = get_pair_scores() # Controls col_ctrl1, col_ctrl2, col_ctrl3 = st.columns([2, 2, 1]) with col_ctrl1: month_sel = st.slider("Filter by Month", 1, 12, 6, key="dash_month", format="%d") month_name = ap_meta.MONTH_NAMES[month_sel] with col_ctrl2: top_n = st.selectbox("Show top N pairs", [10, 20, 50, 100, 200], index=1) with col_ctrl3: st.markdown("
", unsafe_allow_html=True) show_all_months = st.checkbox("All months", value=False) df_view = scores if show_all_months else scores[scores["Month"] == month_sel] df_top = df_view.nlargest(top_n, "avg_risk_score") # Summary metrics m1, m2, m3, m4 = st.columns(4) pct_high = (df_view["avg_risk_score"] >= HIGH_THRESHOLD).mean() * 100 m1.metric("Pairs Analyzed", f"{len(df_view):,}") m2.metric("High Risk (≥30% disruption)", f"{pct_high:.1f}%") m3.metric("Avg Calibrated Risk", f"{df_view['avg_risk_score'].mean():.1%}") top_a = df_view.groupby("airport_A")["avg_risk_score"].mean().idxmax() m4.metric("Riskiest Origin", top_a) st.markdown( tip("High Risk", "Calibrated threshold: ≥30% means the model predicts ≥30% of sequences on that route " "will be weather-disrupted. Directly interpretable as a disruption rate after isotonic calibration.") + " · " + tip("Avg Calibrated Risk", "Mean calibrated model score across all pair-months in the current filter. " "Approximately equals the expected fraction of sequences disrupted across this flight pool."), unsafe_allow_html=True, ) st.divider() # Top pairs bar chart col_bar, col_tbl = st.columns([3, 2]) with col_bar: fig_bar = go.Figure(go.Bar( x=df_top["avg_risk_score"], y=[f"{r.airport_A}→DFW→{r.airport_B}" for r in df_top.itertuples()], orientation="h", marker_color=[score_to_color(s) for s in df_top["avg_risk_score"]], text=[f"{s:.1%}" for s in df_top["avg_risk_score"]], textposition="outside", hovertemplate="%{y}
Risk: %{x:.1%}", )) fig_bar.update_layout( title=f"Top {top_n} Riskiest Sequences — {month_name if not show_all_months else 'All Months'}", xaxis=dict(range=[0, 1.05], tickformat=".0%"), height=max(400, top_n * 22), margin=dict(l=10, r=80, t=40, b=40), plot_bgcolor="rgba(0,0,0,0)", ) st.plotly_chart(fig_bar, width='stretch') with col_tbl: st.subheader("Risk Table") display = df_top[["airport_A", "airport_B", "Month", "avg_risk_score", "observed_bad_rate", "n_sequences"]].copy() display.columns = ["Origin", "Dest", "Month", "Model Risk", "Observed Bad %", "Rotations"] display["Model Risk"] = display["Model Risk"].map("{:.1%}".format) display["Observed Bad %"] = display["Observed Bad %"].map("{:.1%}".format) st.dataframe(display, width='stretch', height=420) st.markdown( tip("Model Risk", "Calibrated XGBoost score — after isotonic regression calibration, " "this directly approximates the fraction of sequences on this route that are weather-disrupted. " "E.g. 0.30 = ~30% of sequences historically disrupted.") + " vs " + tip("Observed Bad %", "Fraction of tail-matched aircraft rotations (same tail number, same date) " "in this pair-month (2015–2024) where the inbound A→DFW leg or outbound DFW→B leg " "had a weather delay ≥15 min, or a cascade was detected (late-aircraft propagation). " "Direct sample estimate of disruption frequency for this crew assignment.") + " — scores are isotonic-calibrated to observed bad rates; " + tip("small residual gaps", "Calibration holdout effect: isotonic regression was fit on the full pair-month dataset. 
def _render_sequences(seqs: pd.DataFrame, date_label: str,
                      dep_col: str | None = None, key_suffix: str = ""):
    """Render scored sequences: risk filter, colored table, CSV download, timeline.

    Parameters
    ----------
    seqs:
        Scored sequences. Must carry ``risk_label`` and ``risk_score``; the
        optional columns ``Sequence``, ``flight_in``, ``arr_time``,
        ``flight_out``, ``dep_time``, ``turnaround_min`` and ``airport_B`` are
        used when present.
    date_label:
        Label shown in the "Period" metric and used (truncated) in the CSV
        file name. Coerced to ``str`` because callers pass raw ``FlightDate``
        values, which are not guaranteed to be strings.
    dep_col:
        Name of a minutes-after-midnight column. When given and present, the
        timeline x axis is the departure hour; otherwise points are plotted by
        row position.
    key_suffix:
        Suffix that keeps the Streamlit widget keys unique per calling tab.

    Relies on ``score_to_color``, ``HIGH_THRESHOLD`` and ``MOD_THRESHOLD``,
    which are defined elsewhere in this file.
    """
    # Slicing below (`[:30]`, `[:10]`) needs a real string.
    date_label = str(date_label)

    if seqs.empty:
        st.info(f"No feasible A→DFW→B sequences found for {date_label}.")
        return

    levels = ["HIGH", "MODERATE", "LOW", "N/A"]
    risk_filter = st.multiselect(
        "Filter by risk level", levels, default=list(levels), key=f"rf_{key_suffix}"
    )
    seqs_view = seqs[seqs["risk_label"].isin(risk_filter)]

    # Headline metrics describe the full pool, not the filtered view.
    s1, s2, s3 = st.columns(3)
    s1.metric("Sequences Found", len(seqs))
    high_count = (seqs["risk_label"] == "HIGH").sum()
    s2.metric("High Risk", high_count,
              delta=f"{high_count/max(len(seqs),1):.0%} of total")
    s3.metric("Period", date_label[:30])

    # ── Table ────────────────────────────────────────────────────────────
    wanted = ["Sequence", "flight_in", "arr_time", "flight_out", "dep_time",
              "turnaround_min", "risk_score", "risk_label"]
    show_cols = [c for c in wanted if c in seqs_view.columns]
    disp = seqs_view[show_cols].copy().rename(columns={
        "flight_in": "Inbound",
        "arr_time": "Arrived",
        "flight_out": "Outbound",
        "dep_time": "Departed",
        "turnaround_min": "Turnaround (min)",
        "risk_score": "Risk Score",
        "risk_label": "Risk Level",
    })

    def _fmt_score(x):
        # Accept NumPy float scalars (e.g. float32) as well as Python floats.
        if isinstance(x, (float, np.floating)) and not np.isnan(x):
            return f"{x:.1%}"
        return "N/A"

    if "Risk Score" in disp.columns:
        disp["Risk Score"] = disp["Risk Score"].map(_fmt_score)

    row_tint = {
        "HIGH": "rgba(214,39,40,0.25)",
        "MODERATE": "rgba(255,127,14,0.25)",
        "LOW": "rgba(44,160,44,0.25)",
    }

    def _color(row):
        c = row_tint.get(str(row.get("Risk Level", "")), "")
        return [f"background-color:{c}" for _ in row]

    st.dataframe(disp.style.apply(_color, axis=1), width='stretch', height=400)
    st.download_button("Download CSV", disp.to_csv(index=False),
                       file_name=f"dfw_risk_{date_label[:10]}.csv",
                       mime="text/csv", key=f"dl_{key_suffix}")

    # ── Timeline ─────────────────────────────────────────────────────────
    st.subheader("Risk Timeline")
    plot_seqs = seqs.dropna(subset=["risk_score"])
    if dep_col and dep_col in plot_seqs.columns:
        x_vals = plot_seqs[dep_col] / 60
        x_axis = dict(title="Scheduled Departure Hour",
                      tickvals=list(range(0, 25)),
                      ticktext=[f"{h:02d}:00" for h in range(25)])
    else:
        x_vals = np.arange(len(plot_seqs), dtype=float)
        x_axis = dict(title="Sequence (sorted by departure)")

    def _column_or(name, default):
        """Values of an optional column, or a constant filler when absent."""
        if name in plot_seqs.columns:
            return plot_seqs[name].tolist()
        return [default] * len(plot_seqs)

    risk_vals = plot_seqs["risk_score"].tolist()
    # Plotly hover labels use <br> for line breaks.
    hover_lines = [
        f"{seq}<br>Risk: {risk:.1%}<br>Turnaround: {turn} min"
        for seq, risk, turn in zip(_column_or("Sequence", ""),
                                   risk_vals,
                                   _column_or("turnaround_min", "?"))
    ]

    # One trace for all points: per-point colors, labels and hover text are
    # passed as arrays instead of creating a separate trace per sequence.
    fig_tl = go.Figure(go.Scatter(
        x=x_vals,
        y=risk_vals,
        mode="markers+text",
        marker=dict(size=11,
                    color=[score_to_color(r) for r in risk_vals],
                    line=dict(width=1, color="black")),
        text=[str(b) for b in _column_or("airport_B", "")],
        textposition="top center",
        hovertext=hover_lines,
        hoverinfo="text",
        showlegend=False,
    ))
    fig_tl.add_hline(y=HIGH_THRESHOLD, line_dash="dash", line_color="red",
                     annotation_text="High ≥30%", annotation_position="right")
    fig_tl.add_hline(y=MOD_THRESHOLD, line_dash="dash", line_color="orange",
                     annotation_text="Moderate ≥20%", annotation_position="right")
    fig_tl.update_layout(
        xaxis=x_axis,
        yaxis=dict(title="Calibrated Risk Score (≈ disruption rate)",
                   range=[-0.05, 1.05], tickformat=".0%"),
        height=360,
        plot_bgcolor="rgba(0,0,0,0)",
        showlegend=False,
    )
    st.plotly_chart(fig_tl, width='stretch')
def _bts_day_to_seqs(day_df: pd.DataFrame, month_val: int,
                     arr_h0: int = 0, arr_h1: int = 24,
                     dep_h0: int = 0, dep_h1: int = 24) -> pd.DataFrame:
    """Build A→DFW→B sequences for one day of BTS rows by matching tail numbers.

    Parameters
    ----------
    day_df:
        Flights for a single day with columns ``Origin``, ``Dest``,
        ``CRSArrTime``, ``CRSDepTime``, ``Tail_Number`` and
        ``Flight_Number_Reporting_Airline``. The two CRS columns are treated
        as numeric HHMM clock values.
    month_val:
        Month number stamped onto every returned row as ``Month``.
    arr_h0, arr_h1:
        Half-open hour window ``[arr_h0, arr_h1)`` for scheduled DFW arrivals.
    dep_h0, dep_h1:
        Half-open hour window ``[dep_h0, dep_h1)`` for scheduled DFW departures.

    Returns
    -------
    pd.DataFrame
        One row per inbound/outbound pair on the same tail with a 30–240
        minute turnaround and different endpoints. Columns: ``airport_A``,
        ``arr_min``, ``tail``, ``flight_in``, ``airport_B``, ``dep_min``,
        ``flight_out``, ``turnaround_min``, ``Sequence``, ``Month``,
        ``arr_time``, ``dep_time``.
    """
    def _hhmm_to_minutes(hhmm: pd.Series) -> pd.Series:
        # 1345 -> 13 * 60 + 45
        return hhmm // 100 * 60 + hhmm % 100

    def _minutes_to_clock(minutes: pd.Series) -> pd.Series:
        hh = (minutes // 60).astype(int).astype(str).str.zfill(2)
        mm = (minutes % 60).astype(int).astype(str).str.zfill(2)
        return hh + ":" + mm

    def _with_known_tail(legs: pd.DataFrame) -> pd.DataFrame:
        # pandas joins null keys to each other, so legs without a tail number
        # would be paired into rotations that never existed.
        tail = legs["tail"]
        return legs[tail.notna() & (tail.astype(str).str.strip() != "")]

    arrivals = day_df[day_df["Dest"] == "DFW"].copy()
    departures = day_df[day_df["Origin"] == "DFW"].copy()
    arrivals["arr_min"] = _hhmm_to_minutes(arrivals["CRSArrTime"])
    departures["dep_min"] = _hhmm_to_minutes(departures["CRSDepTime"])

    # Rows whose time is NaN fail both comparisons and drop out here.
    arrivals = arrivals[(arrivals["arr_min"] >= arr_h0 * 60)
                        & (arrivals["arr_min"] < arr_h1 * 60)]
    departures = departures[(departures["dep_min"] >= dep_h0 * 60)
                            & (departures["dep_min"] < dep_h1 * 60)]

    arr_s = arrivals[["Origin", "arr_min", "Tail_Number",
                      "Flight_Number_Reporting_Airline"]].copy()
    arr_s.columns = ["airport_A", "arr_min", "tail", "flight_in"]
    dep_s = departures[["Dest", "dep_min", "Tail_Number",
                        "Flight_Number_Reporting_Airline"]].copy()
    dep_s.columns = ["airport_B", "dep_min", "tail", "flight_out"]

    seqs = _with_known_tail(arr_s).merge(_with_known_tail(dep_s),
                                         on="tail", how="inner")
    seqs["turnaround_min"] = seqs["dep_min"] - seqs["arr_min"]
    seqs = seqs[(seqs["turnaround_min"] >= 30)
                & (seqs["turnaround_min"] <= 240)
                & (seqs["airport_A"] != seqs["airport_B"])].copy()

    seqs["Sequence"] = seqs["airport_A"] + " → DFW → " + seqs["airport_B"]
    seqs["Month"] = month_val
    seqs["arr_time"] = _minutes_to_clock(seqs["arr_min"])
    seqs["dep_time"] = _minutes_to_clock(seqs["dep_min"])
    return seqs
and not dep_df.empty: _a = arr_df[["airport","time_min","time_str","flight"]].copy() _d = dep_df[["airport","time_min","time_str","flight"]].copy() cross = _a.merge(_d, how="cross", suffixes=("_a","_b")) _ta = cross["time_min_b"] - cross["time_min_a"] cross = cross[(_ta >= 30) & (_ta <= 240) & (cross["airport_a"] != cross["airport_b"])].copy() if not cross.empty: cross["turnaround_min"] = _ta[cross.index].astype(int) cross["Month"] = month_val cross["Sequence"] = cross["airport_a"] + " → DFW → " + cross["airport_b"] cross = cross.rename(columns={ "airport_a": "airport_A", "airport_b": "airport_B", "flight_a": "flight_in", "time_str_a": "arr_time", "flight_b": "flight_out", "time_str_b": "dep_time", }) seqs = cross[["airport_A","airport_B","flight_in","arr_time", "flight_out","dep_time","turnaround_min","Month","Sequence"]] if not seqs.empty: seqs = lf.score_sequences(seqs, scores_sched) _render_sequences(seqs, f"Live {_dt.now().strftime('%Y-%m-%d %H:%M UTC')}", key_suffix="live") # ── CURRENT SCHEDULE ANALOG ──────────────────────────────────────── elif data_mode.startswith("📅"): col_r1, col_r2 = st.columns([4,1]) with col_r2: refresh = st.button("🔄 Refresh", key="sched_refresh_analog") cache_key = "bts_analog" if refresh or cache_key not in st.session_state: day_df, status = lf.get_bts_analog(bts[bts["Reporting_Airline"]=="AA"]) st.session_state[cache_key] = (day_df, status) else: day_df, status = st.session_state[cache_key] with col_r1: st.caption(status) month_val = int(pd.to_datetime(day_df["FlightDate"].iloc[0]).month) seqs = _bts_day_to_seqs(day_df, month_val) if not seqs.empty: seqs = lf.score_sequences(seqs, scores_sched) seqs = seqs.sort_values("risk_score", ascending=False) _render_sequences(seqs, f"Current schedule analog ({day_df['FlightDate'].iloc[0]})", dep_col="dep_min", key_suffix="analog") else: st.info("No sequences found in analog day.") # ── HISTORICAL MODE ──────────────────────────────────────────────── else: avail_dates = 
sorted(bts["FlightDate"].unique()) col_d1, col_d2, col_d3 = st.columns([2, 2, 2]) with col_d1: sel_date = st.selectbox("Date", avail_dates, index=min(180, len(avail_dates)-1)) with col_d2: carrier_filter = st.radio("Carrier", ["AA only", "All carriers"], horizontal=True, key="sched_carrier") if carrier_filter == "All carriers": st.caption( "⚠️ Risk scores were trained exclusively on AA sequences. " "Non-AA tail numbers use the same pair-month risk lookup, " "which may not reflect other carriers' operational patterns." ) month_val = int(pd.to_datetime(sel_date).month) day_df = bts[bts["FlightDate"] == sel_date].copy() if carrier_filter == "AA only": day_df = day_df[day_df["Reporting_Airline"] == "AA"] seqs = _bts_day_to_seqs(day_df, month_val) if not seqs.empty: seqs = lf.score_sequences(seqs, scores_sched) seqs = seqs.sort_values("risk_score", ascending=False) _render_sequences(seqs, sel_date, dep_col="dep_min", key_suffix="hist") else: st.info(f"No sequences on {sel_date}.") # ═══════════════════════════════════════════════════════════════════════════ # TAB 3: SEQUENCE OPTIMIZER # ═══════════════════════════════════════════════════════════════════════════ with tab_optim: st.header("⚡ Sequence Optimizer") st.markdown( "Given a pool of DFW arrivals and departures, find the minimum-risk one-to-one " "assignment of inbound → outbound sequences using the " + tip("Hungarian algorithm", "Also called the Jonker-Volgenant algorithm. Solves the linear assignment problem " "in O(n³) time: given an n×m cost matrix, find the assignment of arrivals to departures " "that minimizes total cost. Implemented via scipy.optimize.linear_sum_assignment.") + ". Constrains " + tip("turnaround time", "The gap between an aircraft's DFW arrival (inbound leg) and its DFW departure " "(outbound leg) on the same tail number. 
FAA Part 117 requires ≥30 min minimum crew turn; " "240 min is the operational ceiling beyond which a new crew is typically assigned.") + " to **30–240 min** per FAA Part 117 guidelines.", unsafe_allow_html=True, ) bts_o = get_bts_2024() scores_o_idx = get_scores_indexed() if bts_o.empty: st.warning("BTS 2024 data not found.") else: # ── Data source ──────────────────────────────────────────────────── opt_source = st.radio( "Schedule Source", ["🔴 Live (AviationStack)", "📅 Current Schedule (BTS Analog)", "📂 Historical (BTS 2024)"], horizontal=True, key="opt_source", ) st.divider() # ── Time window controls ─────────────────────────────────────────── st.subheader("Time Window") tw1, tw2, tw3 = st.columns(3) with tw1: opt_carrier = st.radio("Carrier", ["AA only", "All carriers"], key="opt_carrier", horizontal=True) with tw2: st.markdown("**Arrival Window (→ DFW)**") arr_h0 = st.slider("Arrival from hour", 0, 23, 6, key="arr_h0") arr_h1 = st.slider("Arrival to hour", 1, 24, 20, key="arr_h1") with tw3: st.markdown("**Departure Window (DFW →)**") dep_h0 = st.slider("Departure from hour", 0, 23, 7, key="dep_h0") dep_h1 = st.slider("Departure to hour", 1, 24, 22, key="dep_h1") # Extra control: historical date picker (only shown in historical mode) if opt_source.startswith("📂"): avail_dates_o = sorted(bts_o["FlightDate"].unique()) opt_date = st.selectbox("Schedule Date", avail_dates_o, index=min(180, len(avail_dates_o)-1), key="opt_date") else: opt_date = None st.divider() # ── Load arrivals + departures based on source ───────────────────── arrivals_o = pd.DataFrame() departures_o = pd.DataFrame() opt_month = datetime.now().month opt_source_label = "" if opt_source.startswith("🔴"): if not aviationstack_key: st.warning("Add AviationStack API key in sidebar for live data.") else: cache_key_as = "as_live_seqs" col_oa, col_ob = st.columns([4,1]) with col_ob: fetch_live = st.button("🔄 Fetch Live", key="opt_refresh_live") if fetch_live: with st.spinner("Fetching live AA schedule 
from AviationStack..."): arr_raw, dep_raw, status = lf.fetch_aviationstack(aviationstack_key) st.session_state[cache_key_as] = (arr_raw, dep_raw, status) if cache_key_as not in st.session_state: st.info("Press **🔄 Fetch Live** to load the current AA DFW schedule.") else: arr_raw, dep_raw, status = st.session_state[cache_key_as] arr_df_as = opt.aviationstack_to_arrivals(arr_raw, arr_h0, arr_h1) dep_df_as = opt.aviationstack_to_departures(dep_raw, dep_h0, dep_h1) with col_oa: st.caption(status) if len(arr_df_as) == 0 and len(dep_df_as) == 0: st.warning("No flights returned — check API key or try BTS Analog.") elif len(arr_df_as) < 50: st.info( f"Only {len(arr_df_as)} arrivals found. " "All times shown in **DFW local (CDT)**. " "Widen the arrival/departure hour sliders if flights are missing." ) arrivals_o = arr_df_as departures_o = dep_df_as opt_month = datetime.now().month opt_source_label = f"Live {datetime.now().strftime('%Y-%m-%d')}" elif opt_source.startswith("📅"): col_oa, col_ob = st.columns([4,1]) with col_ob: if st.button("🔄 Refresh", key="opt_refresh_analog"): if "bts_analog" in st.session_state: del st.session_state["bts_analog"] cache_key_an = "bts_analog" if cache_key_an not in st.session_state: aa_bts = bts_o[bts_o["Reporting_Airline"] == "AA"] if opt_carrier == "AA only" else bts_o day_df_an, status_an = lf.get_bts_analog(aa_bts) st.session_state[cache_key_an] = (day_df_an, status_an) day_df_an, status_an = st.session_state[cache_key_an] with col_oa: st.caption(status_an) opt_month = int(pd.to_datetime(day_df_an["FlightDate"].iloc[0]).month) arrivals_o = opt.bts_to_arrivals(day_df_an, arr_h0, arr_h1) departures_o = opt.bts_to_departures(day_df_an, dep_h0, dep_h1) opt_source_label = f"Analog {day_df_an['FlightDate'].iloc[0]}" else: # historical opt_day = bts_o[bts_o["FlightDate"] == opt_date].copy() if opt_carrier == "AA only": opt_day = opt_day[opt_day["Reporting_Airline"] == "AA"] opt_month = int(pd.to_datetime(opt_date).month) arrivals_o = 
opt.bts_to_arrivals(opt_day, arr_h0, arr_h1) departures_o = opt.bts_to_departures(opt_day, dep_h0, dep_h1) opt_source_label = opt_date if arr_h0 >= arr_h1 or dep_h0 >= dep_h1: st.error("End hour must be > start hour.") elif not arrivals_o.empty or not departures_o.empty: sc1, sc2, sc3 = st.columns(3) sc1.metric("Arrivals in window", len(arrivals_o)) sc2.metric("Departures in window", len(departures_o)) feasible_count = 0 if not arrivals_o.empty and not departures_o.empty: # Vectorized cross-join feasibility check _a = arrivals_o["time_min"].to_numpy(dtype=float)[:, None] # (n,1) _d = departures_o["time_min"].to_numpy(dtype=float)[None, :] # (1,m) _ta = _d - _a _ap_a = arrivals_o["airport"].to_numpy(dtype=str)[:, None] _ap_d = departures_o["airport"].to_numpy(dtype=str)[None, :] _same = _ap_a == _ap_d feasible_count = int(((_ta >= 30) & (_ta <= 240) & ~_same).sum()) sc3.metric("Feasible pairs", feasible_count) st.markdown( tip("Feasible pairs", "Arrival–departure combinations satisfying: " "(1) turnaround ≥ 30 min, (2) turnaround ≤ 240 min, " "(3) origin airport A ≠ destination airport B. " "The optimizer picks the best one-to-one assignment from this pool."), unsafe_allow_html=True, ) st.divider() if st.button("⚡ Run Optimizer", type="primary", key="run_optim"): if arrivals_o.empty or departures_o.empty: st.error("Need at least 1 arrival and 1 departure in the time windows.") elif feasible_count == 0: st.error("No feasible A→DFW→B pairs in the selected windows. 
" "Widen arrival/departure windows or reduce turnaround constraints.") else: with st.spinner("Running Hungarian algorithm..."): result_df, stats = opt.optimize_sequences( arrivals_o, departures_o, scores_o_idx, opt_month ) st.session_state["opt_result"] = (result_df, stats, arrivals_o, departures_o) if "opt_result" in st.session_state: result_df, stats, arrivals_o_r, departures_o_r = st.session_state["opt_result"] # ── Summary metrics ─────────────────────────────────────── st.subheader("Optimization Results") rm1, rm2, rm3, rm4, rm5 = st.columns(5) rm1.metric("Sequences Assigned", stats["n_matched"]) rm2.metric("Avg Risk (Optimal)", f"{stats['optimal_avg']:.1%}") worst_avg = stats["worst_total"] / max(stats["n_matched"], 1) rm3.metric("Avg Risk (Worst-case)", f"{worst_avg:.1%}") risk_saved_pct = (stats["risk_saved"] / max(stats["worst_total"], 0.001)) * 100 rm4.metric("Risk Reduction", f"{risk_saved_pct:.1f}%", delta=f"-{stats['risk_saved']:.2f} total score") rm5.metric("High Risk Sequences", f"{stats['pct_high']:.0%}") st.markdown( tip("Worst-case", "Approximate upper bound on total risk: computed by running " "the Hungarian algorithm on the negated cost matrix (maximize risk instead of minimize). " "Represents a naive worst-possible assignment.") + " · " + tip("Risk Reduction", "Percentage reduction in total calibrated risk score: " "(worst_total − optimal_total) / worst_total. " "Reflects how much disruption risk the optimizer avoids vs. 
a naive assignment."), unsafe_allow_html=True, ) st.divider() col_res1, col_res2 = st.columns([3, 2]) with col_res1: st.subheader("Optimal Assignment") if result_df.empty: st.info("No feasible assignments found.") else: disp_r = result_df[["Sequence", "flight_in", "arr_time", "flight_out", "dep_time", "turnaround_min", "risk_score", "risk_label"]].copy() disp_r.columns = ["Sequence", "Inbound", "Arrived", "Outbound", "Departs", "Turnaround (min)", "Risk Score", "Risk Level"] disp_r["Risk Score"] = disp_r["Risk Score"].map("{:.1%}".format) def _cr(row): c = {"HIGH":"rgba(214,39,40,0.25)","MODERATE":"rgba(255,127,14,0.25)","LOW":"rgba(44,160,44,0.25)"}.get( str(row.get("Risk Level","")), "") return [f"background-color:{c}" for _ in row] st.dataframe(disp_r.style.apply(_cr, axis=1), width='stretch', height=420) st.download_button("Download Optimal Schedule", disp_r.to_csv(index=False), file_name=f"optimal_sequences_{opt_source_label}.csv", mime="text/csv", key="dl_opt") with col_res2: st.subheader("Risk Distribution") if not result_df.empty: counts = result_df["risk_label"].value_counts().reindex( ["HIGH","MODERATE","LOW"], fill_value=0) fig_pie = go.Figure(go.Pie( labels=counts.index, values=counts.values, marker_colors=["#d62728","#ff7f0e","#2ca02c"], hole=0.45, textinfo="label+percent+value", )) fig_pie.update_layout(title="Assigned Sequences by Risk Level", height=280, margin=dict(t=40,b=0,l=0,r=0)) st.plotly_chart(fig_pie, width='stretch') # Optimal vs worst bar fig_cmp = go.Figure([ go.Bar(name="Worst-case", x=["Total Risk Score"], y=[stats["worst_total"]], marker_color="#d62728"), go.Bar(name="Optimal", x=["Total Risk Score"], y=[stats["optimal_total"]], marker_color="#2ca02c"), ]) fig_cmp.update_layout( barmode="group", title="Optimal vs Worst-case Total Risk", height=260, plot_bgcolor="rgba(0,0,0,0)", margin=dict(t=40,b=40,l=40,r=20), legend=dict(orientation="h", y=-0.2), ) st.plotly_chart(fig_cmp, width='stretch') # ── Gantt-style timeline 
────────────────────────────────── if not result_df.empty: st.subheader("Sequence Timeline (Gantt)") fig_g = go.Figure() for i, row in result_df.iterrows(): arr_m = arrivals_o_r[arrivals_o_r["airport"] == row["airport_A"]]["time_min"] dep_m = departures_o_r[departures_o_r["airport"] == row["airport_B"]]["time_min"] a_t = float(arr_m.iloc[0]) if not arr_m.empty else 0 d_t = float(dep_m.iloc[0]) if not dep_m.empty else a_t + 90 color = score_to_color(row["risk_score"]) fig_g.add_trace(go.Scatter( x=[a_t/60, d_t/60], y=[i, i], mode="lines+markers", line=dict(color=color, width=8), marker=dict(size=8, color=["#555", color]), name=row["Sequence"], hovertemplate=( f"{row['Sequence']}
" f"Arr: {a_t/60:.2f}h | Dep: {d_t/60:.2f}h
" f"Turnaround: {row['turnaround_min']} min
" f"Risk: {row['risk_score']:.1%}" ), showlegend=False, )) fig_g.add_annotation( x=a_t/60, y=i, text=row["airport_A"], showarrow=False, xanchor="right", font=dict(size=9) ) fig_g.add_annotation( x=d_t/60, y=i, text=row["airport_B"], showarrow=False, xanchor="left", font=dict(size=9) ) fig_g.update_layout( xaxis=dict(title="Time (hour)", tickvals=list(range(0,25)), ticktext=[f"{h:02d}:00" for h in range(25)]), yaxis=dict(visible=False), height=max(300, len(result_df) * 22 + 60), plot_bgcolor="rgba(0,0,0,0)", title="Each bar = one A→DFW→B sequence (color = risk level)", margin=dict(l=80, r=80, t=50, b=50), ) st.plotly_chart(fig_g, width='stretch') # ═══════════════════════════════════════════════════════════════════════════ # TAB 4: PAIR RISK QUERY # ═══════════════════════════════════════════════════════════════════════════ with tab_query: st.header("Pair Risk Query") st.markdown( "Select an inbound origin (A) and outbound destination (B) to score the A→DFW→B " + tip("sequence", "A crew sequence: a pilot or flight attendant arrives on an inbound leg from A, " "turns at DFW (30–240 min), then departs on an outbound leg to B. " "The model scores the weather disruption risk of this complete rotation.") + ". The " + tip("calibrated risk score", "XGBoost score passed through isotonic regression calibration. 
" "Directly interpretable: 0.30 = model predicts ~30% of A→DFW→B sequences " "in this month are weather-disrupted (≥15 min delay or weather cancellation).") + " reflects the predicted fraction of disrupted sequences for this pair-month.", unsafe_allow_html=True, ) pred = get_predictor() col_qa, col_qb, col_qm = st.columns(3) with col_qa: a_opts = pred.airports_a default_a = a_opts.index("MCO") if "MCO" in a_opts else 0 airport_a = st.selectbox( "Inbound Origin (A)", a_opts, index=default_a, format_func=ap_meta.label, key="q_a", ) with col_qb: b_opts = pred.airports_b default_b = b_opts.index("LAX") if "LAX" in b_opts else 0 airport_b = st.selectbox( "Outbound Destination (B)", b_opts, index=default_b, format_func=ap_meta.label, key="q_b", ) with col_qm: q_month = st.slider("Month", 1, 12, 6, key="q_month", format="%d — " + "%s") st.caption(ap_meta.MONTH_NAMES[q_month]) st.markdown("---") if airport_a == "DFW" or airport_b == "DFW": st.warning("DFW is the hub — select non-DFW airports for A and B.") else: result = pred.predict_pair(airport_a, airport_b, q_month) if result is None: st.warning( f"No historical data for **{airport_a} → DFW → {airport_b}** in month {q_month}. " "This pair-month combination wasn't observed in BTS 2015–2024." ) else: # Layout: gauge + explanation side by side col_g, col_e = st.columns([1, 2]) with col_g: st.plotly_chart( gauge_chart(result["risk_score"], f"{airport_a} → DFW → {airport_b}"), width='stretch', ) # Key metrics st.markdown( f"| | |\n|--|--|\n" f"| **Sequence** | {airport_a} → DFW → {airport_b} |\n" f"| **Month** | {ap_meta.MONTH_NAMES[q_month]} |\n" f"| {tip('Model Risk Score', 'Calibrated XGBoost score ≈ fraction of days in this month on which the A→DFW or DFW→B corridor experiences weather disruption. 
Isotonic regression maps raw model output to the observed corridor bad-rate scale.')} | {result['risk_score']:.1%} |\n" f"| {tip('Observed Rotation Bad Rate', 'Fraction of tail-matched aircraft rotations (same tail number, same date, 30–240 min turn) in this pair-month (2015–2024) where the A→DFW inbound leg or DFW→B outbound leg had a weather delay ≥15 min, or a cascade was detected. This is the direct sample estimate of crew disruption frequency.')} | {result['observed_bad_rate']:.1%} |\n" f"| {tip('Rotations Observed', 'Number of tail-matched aircraft rotations (same tail number, same date) on this corridor in this month across 2015–2024. The sample size for the observed bad rate. Typical value is 2–10 per year-month; rare corridors may have only 1.')} | {result['n_sequences']:,} |\n", unsafe_allow_html=True, ) # Recommendation box score = result["risk_score"] if score >= HIGH_THRESHOLD: st.error( f"**Recommendation: Do Not Assign**\n\n" f"Model predicts **{score:.0%}** of sequences on this route are weather-disrupted " f"in {ap_meta.MONTH_NAMES[q_month]} — above the {HIGH_THRESHOLD:.0%} high-risk threshold. " f"Observed historical disruption rate: {result['observed_bad_rate']:.0%}." ) elif score >= MOD_THRESHOLD: st.warning( f"**Recommendation: Caution**\n\n" f"Model predicts **{score:.0%}** of sequences are disrupted — moderate risk. " f"Consider buffer time or weather monitoring. " f"Historical disruption rate: {result['observed_bad_rate']:.0%}." ) else: st.success( f"**Recommendation: Acceptable**\n\n" f"Model predicts **{score:.0%}** of sequences are disrupted — low risk. " f"Historical disruption rate: {result['observed_bad_rate']:.0%}." 
) with col_e: # SHAP explanation — cached in session_state to avoid re-init every render shap_key = f"shap_{airport_a}_{airport_b}_{q_month}" if shap_key not in st.session_state: with st.spinner("Computing feature contributions (first time only)..."): try: st.session_state[shap_key] = pred.explain_pair( result["X"], top_n=15, gsom_imputed=result.get("gsom_imputed", set()), ) except Exception as ex: st.session_state[shap_key] = ex shap_result = st.session_state[shap_key] if isinstance(shap_result, Exception): st.info(f"SHAP explanation unavailable: {shap_result}") feat_vals = result["X"].T.rename(columns={0: "Value"}) feat_vals.index = [FEATURE_LABELS.get(f, f) for f in feat_vals.index] feat_vals = feat_vals.dropna() st.dataframe(feat_vals.style.format("{:.4f}"), height=350) else: st.markdown( tip("SHAP values", "SHapley Additive exPlanations. Each bar shows how much a feature " "pushed the model output up (red, increases risk) or down (green, decreases risk) " "relative to the average prediction. SHAP values are additive: they sum to the " "difference between this prediction and the model's mean output.") + " for this pair-month — features sorted by impact magnitude.", unsafe_allow_html=True, ) _gsom_imp = result.get("gsom_imputed", set()) if _gsom_imp: st.info( f"**★ GSOM weather features imputed** — {airport_a} or {airport_b} " f"lacks a nearby NOAA weather station. " f"{len(_gsom_imp)} feature(s) filled with the month-{q_month} population " f"median across all airports that do have GSOM data. " f"Starred (★) features in the chart used imputed values; their SHAP " f"contributions reflect *typical* weather for this month, not measured values." 
) st.plotly_chart(shap_bar_chart(shap_result), width='stretch') st.divider() # Month-by-month risk for selected pair st.subheader(f"Month-by-Month Risk: {airport_a} → DFW → {airport_b}") monthly = pred.predict_all_months(airport_a, airport_b) fig_monthly = go.Figure() fig_monthly.add_trace(go.Scatter( x=monthly["Month"], y=monthly["risk_score"], mode="lines+markers", marker=dict( size=12, color=[score_to_color(s) if not np.isnan(s) else "#aaa" for s in monthly["risk_score"]], line=dict(width=1.5, color="black"), ), line=dict(color="#555", width=2), text=monthly["risk_score"].map( lambda s: f"{s:.1%}" if not np.isnan(s) else "N/A" ), hovertemplate="%{x}
Risk: %{text}", )) fig_monthly.add_hrect(y0=HIGH_THRESHOLD, y1=1.05, fillcolor="red", opacity=0.07, line_width=0) fig_monthly.add_hrect(y0=MOD_THRESHOLD, y1=HIGH_THRESHOLD, fillcolor="orange", opacity=0.07, line_width=0) fig_monthly.add_hline(y=HIGH_THRESHOLD, line_dash="dash", line_color="red", opacity=0.4) fig_monthly.add_hline(y=MOD_THRESHOLD, line_dash="dash", line_color="orange", opacity=0.4) fig_monthly.update_layout( xaxis=dict(tickvals=list(range(1, 13)), ticktext=[ap_meta.MONTH_NAMES[m][:3] for m in range(1, 13)]), yaxis=dict(title="Risk Score", range=[0, 1.05], tickformat=".0%"), height=300, plot_bgcolor="rgba(0,0,0,0)", title="Seasonal Risk Profile", showlegend=False, ) if q_month: fig_monthly.add_vline(x=q_month, line_dash="dot", line_color="#333", annotation_text=ap_meta.MONTH_NAMES[q_month][:3], annotation_position="top") st.plotly_chart(fig_monthly, width='stretch') # Compare with reversed sequence with st.expander("Compare: reversed sequence B → DFW → A"): result_rev = pred.predict_pair(airport_b, airport_a, q_month) if result_rev: col_rev1, col_rev2 = st.columns(2) with col_rev1: st.plotly_chart( gauge_chart(result["risk_score"] if result else 0, f"{airport_a}→DFW→{airport_b}"), width='stretch' ) with col_rev2: st.plotly_chart( gauge_chart(result_rev["risk_score"], f"{airport_b}→DFW→{airport_a}"), width='stretch' ) else: st.info(f"No data for {airport_b} → DFW → {airport_a} in month {q_month}.") # ═══════════════════════════════════════════════════════════════════════════ # TAB 4: AIRPORT RISK MAP # ═══════════════════════════════════════════════════════════════════════════ with tab_map: st.header("Airport Risk Map") st.markdown( "Airports sized and colored by " + tip("average calibrated risk score", "Mean of the calibrated XGBoost risk scores " "across all pair-months involving this airport in the selected role and month. " "Color and marker size both scale with risk (green=low, red=high). 
" "Normalized to the calibrated score range [0%, 50%].") + ". DFW is the hub for all sequences. Spoke lines connect DFW to the 10 riskiest airports.", unsafe_allow_html=True, ) scores_map = get_pair_scores() col_m1, col_m2, col_m3, col_m4 = st.columns(4) with col_m1: map_month = st.slider("Month", 1, 12, 6, key="map_month") st.caption(ap_meta.MONTH_NAMES[map_month]) with col_m2: map_role = st.radio("Airport Role", ["As Origin (A)", "As Destination (B)"], horizontal=True) with col_m3: map_top_n = st.slider("Show top N airports", 10, 200, 30) with col_m4: st.caption("Map style follows the\n🌙 Dark mode toggle in the sidebar.") # Map geo colors follow global dark_mode toggle if dark_mode: _land = "rgba(40,40,40,0.7)" _lake = "rgba(30,80,120,0.5)" _coast = "rgba(160,160,160,0.5)" _sub = "rgba(160,160,160,0.3)" _bg = "rgba(15,17,22,0.0)" else: _land = "#e8ecf0" _lake = "#c6dff0" _coast = "#aaaaaa" _sub = "#cccccc" _bg = "rgba(0,0,0,0)" role_key = "origin" if map_role == "As Origin (A)" else "dest" role_label = "Origin" if map_role == "As Origin (A)" else "Destination" grp = get_map_group(map_month, role_key) grp = grp.nlargest(map_top_n, "avg_risk") ap_df = get_airport_df(tuple(grp["airport"].tolist() + ["DFW"])) grp = grp.merge(ap_df, left_on="airport", right_on="iata", how="left") grp = grp.dropna(subset=["lat", "lon"]) fig_map = go.Figure() # DFW hub marker dfw_info = ap_meta.get("DFW") if dfw_info.get("lat"): fig_map.add_trace(go.Scattergeo( lon=[dfw_info["lon"]], lat=[dfw_info["lat"]], mode="markers+text", marker=dict(size=18, color="#1f77b4", symbol="star", line=dict(width=2, color="white")), text=["DFW"], textposition="top right", name="DFW Hub", hovertemplate="DFW — Dallas/Fort Worth
Hub airport (all sequences pass through)", )) # Draw spoke lines to top-10 riskiest for _, row in grp.head(10).iterrows(): if pd.notna(row["lat"]) and pd.notna(row["lon"]): fig_map.add_trace(go.Scattergeo( lon=[dfw_info["lon"], row["lon"]], lat=[dfw_info["lat"], row["lat"]], mode="lines", line=dict(width=1, color=score_to_color(row["avg_risk"])), opacity=0.4, showlegend=False, hoverinfo="skip", )) # Airport markers fig_map.add_trace(go.Scattergeo( lon=grp["lon"], lat=grp["lat"], mode="markers+text", marker=dict( size=grp["avg_risk"] * 30 + 6, color=grp["avg_risk"], colorscale="RdYlGn_r", cmin=0, cmax=0.5, showscale=True, colorbar=dict(title="Avg Risk", tickformat=".0%", x=1.0, tickvals=[0, 0.1, 0.2, 0.3, 0.4, 0.5], ticktext=["0%","10%","20%","30%","40%","≥50%"]), line=dict(width=0.8, color="black"), ), text=grp["airport"], textposition="top center", textfont=dict(size=9), name=f"Airports as {role_label}", hovertemplate=( "%{text}
" "Avg Risk: %{marker.color:.1%}
" "N Pairs: %{customdata[0]}
" "Worst Partner: %{customdata[1]}" ), customdata=grp[["n_pairs", "worst_partner"]].values, )) fig_map.update_layout( geo=dict( scope="usa", projection_type="albers usa", bgcolor=_bg, showland=True, landcolor=_land, showlakes=True, lakecolor=_lake, showcoastlines=True, coastlinecolor=_coast, showsubunits=True, subunitcolor=_sub, showframe=False, ), title=f"Airport Risk Map — {role_label} — {ap_meta.MONTH_NAMES[map_month]}", height=560, margin=dict(t=40, b=0, l=0, r=0), legend=dict(yanchor="bottom", y=0.01, xanchor="left", x=0.01), ) st.plotly_chart(fig_map, width='stretch') # Table below map st.subheader(f"Top {map_top_n} Airports by Risk ({ap_meta.MONTH_NAMES[map_month]})") tbl = grp[["airport", "city", "state", "avg_risk", "n_pairs", "worst_partner"]].copy() tbl.columns = ["Airport", "City", "State", "Avg Risk", "N Pairs", "Worst Partner"] tbl["Avg Risk"] = tbl["Avg Risk"].map("{:.1%}".format) st.markdown( tip("Avg Risk", "Mean calibrated risk score across all pair-months involving this airport " "in the selected month and role. After isotonic calibration, this approximates " "the average fraction of sequences on routes through this airport that are disrupted.") + " · " + tip("Worst Partner", "The airport B (or A) that, when paired with this airport, " "produces the highest calibrated risk score in the selected month."), unsafe_allow_html=True, ) st.dataframe(tbl, width='stretch', height=320)