Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Build a lean planes.json for the flight generator. | |
| Sources | |
| ------- | |
| - OpenFlights planes.dat (curated list of ~173 passenger aircraft) | |
| - OpenFlights routes.dat (equipment field gives 3-letter IATA codes) | |
| - OpenFlights airlines.dat (to ignore defunct airlines if desired) | |
| Output (records-oriented JSON) | |
| ----------------------------- | |
| [ | |
| { | |
| "code" : "738", | |
| "icao" : "B738", | |
| "name" : "Boeing 737-800", | |
| "routeCount": 4219, | |
| "airlines" : { "AA": 0.18, "DL": 0.14, "WN": 0.12, ... } | |
| }, | |
| ... | |
| ] | |
| """ | |
| from pathlib import Path | |
| import json | |
| import numpy as np | |
| import pandas as pd | |
# ────────── paths & params ──────────
# Inputs read by this script.
PLANES_FILE: Path = Path("data_scripts/planes.dat")
ROUTES_FILE: Path = Path("data_scripts/routes.dat")
RETIRED_AIRLINES_FILE: Path = Path("data_scripts/retired_airlines.csv")

# Where the resulting JSON is written.
OUT_FILE: Path = Path("data/planes.json")

# Aircraft types with fewer routes than this are discarded as ultra-rare
# (set None to keep all).
KEEP_CODES_WITH_MIN_ROUTES = 5

# Number of decimals kept when rounding airline weights.
ROUND_DIGITS = 4
# ────────── load raw files ──────────
# Code columns are read as text, and only "\N" and empty fields count as
# missing.  With pandas' defaults a literal "NA" code would be parsed as NaN,
# and dtype inference could turn an all-digit code such as "320" into a
# number instead of a string.
_NA_MARKERS = ["\\N", ""]

planes_cols = ["name", "iata", "icao"]
planes = pd.read_csv(
    PLANES_FILE,
    header=None,
    names=planes_cols,
    dtype=str,
    na_values=_NA_MARKERS,
    keep_default_na=False,
)

routes_cols = [
    "airline_code", "airline_id",
    "src_iata", "src_id",
    "dst_iata", "dst_id",
    "codeshare", "stops",
    "equipment",
]
# Only the textual columns are pinned to str; the id/stops columns keep
# whatever dtype pandas infers for them.
_routes_text_cols = ["airline_code", "src_iata", "dst_iata", "codeshare", "equipment"]
routes = pd.read_csv(
    ROUTES_FILE,
    header=None,
    names=routes_cols,
    dtype=dict.fromkeys(_routes_text_cols, str),
    na_values=_NA_MARKERS,
    keep_default_na=False,
)

# Load retired airlines.  The "code" column is parsed with the same rules as
# routes["airline_code"] so the membership test below compares like with like.
retired_airlines = pd.read_csv(
    RETIRED_AIRLINES_FILE,
    dtype={"code": str},
    na_values=[""],
    keep_default_na=False,
)
# Missing codes are left out of the set; they cannot identify an airline.
retired_codes = set(retired_airlines["code"].dropna())

# Filter out routes from retired airlines
routes = routes[~routes["airline_code"].isin(retired_codes)]
# ────────── explode equipment codes ──────────
# Routes without an equipment field or an airline code cannot be attributed
# to an aircraft type, so they are dropped first.
routes = routes.dropna(subset=["equipment", "airline_code"])

# The equipment field is split on whitespace into a list of codes.  assign()
# builds a new frame rather than writing a column into one derived from
# filtering, which can trigger pandas' SettingWithCopyWarning.
routes = routes.assign(equipment_list=routes["equipment"].str.split())

# One output row per (route, equipment code) pair, with a fresh 0..n-1 index.
routes_exploded = routes.explode("equipment_list", ignore_index=True)
# ────────── popularity proxy ──────────
# Count how many exploded route rows mention each equipment code; the count
# serves as the popularity measure for an aircraft type.
equipment_usage = routes_exploded["equipment_list"].value_counts()
equipment_usage.index.name = "code"

# Two-column frame: "code" and its "routeCount".
popularity = equipment_usage.reset_index(name="routeCount")
# ────────── airlines per aircraft ──────────
# Number of exploded route rows for every (equipment code, airline code)
# pair, flattened into a frame with a "flight_count" column.
pair_groups = routes_exploded.groupby(["equipment_list", "airline_code"])
pair_sizes = pair_groups.size()
equip_airline_counts = pair_sizes.reset_index(name="flight_count")
def airlines_dict_for(code: str) -> dict:
    """Return each airline's share of the rows recorded for aircraft *code*.

    Selects the rows of ``equip_airline_counts`` whose ``equipment_list``
    equals *code* and converts every airline's ``flight_count`` into a
    fraction of the summed count, rounded to ``ROUND_DIGITS`` decimals.

    Returns:
        A dict mapping airline code to its weight, inserted from the largest
        count to the smallest; an empty dict when *code* has no rows.
    """
    matches = equip_airline_counts[equip_airline_counts["equipment_list"] == code]
    if matches.empty:
        return {}

    grand_total = matches["flight_count"].sum()
    ranked = matches.sort_values("flight_count", ascending=False)

    shares = {}
    for airline, flights in zip(ranked["airline_code"], ranked["flight_count"]):
        shares[airline] = round(flights / grand_total, ROUND_DIGITS)
    return shares
# ────────── merge & tidy ──────────
# Keep only aircraft that have an IATA code, one row per code.
has_iata = planes["iata"].notna()
planes = planes[has_iata].drop_duplicates(subset="iata")

# Name the key column "code" before joining so the left join on popularity
# needs no temporary duplicate key column.
planes = planes.rename(columns={"iata": "code"})
planes = planes.merge(popularity, how="left", on="code")

# Aircraft with no match in popularity get a route count of zero.
route_counts = planes["routeCount"].fillna(0)
planes["routeCount"] = route_counts.astype(int)

# Per-aircraft airline weights, looked up code by code.
planes["airlines"] = planes["code"].apply(airlines_dict_for)

# optional trimming of rare types
if KEEP_CODES_WITH_MIN_ROUTES:
    frequent_enough = planes["routeCount"] >= KEEP_CODES_WITH_MIN_ROUTES
    planes = planes[frequent_enough]
# ────────── save ──────────
# to_json does not create missing parent directories, so make sure the
# output directory exists before writing.
OUT_FILE.parent.mkdir(parents=True, exist_ok=True)

# Records-oriented JSON: one object per remaining aircraft type.
planes.to_json(OUT_FILE, orient="records", indent=2)
print(f"Saved {len(planes):,} plane types → {OUT_FILE.resolve()}")