Spaces:
Sleeping
Sleeping
| import json | |
| import random | |
| from collections import Counter | |
| from pathlib import Path | |
# Module-wide random source, seeded with a fixed value; every generator
# function below draws from this single instance.
SEED = 2026
rng = random.Random(SEED)

# Output directory for the generated JSON files (relative to the working
# directory). It is created at import time if it does not exist yet.
DATA_DIR = Path("data")
DATA_DIR.mkdir(exist_ok=True)

# Number of records generated for each corpus file.
FTO_COUNT = 2_500
INCIDENT_COUNT = 18_000
RESOURCE_SCENARIO_COUNT = 900
# Catalogue of the official sources the synthetic records are attributed to.
# Records refer to these entries by ``source_id`` in their ``source_basis``.
SOURCE_CATALOG = [
    dict(
        source_id="dgca_incident_reports",
        authority="DGCA",
        record_type="incident_report",
        collection_mode="planned_ingestion",
        url="https://dgca.gov.in/digigov-portal/?page=reports/accident_incident_report/229597/aviation-safety",
        notes="Official DGCA incident and accident reporting surfaces.",
    ),
    dict(
        source_id="aaib_investigation_reports",
        authority="AAIB India",
        record_type="investigation_report",
        collection_mode="planned_ingestion",
        url="https://aaib.gov.in/InvestigationReports",
        notes="Investigation narratives and causal findings for serious events.",
    ),
    dict(
        source_id="aai_airport_context",
        authority="AAI",
        record_type="airport_context",
        collection_mode="planned_ingestion",
        url="https://www.aai.aero/en/annual-reports",
        notes="Airport traffic and operational context for risk exposure calibration.",
    ),
]
# Airport reference table. Each entry is a dict with the keys
# code, name, city, state, flights_per_day and tier (in that order).
# Row order is significant: airports are picked with rng.choice, so
# reordering changes the seeded output.
INDIAN_AIRPORTS = [
    dict(zip(("code", "name", "city", "state", "flights_per_day", "tier"), row))
    for row in (
        ("DEL", "Indira Gandhi International Airport", "Delhi", "Delhi", 1320, "metro"),
        ("BOM", "Chhatrapati Shivaji Maharaj International Airport", "Mumbai", "Maharashtra", 1050, "metro"),
        ("BLR", "Kempegowda International Airport", "Bengaluru", "Karnataka", 810, "metro"),
        ("HYD", "Rajiv Gandhi International Airport", "Hyderabad", "Telangana", 560, "metro"),
        ("MAA", "Chennai International Airport", "Chennai", "Tamil Nadu", 560, "metro"),
        ("CCU", "Netaji Subhas Chandra Bose International Airport", "Kolkata", "West Bengal", 430, "metro"),
        ("AMD", "Sardar Vallabhbhai Patel International Airport", "Ahmedabad", "Gujarat", 320, "major"),
        ("COK", "Cochin International Airport", "Kochi", "Kerala", 250, "major"),
        ("PNQ", "Pune Airport", "Pune", "Maharashtra", 210, "major"),
        ("GOI", "Dabolim Airport", "Goa", "Goa", 135, "major"),
        ("GOX", "Manohar International Airport", "North Goa", "Goa", 125, "major"),
        ("VNS", "Lal Bahadur Shastri International Airport", "Varanasi", "Uttar Pradesh", 100, "regional"),
        ("IDR", "Devi Ahilyabai Holkar Airport", "Indore", "Madhya Pradesh", 110, "regional"),
        ("BBI", "Biju Patnaik International Airport", "Bhubaneswar", "Odisha", 92, "regional"),
        ("TRV", "Trivandrum International Airport", "Thiruvananthapuram", "Kerala", 85, "regional"),
        ("CCJ", "Calicut International Airport", "Kozhikode", "Kerala", 78, "regional"),
        ("JAI", "Jaipur International Airport", "Jaipur", "Rajasthan", 165, "major"),
        ("GAU", "Lokpriya Gopinath Bordoloi International Airport", "Guwahati", "Assam", 130, "regional"),
        ("PAT", "Jay Prakash Narayan International Airport", "Patna", "Bihar", 105, "regional"),
        ("SXR", "Srinagar Airport", "Srinagar", "Jammu and Kashmir", 95, "regional"),
        ("IXC", "Chandigarh Airport", "Chandigarh", "Chandigarh", 118, "regional"),
        ("LKO", "Chaudhary Charan Singh International Airport", "Lucknow", "Uttar Pradesh", 135, "major"),
        ("NAG", "Dr. Babasaheb Ambedkar International Airport", "Nagpur", "Maharashtra", 98, "regional"),
        ("ATQ", "Sri Guru Ram Dass Jee International Airport", "Amritsar", "Punjab", 80, "regional"),
        ("IXE", "Mangaluru International Airport", "Mangaluru", "Karnataka", 62, "regional"),
        ("RPR", "Swami Vivekananda Airport", "Raipur", "Chhattisgarh", 74, "regional"),
        ("IXB", "Bagdogra Airport", "Siliguri", "West Bengal", 69, "regional"),
        ("JDH", "Jodhpur Airport", "Jodhpur", "Rajasthan", 40, "regional"),
        ("BHO", "Raja Bhoj Airport", "Bhopal", "Madhya Pradesh", 52, "regional"),
        ("IXJ", "Jammu Airport", "Jammu", "Jammu and Kashmir", 54, "regional"),
        ("IMF", "Imphal Airport", "Imphal", "Manipur", 34, "regional"),
        ("IXA", "Agartala Airport", "Agartala", "Tripura", 30, "regional"),
        ("IXS", "Silchar Airport", "Silchar", "Assam", 22, "regional"),
        ("DIB", "Dibrugarh Airport", "Dibrugarh", "Assam", 25, "regional"),
        ("IXZ", "Veer Savarkar International Airport", "Port Blair", "Andaman and Nicobar Islands", 28, "regional"),
        ("SHG", "Shillong Airport", "Shillong", "Meghalaya", 12, "regional"),
        ("UDR", "Maharana Pratap Airport", "Udaipur", "Rajasthan", 38, "regional"),
        ("RAJ", "Rajkot Airport", "Rajkot", "Gujarat", 29, "regional"),
        ("VTZ", "Visakhapatnam Airport", "Visakhapatnam", "Andhra Pradesh", 58, "regional"),
        ("TIR", "Tirupati Airport", "Tirupati", "Andhra Pradesh", 33, "regional"),
    )
]
# Operator names assigned to generated incidents.
# Order is significant: entries are picked with rng.choice, so reordering
# changes the seeded output.
AIRLINES = [
    "IndiGo",
    "Air India",
    "Air India Express",
    "SpiceJet",
    "Akasa Air",
    "Alliance Air",
    "Blue Dart",
    "Vistara Legacy Ops",
    "Star Air",
    "Fly91",
    "TruJet Legacy Ops",
    "Zoom Air Legacy Ops",
    "Deccan Charters",
    "Pawan Hans",
    "IndiaOne Air",
    "Quikjet Cargo",
    "Pradhaan Air Express",
    "Taj Air",
]
# Incident categories a generated report can carry.
# Order is significant: entries are picked with rng.choice, and the list is
# also written verbatim into the corpus manifest.
INCIDENT_TYPES = [
    "runway_incursion",
    "technical_snag",
    "atc_deviation",
    "fdtl_violation",
    "maintenance_lapse",
    "bird_strike",
    "fuel_irregularity",
    "unauthorized_access",
    "tail_strike",
    "unstable_approach",
    "pressurization_alert",
    "smoke_fumes_event",
    "ground_collision",
    "weather_diversion",
    "navigation_system_fault",
]
# Aircraft type labels attached to generated incidents.
# Order is significant: entries are picked with rng.choice.
AIRCRAFT_TYPES = [
    "A320",
    "A321",
    "A319",
    "B737",
    "B737 MAX",
    "ATR72",
    "Q400",
    "B777",
    "B787",
    "A350",
    "A330",
    "ERJ145",
    "Cessna 172",
    "DA42",
    "H125",
]
# Base names for generated flying training organisations (FTOs).
# build_fto_name indexes this list with ``idx % len(FTO_BASE_NAMES)``, so the
# order determines which name each FTO index receives.
FTO_BASE_NAMES = [
    "Indira Gandhi Rashtriya Uran Akademi",
    "National Flying Training Institute",
    "Chimes Aviation Academy",
    "Bombay Flying Club",
    "Government Flying Training School",
    "Madhya Pradesh Flying Club",
    "Rajasthan State Flying School",
    "Orient Flight Academy",
    "Asia Pacific Flight Training Academy",
    "Wings India Flying School",
    "Alchemist Aviation",
    "Garg Aviations",
    "Falcon Flying Academy",
    "Flytech Aviation Academy",
    "International Pioneer Flying Academy",
    "Karnal Aviation Club",
    "Patiala Aviation Club",
    "Sha-Shib Flying Academy",
    "Taneja Aerospace and Aviation",
    "Rajiv Gandhi Academy for Aviation Technology",
]
# Campus-style suffixes appended to an FTO base name.
# build_fto_name indexes this list with ``idx % len(FTO_SUFFIXES)``.
FTO_SUFFIXES = [
    "Pilot Training Campus",
    "Aviation Skills Centre",
    "Flight Operations School",
    "Cadet Academy",
    "Rotor and Fixed Wing Training Hub",
    "Regional Flying College",
    "Safety and Standards Campus",
]
# Free-text incident descriptions. They are assigned with rng.choice
# independently of incident type and severity, so the text carries no
# information about the other fields of the report.
NOISY_DESCRIPTIONS = [
    "ATC logged anomaly; detailed sequencing review pending.",
    "Crew report submitted with inconsistent engineering closure notes.",
    "Ground handling variance observed during a post-stand review.",
    "Verbally reported by PIC; formal written account delayed beyond target window.",
    "Near-miss evidence surfaced in data logs; airline disputes severity classification.",
    "Historic lapse discovered during a routine follow-up, with timeline gaps in records.",
    "Automated alert triggered first; human verification remains incomplete.",
    "Inspector noted a deviation during a ramp or simulator-adjacent review.",
    "Third-party complaint received; primary operator contests the incident narrative.",
    "Flight data trace indicates anomaly, but crew debrief lacks consensus.",
    "Trainee-originated report conflicts with supervisor recollection.",
    "Multiple stand-side events overlapped, obscuring single-cause attribution.",
    "Weather cited as mitigation while internal reviewers raised procedural concerns.",
    "Repeat occurrence surfaced after a previous closure may have been premature.",
    "High-visibility movement triggered additional scrutiny because of passenger exposure.",
    "Cross-functional records disagree on whether the event was resolved or deferred.",
]
def choose_weighted_grade() -> str:
    """Draw one FTO grade from the module RNG, skewed towards the low grades.

    The relative weights are A+ = 2, A = 9, B = 24 and C = 65.
    """
    grades = ["A+", "A", "B", "C"]
    weights = [2, 9, 24, 65]
    (picked,) = rng.choices(grades, weights=weights)
    return picked
def get_flags(incidents: int, solo_hours: float, pass_rate: float, grievances: int) -> list[str]:
    """Return the risk flags triggered by an FTO's metrics.

    Args:
        incidents: Recent incident count; 3 or more raises
            ``high_incident_rate`` and 5 or more also raises
            ``safety_critical``.
        solo_hours: Solo hours per student; below 20 raises
            ``insufficient_solo_hours``.
        pass_rate: Pass rate as a fraction; below 0.55 raises
            ``low_pass_rate``.
        grievances: Grievance count; 8 or more raises
            ``excessive_student_grievances``.

    Returns:
        The triggered flag names, always in the order listed in ``checks``.
    """
    checks = (
        (incidents >= 3, "high_incident_rate"),
        (solo_hours < 20, "insufficient_solo_hours"),
        (pass_rate < 0.55, "low_pass_rate"),
        (grievances >= 8, "excessive_student_grievances"),
        (incidents >= 5, "safety_critical"),
    )
    return [label for triggered, label in checks if triggered]
def get_action(grade: str) -> str:
    """Return the single expected regulatory action for an FTO grade.

    Raises:
        KeyError: If ``grade`` is not one of "A+", "A", "B" or "C".
    """
    # Both top grades need no follow-up; the lower grades escalate.
    action_by_grade = dict.fromkeys(("A+", "A"), "clear")
    action_by_grade["B"] = "self_assessment_required"
    action_by_grade["C"] = "dgca_notice_issued"
    return action_by_grade[grade]
def get_acceptable_actions(grade: str) -> list[str]:
    """Return every action accepted as correct for an FTO grade.

    Each grade accepts a contiguous window of the escalation ladder below.
    A new list is returned on every call, so callers may mutate it.

    Raises:
        KeyError: If ``grade`` is not one of "A+", "A", "B" or "C".
    """
    ladder = ["clear", "self_assessment_required", "dgca_notice_issued", "immediate_audit"]
    windows = {"A+": (0, 1), "A": (0, 2), "B": (1, 3), "C": (2, 4)}
    start, stop = windows[grade]
    return ladder[start:stop]
def build_fto_name(idx: int, airport: dict) -> str:
    """Compose a display name of the form "<base> <suffix> (<city>)".

    The base name and suffix are selected by cycling through
    FTO_BASE_NAMES and FTO_SUFFIXES with ``idx``; the city is read from
    ``airport["city"]``.
    """
    base_name = FTO_BASE_NAMES[idx % len(FTO_BASE_NAMES)]
    suffix_name = FTO_SUFFIXES[idx % len(FTO_SUFFIXES)]
    return "{} {} ({})".format(base_name, suffix_name, airport["city"])
def make_fto(idx: int, target_grade: str) -> dict:
    """Build one synthetic FTO profile together with its hidden ground truth.

    Args:
        idx: Sequence number; used for ``fto_id`` and to pick the name parts.
        target_grade: Grade the profile is generated for. "A+", "A" and "B"
            have dedicated sampling ranges; any other value takes the
            grade-"C" path, which first picks one of four sub-profiles.

    Returns:
        A JSON-serialisable dict holding the published metrics, a
        ``source_profile`` section and a ``_ground_truth`` section
        (expected grade, score, flags and actions).

    Raises:
        KeyError: If ``target_grade`` is not "A+", "A", "B" or "C"; raised by
            the action lookups after sampling has finished.
    """
    airport = rng.choice(INDIAN_AIRPORTS)
    noise = rng.random()

    # NOTE: the order of the rng calls below is part of the seeded output.
    # Reordering draws changes every record generated after this one.
    if target_grade == "A+":
        if noise < 0.18:
            # Noisy A+ profile: sub-scores straddle the lower edge of the
            # clean A+ ranges used in the other branch.
            perf = rng.uniform(17.4, 18.6)
            ops = rng.uniform(35.8, 37.2)
            safety = rng.uniform(17.4, 18.8)
            compliance = rng.uniform(8.4, 9.1)
            student = rng.uniform(8.6, 9.2)
        else:
            perf = rng.uniform(18, 20)
            ops = rng.uniform(36, 40)
            safety = rng.uniform(18, 20)
            compliance = rng.uniform(9, 10)
            student = rng.uniform(9, 10)
        incidents = 0
        solo_hours = rng.uniform(48, 72)
        pass_rate = rng.uniform(0.86, 0.98)
        grievances = rng.randint(0, 1)
    elif target_grade == "A":
        perf = rng.uniform(14, 18)
        ops = rng.uniform(28, 36)
        safety = rng.uniform(14, 18)
        compliance = rng.uniform(7, 9)
        student = rng.uniform(7, 9)
        incidents = rng.randint(0, 1)
        solo_hours = rng.uniform(36, 54)
        pass_rate = rng.uniform(0.72, 0.88)
        grievances = rng.randint(1, 4)
        if noise < 0.20:
            # Noisy A profile: one extra incident and a narrower pass rate.
            incidents = 2
            pass_rate = rng.uniform(0.74, 0.83)
    elif target_grade == "B":
        perf = rng.uniform(8, 15)
        ops = rng.uniform(16, 30)
        safety = rng.uniform(8, 15)
        compliance = rng.uniform(4, 7.5)
        student = rng.uniform(4, 7.5)
        incidents = rng.randint(1, 4)
        solo_hours = rng.uniform(18, 42)
        pass_rate = rng.uniform(0.55, 0.76)
        grievances = rng.randint(2, 8)
        if noise < 0.34:
            # Shift all five sub-scores in the same direction; the divisors
            # spread ``delta`` so the total score moves by exactly ``delta``.
            delta = rng.uniform(-3, 3)
            perf += delta / 5
            ops += delta / 2.5
            safety += delta / 5
            compliance += delta / 10
            student += delta / 10
    else:
        profile_type = rng.choices(
            ["failing", "near_boundary", "conflicting", "ghost_fto"],
            [50, 25, 18, 7],
        )[0]
        if profile_type == "failing":
            perf = rng.uniform(1, 8)
            ops = rng.uniform(3, 16)
            safety = rng.uniform(1, 8)
            compliance = rng.uniform(0.5, 4)
            student = rng.uniform(0.5, 4)
            incidents = rng.randint(4, 15)
            solo_hours = rng.uniform(3, 18)
            pass_rate = rng.uniform(0.15, 0.55)
            grievances = rng.randint(8, 25)
        elif profile_type == "near_boundary":
            perf = rng.uniform(9, 11)
            ops = rng.uniform(18, 22)
            safety = rng.uniform(9, 11)
            compliance = rng.uniform(4.5, 5.5)
            student = rng.uniform(4.5, 5.5)
            incidents = rng.randint(3, 5)
            solo_hours = rng.uniform(22, 30)
            pass_rate = rng.uniform(0.58, 0.68)
            grievances = rng.randint(6, 10)
        elif profile_type == "conflicting":
            # Good compliance/student numbers paired with a poor safety score
            # and many incidents.
            perf = rng.uniform(5, 12)
            ops = rng.uniform(8, 20)
            safety = rng.uniform(1, 6)
            compliance = rng.uniform(7, 9)
            student = rng.uniform(7, 9)
            incidents = rng.randint(5, 12)
            solo_hours = rng.uniform(30, 55)
            pass_rate = rng.uniform(0.75, 0.90)
            grievances = rng.randint(0, 3)
        else:
            # "ghost_fto": near-zero scores with no solo hours and no passes.
            perf = rng.uniform(0, 3)
            ops = rng.uniform(0, 5)
            safety = rng.uniform(0, 3)
            compliance = rng.uniform(0, 2)
            student = rng.uniform(0, 2)
            incidents = rng.randint(0, 2)
            solo_hours = 0.0
            pass_rate = 0.0
            grievances = rng.randint(0, 2)

    total = perf + ops + safety + compliance + student
    training_load = rng.randint(0, 220)
    aircraft_count = rng.randint(0, 24)
    instructor_count = rng.randint(0, 18)

    # Round once and derive the ground-truth flags from the *published*
    # values. Using the raw floats let the label disagree with the record:
    # a raw 19.96 solo hours is published as 20.0 (not below the 20-hour
    # threshold) yet was flagged "insufficient_solo_hours".
    published_solo_hours = round(solo_hours, 1)
    published_pass_rate = round(pass_rate, 3)

    return {
        "fto_id": f"FTO_{idx:05d}",
        "name": build_fto_name(idx, airport),
        "location": f"{airport['city']}, {airport['state']}, India",
        "performance_score": round(perf, 2),
        "operational_score": round(ops, 2),
        "safety_score": round(safety, 2),
        "compliance_score": round(compliance, 2),
        "student_support_score": round(student, 2),
        "total_students": training_load,
        "aircraft_count": aircraft_count,
        "instructor_count": instructor_count,
        "recent_incidents": incidents,
        "solo_hours_per_student": published_solo_hours,
        "pass_rate": published_pass_rate,
        "grievances_last_6_months": grievances,
        "source_profile": {
            "mode": "hybrid_synthetic",
            "source_basis": ["dgca_incident_reports", "aaib_investigation_reports"],
            "ingestion_ready": True,
        },
        "_ground_truth": {
            "expected_grade": target_grade,
            "true_score": round(total, 2),
            "expected_flags": get_flags(
                incidents, published_solo_hours, published_pass_rate, grievances
            ),
            "expected_action": get_action(target_grade),
            "acceptable_actions": get_acceptable_actions(target_grade),
        },
    }
# Operators in AIRLINES that fly freight rather than passengers.
CARGO_AIRLINES = frozenset({"Blue Dart", "Quikjet Cargo", "Pradhaan Air Express"})


def classify_operator_type(airline: str) -> str:
    """Return "cargo" for freight operators and "passenger" otherwise.

    An airline is treated as cargo when it is listed in CARGO_AIRLINES or
    its name contains "Cargo". The word "Express" is deliberately not used
    as a signal: it appears in both a cargo operator ("Pradhaan Air
    Express") and a passenger carrier ("Air India Express").
    """
    if airline in CARGO_AIRLINES or "Cargo" in airline:
        return "cargo"
    return "passenger"


def make_incident(idx: int) -> dict:
    """Build one synthetic incident report.

    Args:
        idx: Sequence number used for ``incident_id``.

    Returns:
        A JSON-serialisable dict with the incident fields and a
        ``source_profile`` section (source basis, operator type, airport
        tier).
    """
    airport = rng.choice(INDIAN_AIRPORTS)
    inc_type = rng.choice(INCIDENT_TYPES)
    airline = rng.choice(AIRLINES)
    severity = rng.choices(["low", "medium", "high", "critical"], [38, 35, 20, 7])[0]
    recurrence_profile = rng.choices(
        ["zero", "low", "moderate", "chronic", "extreme"],
        [24, 30, 26, 15, 5],
    )[0]
    # All four ranges are sampled even though only one is used. The extra
    # draws are kept on purpose: removing them would shift the seeded RNG
    # stream and change every value generated afterwards.
    recurrence_map = {
        "zero": 0,
        "low": rng.randint(1, 2),
        "moderate": rng.randint(3, 6),
        "chronic": rng.randint(7, 12),
        "extreme": rng.randint(13, 25),
    }
    recurrence = recurrence_map[recurrence_profile]
    # Same pattern: one candidate per bucket is drawn, then a bucket is chosen.
    days_since = rng.choices(
        [rng.randint(1, 30), rng.randint(31, 180), rng.randint(181, 500), rng.randint(501, 1500)],
        [21, 34, 30, 15],
    )[0]
    # Low/medium events are resolved 72% of the time, high/critical 18%.
    is_resolved = rng.random() < (0.72 if severity in ("low", "medium") else 0.18)
    operator_type = classify_operator_type(airline)

    source_basis = ["dgca_incident_reports"]
    if severity in ("high", "critical"):
        source_basis.append("aaib_investigation_reports")
    # NOTE(review): indentation was lost in the copy of this file that was
    # reviewed; this append is assumed to be unconditional because every
    # record carries airport context fields - confirm against the original.
    source_basis.append("aai_airport_context")

    return {
        "incident_id": f"INC_{idx:06d}",
        "date": f"2025-{rng.randint(1, 12):02d}-{rng.randint(1, 28):02d}",
        "airport_code": airport["code"],
        "airline": airline,
        "incident_type": inc_type,
        "severity": severity,
        "description": rng.choice(NOISY_DESCRIPTIONS),
        "recurrence_count": recurrence,
        "aircraft_type": rng.choice(AIRCRAFT_TYPES),
        "flights_per_day_at_airport": airport["flights_per_day"],
        "days_since_last_inspection": days_since,
        "is_resolved": is_resolved,
        "source_profile": {
            "mode": "hybrid_synthetic",
            "source_basis": source_basis,
            "operator_type": operator_type,
            "airport_tier": airport["tier"],
            "ingestion_ready": True,
        },
    }
def make_resource_scenario(idx: int, ftos: list[dict], incidents: list[dict]) -> dict:
    """Build one inspection-planning scenario over a random subset of records.

    Args:
        idx: Sequence number used for ``scenario_id``.
        ftos: FTO profiles to sample from; each needs an ``fto_id`` key.
        incidents: Incident reports to sample from; each needs an
            ``incident_id`` key.

    Returns:
        A JSON-serialisable dict with the sampled ids, the inspector
        capacity, the weekly hour budget and a ``source_profile`` section.
    """
    # Clamp the sample sizes to what is available. rng.sample raises
    # ValueError when asked for more items than the population holds, which
    # would make the generator crash on small corpora. With the default
    # corpus sizes the clamp never triggers, so the output is unchanged.
    n_ftos = min(rng.randint(8, 24), len(ftos))
    n_incs = min(rng.randint(12, 34), len(incidents))
    inspectors = rng.randint(1, 5)
    total_items = n_ftos + n_incs
    # Budget is between 3 and 6 hours per item in the scenario.
    tight_budget = rng.randint(total_items * 3, total_items * 6)
    return {
        "scenario_id": f"SCEN_{idx:04d}",
        "fto_ids": [item["fto_id"] for item in rng.sample(ftos, n_ftos)],
        "incident_ids": [item["incident_id"] for item in rng.sample(incidents, n_incs)],
        "inspector_capacity": inspectors,
        "week_budget_hours": tight_budget,
        "source_profile": {
            "mode": "hybrid_synthetic",
            "source_basis": ["dgca_incident_reports", "aai_airport_context"],
            "ingestion_ready": True,
        },
    }
def dump_json(path: Path, payload: object) -> None:
    """Serialise ``payload`` to ``path`` as 2-space-indented, ASCII-only JSON.

    The file is opened for writing as UTF-8 and any existing content is
    replaced; non-ASCII characters are written as ``\\uXXXX`` escapes.
    """
    with open(path, "w", encoding="utf-8") as sink:
        json.dump(payload, fp=sink, ensure_ascii=True, indent=2)
def build_manifest(ftos: list[dict], incidents: list[dict], scenarios: list[dict]) -> dict:
    """Summarise the generated corpus into a manifest dict.

    Args:
        ftos: FTO profiles; the expected grade is read from
            ``_ground_truth.expected_grade``.
        incidents: Incident reports; ``severity``, ``airport_code`` and
            ``airline`` are read from each.
        scenarios: Resource scenarios; only their count is used.

    Returns:
        A dict with record counts, grade and severity distributions (keys
        sorted), the sorted airport codes and airlines that actually occur
        in ``incidents``, the incident type list and the source catalogue.
    """
    grade_counts = Counter()
    for profile in ftos:
        grade_counts[profile["_ground_truth"]["expected_grade"]] += 1

    # One pass over the incidents collects everything the manifest reports.
    severity_counts = Counter()
    seen_airports = set()
    seen_airlines = set()
    for report in incidents:
        severity_counts[report["severity"]] += 1
        seen_airports.add(report["airport_code"])
        seen_airlines.add(report["airline"])
    airport_codes = sorted(seen_airports)
    airline_names = sorted(seen_airlines)

    summary = {
        "total_records": len(ftos) + len(incidents) + len(scenarios),
        "fto_profiles": len(ftos),
        "incident_reports": len(incidents),
        "resource_scenarios": len(scenarios),
        "unique_airports": len(airport_codes),
        "unique_airlines": len(airline_names),
    }
    distributions = {
        "fto_grade_distribution": dict(sorted(grade_counts.items())),
        "incident_severity_distribution": dict(sorted(severity_counts.items())),
    }
    coverage = {
        "airport_codes": airport_codes,
        "airlines": airline_names,
        "incident_types": INCIDENT_TYPES,
    }
    return {
        "version": "3.0",
        "seed": SEED,
        "generation_mode": "hybrid_synthetic_with_real_ingestion_plan",
        "summary": summary,
        "distributions": distributions,
        "coverage": coverage,
        "source_catalog": SOURCE_CATALOG,
        "space_ready": True,
    }
def main() -> None:
    """Generate the full corpus, write it under DATA_DIR and print a summary.

    Five JSON files are written: FTO profiles, incident reports, resource
    scenarios, the source catalogue and the corpus manifest.
    """
    ftos = [make_fto(idx, choose_weighted_grade()) for idx in range(FTO_COUNT)]
    incidents = [make_incident(idx) for idx in range(INCIDENT_COUNT)]
    scenarios = [
        make_resource_scenario(idx, ftos, incidents)
        for idx in range(RESOURCE_SCENARIO_COUNT)
    ]
    manifest = build_manifest(ftos, incidents, scenarios)

    outputs = (
        ("fto_profiles.json", ftos),
        ("incident_reports.json", incidents),
        ("resource_scenarios.json", scenarios),
        ("source_catalog.json", SOURCE_CATALOG),
        ("corpus_manifest.json", manifest),
    )
    for filename, payload in outputs:
        dump_json(DATA_DIR / filename, payload)

    # Sanity counters for the console summary only; they are not persisted.
    recur_zero = 0
    recur_extreme = 0
    critical_resolved = 0
    for report in incidents:
        if report["recurrence_count"] == 0:
            recur_zero += 1
        if report["recurrence_count"] >= 13:
            recur_extreme += 1
        if report["severity"] == "critical" and report["is_resolved"]:
            critical_resolved += 1

    summary = manifest["summary"]
    distributions = manifest["distributions"]
    print("Avigilance 3.0 hybrid corpus generation complete.")
    print(f" Total records: {summary['total_records']}")
    print(f" FTO profiles: {summary['fto_profiles']}")
    print(f" Incidents: {summary['incident_reports']}")
    print(f" Scenarios: {summary['resource_scenarios']}")
    print(f" Airports: {summary['unique_airports']} | Airlines: {summary['unique_airlines']}")
    print(f" FTO grades: {distributions['fto_grade_distribution']}")
    print(f" Incident sev: {distributions['incident_severity_distribution']}")
    print(f" Recurrence=0: {recur_zero} | Extreme(>=13): {recur_extreme}")
    print(f" Critical+resolved: {critical_resolved}")


if __name__ == "__main__":
    main()