Spaces:

rdelyon
/

aviation-report-classification

Sleeping

App Files Files Community

rdelyon committed on Apr 24

Commit

1f143ac

verified ·

1 Parent(s): 9a07fb5

Deploy FastAPI NER-ADREP classifier with fixed requirements

Browse files

Files changed (4) hide show

Dockerfile +13 -0
README.md +51 -5
app.py +361 -0
requirements_api.txt +8 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,13 @@

+# Slim Python 3.10 base image for the inference API container
+FROM python:3.10-slim
+WORKDIR /app
+# Install only what the inference API needs
+# (the requirements file is copied and installed before app.py is added)
+COPY requirements_api.txt .
+RUN pip install --no-cache-dir -r requirements_api.txt
+COPY app.py .
+# 7860 is the same port declared as `app_port` in the README front matter
+EXPOSE 7860
+# Serve the FastAPI instance named `app` from app.py on all interfaces
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,56 @@
 ---
 title: Aviation Report Classification
-emoji: 😻
-colorFrom: red
-colorTo: gray
 sdk: docker
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Aviation Report Classification
+emoji: ✈️
+colorFrom: blue
+colorTo: indigo
 sdk: docker
+app_port: 7860
+short_description: ADREP classification via NER (SafeAeroBERT)
 ---
+# Aviation ADREP Classification API
+FastAPI endpoint that classifies aviation incident narratives into ICAO ADREP occurrence categories using Named Entity Recognition with NASA SafeAeroBERT.
+## Pipeline
+```
+POST /predict
+  └── NER inference (theophilusowiti/asn-ner-aerobert)
+  └── Multi-word entity merging (B-/I- subword tokens)
+  └── ADREP keyword scoring (weighted by entity role)
+  └── Confidence normalisation → top-5 response
+```
+## Usage
+```bash
+curl -X POST https://rdelyon-aviation-report-classification.hf.space/predict \
+  -H "Content-Type: application/json" \
+  -d '{"narrative": "The aircraft experienced severe turbulence leading to cabin crew injury."}'
+```
+## Response Schema
+```json
+{
+  "model_id": "theophilusowiti/asn-ner-aerobert",
+  "display_name": "SafeAeroBERT NER + ADREP Classifier",
+  "prediction": {
+    "top_class": "TURB",
+    "confidence": 0.612,
+    "top_5": [
+      {"class": "TURB", "confidence": 0.612},
+      {"class": "CABIN", "confidence": 0.183},
+      {"class": "LOC-I", "confidence": 0.091},
+      {"class": "WSTRW", "confidence": 0.061},
+      {"class": "OTHR", "confidence": 0.011}
+    ]
+  },
+  "inference_time_ms": 312
+}
+```
+## Part of CMU 18-786 Deep Learning Group Project
+Model fine-tuned on aviation safety reports for NER-based ADREP classification.

app.py ADDED Viewed

	@@ -0,0 +1,361 @@

+# FastAPI endpoint for Aviation ADREP Classification
+# Pipeline: narrative -> NER (SafeAeroBERT) -> entity extraction -> ADREP scoring -> API response
+from fastapi import FastAPI
+from pydantic import BaseModel
+import torch
+from transformers import AutoTokenizer, AutoModelForTokenClassification
+import time
+from collections import defaultdict
+# ASGI application object; the Dockerfile's uvicorn command serves it as "app:app".
+app = FastAPI()
+# Hugging Face Hub id of the token-classification checkpoint loaded at import time.
+MODEL_ID = "theophilusowiti/asn-ner-aerobert"
+# Human-readable model name echoed in every /predict response.
+MODEL_DISPLAY_NAME = "SafeAeroBERT NER + ADREP Classifier"
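+# Keywords are matched as plain substrings of the lower-cased entity text, so a short
+# keyword such as "ice" or "tree" also matches inside longer words ("service", "street").
+# TRIGGER entities are weighted 3x (see ENTITY_WEIGHTS). A code is counted at most once per
+# entity role, so the order of keywords within a list does not change the score.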
+ADREP_KEYWORDS: dict = {
+    # Codes correspond to ECCAIRS Aviation 7.1.0.0 Attribute 430 "Occurrence category"
+    "AMAN": [
+        "abrupt manoeuvre", "abrupt maneuver", "evasive manoeuvre", "evasive maneuver",
+        "sudden pull", "sudden push", "rapid pitch", "avoidance manoeuvre",
+    ],
+    "ARC": [
+        "hard landing", "tail strike", "tailstrike", "nose gear collapse",
+        "gear up landing", "bounced", "firm touchdown", "rough landing",
+    ],
+    "ADRM": [
+        "aerodrome", "airport obstacle", "runway light", "taxiway sign",
+        "apron collision", "airport infrastructure",
+    ],
+    "ATM": [
+        "atc error", "air traffic control", "clearance error", "separation instruction",
+        "atc instruction", "controller error", "atm failure", "cns failure",
+    ],
+    "BIRD": [
+        "bird strike", "bird ingestion", "birdstrike", "bird impact",
+        "avian strike", "bird hit",
+    ],
+    "CABIN": [
+        "cabin injury", "passenger injury", "turbulence injury", "unsecured",
+        "galley fire", "cabin depressurisation", "cabin pressurisation",
+        "cabin crew injury", "oxygen mask deployed",
+    ],
+    "CFIT": [
+        "controlled flight into terrain", "struck trees", "struck high ground",
+        "hit terrain", "terrain impact", "ground impact", "terrain", "mountain",
+        "hill", "tree",
+    ],
+    "CTOL": [
+        "collision during takeoff", "collision during landing", "obstacle takeoff",
+        "obstacle landing", "struck obstacle", "hit obstacle on takeoff",
+        "hit obstacle on landing",
+    ],
+    "EVAC": [
+        "evacuation", "emergency evacuation", "evacuation slide", "passengers evacuated",
+        "rapid disembarkation", "cabin evacuation",
+    ],
+    "EXTL": [
+        "external load", "slung load", "underslung", "cargo net", "longline",
+        "load release", "load shift",
+    ],
+    "F-NI": [
+        "in-flight fire", "engine fire", "electrical fire", "cargo fire",
+        "smoke in cockpit", "smoke in cabin", "fumes", "fire warning",
+        "fire non-impact", "smoke", "fire",
+    ],
+    "F-POST": [
+        "post-crash fire", "post-impact fire", "fuel ignition after impact",
+        "post-accident fire",
+    ],
+    "FUEL": [
+        "fuel exhaustion", "fuel starvation", "fuel contamination", "fuel imbalance",
+        "low fuel", "exhaustion", "starvation", "fuel",
+    ],
+    "RAMP": [
+        "ground handling", "ramp incident", "towing accident", "pushback collision",
+        "baggage loader", "ground equipment", "jet bridge", "service vehicle",
+        "fuelling incident",
+    ],
+    "GCOL": [
+        "ground collision", "taxiway collision", "tug", "pushback",
+    ],
+    "GTOW": [
+        "glider tow", "aerotow", "tow rope", "glider towing",
+    ],
+    "ICE": [
+        "ice accretion", "ice ingestion", "icing", "frost", "frozen",
+        "deice", "anti-ice", "ice",
+    ],
+    "ISEC": [
+        "cyber attack", "information security", "gps spoofing", "gps jamming",
+        "data link compromise", "avionics cyber",
+    ],
+    "LALT": [
+        "low altitude", "low level flight", "below minimum altitude", "msaw",
+        "controlled flight low", "low flying",
+    ],
+    "LOC-G": [
+        "loss of control ground", "ground loss of control", "veer off ground",
+        "directional control loss", "ground loop", "skidded", "aquaplaning",
+    ],
+    "LOC-I": [
+        "loss of control", "departure from controlled flight", "unusual attitude",
+        "uncontrolled descent", "spiral dive", "pitch up", "stall", "upset", "spin",
+    ],
+    "LOLI": [
+        "loss of lifting conditions", "downdraft", "loss of lift en route",
+        "density altitude", "helicopter settling with power", "vortex ring",
+    ],
+    "MAC": [
+        "mid-air collision", "midair collision", "airprox", "near miss", "tcas",
+        "traffic alert", "loss of separation", "acas alert",
+    ],
+    "MED": [
+        "medical", "pilot incapacitation", "crew incapacitation", "heart attack",
+        "stroke", "medical emergency", "unconscious pilot",
+    ],
+    "NAV": [
+        "navigation error", "wrong runway", "off course", "navigational error",
+        "rnav error", "gps error", "position error", "wrong approach",
+        "flew to wrong airport",
+    ],
+    "RE": [
+        "runway excursion", "runway overrun", "overran runway", "veered off runway",
+        "skidded off runway", "overrun", "excursion",
+    ],
+    "RI": [
+        "runway incursion", "unauthorised runway entry", "runway occupied",
+        "aircraft on runway", "vehicle on runway", "person on runway",
+    ],
+    "SCF-NP": [
+        "gear failure", "gear collapse", "nose gear", "main gear", "landing gear",
+        "hydraulic failure", "hydraulic leak", "avionics failure", "flap failure",
+        "flight control failure", "structural failure", "electrical failure",
+        "hydraulic", "avionics", "flap", "rudder", "elevator", "aileron",
+    ],
+    "SCF-PP": [
+        "engine failure", "engine malfunction", "engine problem",
+        "engine separation", "engine shutdown", "engine surge", "power loss",
+        "flameout", "oil leak", "fuel leak", "compressor stall", "turbine failure",
+        "propeller failure", "rpm rollback", "powerplant", "turbine", "compressor",
+        "propeller", "engine",
+    ],
+    "SEC": [
+        "hijack", "hijacking", "air piracy", "security threat", "bomb threat",
+        "weapon", "unruly passenger", "assault", "attack",
+    ],
+    "TURB": [
+        "severe turbulence", "clear air turbulence", "wake turbulence",
+        "turbulence", "chop", "jolt",
+    ],
+    "UIMC": [
+        "inadvertent imc", "vfr into imc", "flew into cloud",
+        "instrument meteorological conditions", "unintended flight in imc", "imc",
+    ],
+    "UNK": [],
+    "USOS": [
+        "undershoot", "overshoot", "short landing", "long landing",
+        "landed short", "landed long", "threshold undershoot",
+    ],
+    "WILD": [
+        "wildlife strike", "animal strike", "wildlife", "animal",
+    ],
+    "WSTRW": [
+        "microburst", "downburst", "windshear", "wind shear", "thunderstorm",
+        "shear", "microburst encounter",
+    ],
+    "OTHR": [],
+}
+# Entity type scoring weights — TRIGGERs are primary accident drivers
+# score_adrep() adds a role's weight to an ADREP code when one of that code's keywords
+# occurs in the text extracted for the role. The keys also define which roles
+# build_event_dict() collects; labels with any other role are not stored.
+ENTITY_WEIGHTS = {
+    "TRIGGER": 3.0,
+    "OUTCOME": 2.0,
+    "SYSTEM":  1.5,
+    "PHASE":   0.5,
+    "ACTOR":   0.5,
+}
+# Every ADREP code in ADREP_KEYWORDS declaration order; scores_to_top5() uses this list
+# to give codes that received no score a small residual confidence.
+ALL_ADREP_CODES = list(ADREP_KEYWORDS.keys())
+# Full descriptions from ECCAIRS Aviation 7.1.0.0 Attribute 430 "Occurrence category"
+ADREP_DESCRIPTIONS: dict = {
+    "AMAN":   "Abrupt Manoeuvre",
+    "ARC":    "Abnormal Runway Contact",
+    "ADRM":   "Aerodrome",
+    "ATM":    "ATM/CNS",
+    "BIRD":   "Birdstrike",
+    "CABIN":  "Cabin Safety Events",
+    "CFIT":   "Controlled Flight Into or Toward Terrain",
+    "CTOL":   "Collision with Obstacle(s) During Take-off and Landing",
+    "EVAC":   "Evacuation",
+    "EXTL":   "External Load Related Occurrences",
+    "F-NI":   "Fire/Smoke (Non-Impact)",
+    "F-POST": "Fire/Smoke (Post-Impact)",
+    "FUEL":   "Fuel Related",
+    "RAMP":   "Ground Handling",
+    "GCOL":   "Ground Collision",
+    "GTOW":   "Glider Towing Related Events",
+    "ICE":    "Icing",
+    "ISEC":   "Information Security Related",
+    "LALT":   "Low Altitude Operations",
+    "LOC-G":  "Loss of Control - Ground",
+    "LOC-I":  "Loss of Control - Inflight",
+    "LOLI":   "Loss of Lifting Conditions En-Route",
+    "MAC":    "Airprox/ACAS Alert/Loss of Separation/(Near) Midair Collision",
+    "MED":    "Medical",
+    "NAV":    "Navigation Error",
+    "RE":     "Runway Excursion",
+    "RI":     "Runway Incursion - Vehicle, Aircraft or Person",
+    "SCF-NP": "System/Component Failure or Malfunction (Non-Powerplant)",
+    "SCF-PP": "Powerplant Failure or Malfunction",
+    "SEC":    "Security Related",
+    "TURB":   "Turbulence Encounter",
+    "UIMC":   "Unintended Flight in IMC",
+    "UNK":    "Unknown or Undetermined",
+    "USOS":   "Undershoot/Overshoot",
+    "WILD":   "Collision with Wildlife",
+    "WSTRW":  "Windshear or Thunderstorm",
+    "OTHR":   "Other",
+}
+# Load the tokenizer and model once, at import time, so every request reuses them.
+print("Loading NER model...")
+_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+_model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)
+# Inference only: put the model in evaluation mode before serving requests.
+_model.eval()
+# Use a CUDA device when torch reports one, otherwise fall back to the CPU.
+_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+_model.to(_device)
+print(f"Model loaded on {_device}.")
+class IncidentRequest(BaseModel):
+    narrative: str
+    event_id: str = None
+def extract_entities(text: str) -> list:
+    """Run NER inference; return [(token, label), ...] with subwords merged."""
+    inputs = _tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+    inputs = {k: v.to(_device) for k, v in inputs.items()}
+    with torch.no_grad():
+        logits = _model(**inputs).logits
+    preds = torch.argmax(logits, dim=-1)[0].cpu().numpy()
+    tokens = _tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+    results = []
+    for token, label_id in zip(tokens, preds):
+        if token in _tokenizer.all_special_tokens:
+            continue
+        label = _model.config.id2label[label_id]
+        if token.startswith("##"):
+            if results:
+                results[-1] = (results[-1][0] + token[2:], results[-1][1])
+        else:
+            results.append((token, label))
+    return results
+def build_event_dict(entities: list) -> dict:
+    """
+    Merge consecutive B-/I- tokens into multi-word phrases.
+    Returns {"ACTOR": [...], "SYSTEM": [...], "PHASE": [...],
+             "TRIGGER": [...], "OUTCOME": [...]}
+    """
+    event = {role: [] for role in ENTITY_WEIGHTS}
+    current_tokens = []
+    current_role = None
+    for token, label in entities:
+        if label == "O":
+            if current_tokens and current_role:
+                event[current_role].append(" ".join(current_tokens))
+            current_tokens, current_role = [], None
+            continue
+        prefix, role = label.split("-", 1)
+        if role not in event:
+            continue
+        if prefix == "B":
+            if current_tokens and current_role:
+                event[current_role].append(" ".join(current_tokens))
+            current_tokens = [token]
+            current_role = role
+        elif prefix == "I" and role == current_role:
+            current_tokens.append(token)
+        else:
+            if current_tokens and current_role:
+                event[current_role].append(" ".join(current_tokens))
+            current_tokens = [token]
+            current_role = role
+    if current_tokens and current_role:
+        event[current_role].append(" ".join(current_tokens))
+    return event
+def score_adrep(event: dict) -> dict:
+    """
+    For each entity role, check which ADREP codes have a keyword that is a
+    substring of the extracted phrase. Accumulate weighted scores.
+    """
+    scores: dict = defaultdict(float)
+    for role, phrases in event.items():
+        weight = ENTITY_WEIGHTS.get(role, 1.0)
+        combined = " ".join(phrases).lower()
+        for code, keywords in ADREP_KEYWORDS.items():
+            for kw in keywords:
+                if kw in combined:
+                    scores[code] += weight
+                    break  # count each code once per entity type
+    if not scores:
+        scores["OTHR"] = 1.0
+    return dict(scores)
+def scores_to_top5(scores: dict) -> tuple:
+    """Normalise scores → confidences; return (top_class, confidence, top_5)."""
+    total = sum(scores.values())
+    normalised = {k: v / total for k, v in scores.items()}
+    # Assign a tiny residual to every code not already scored
+    unscored = [c for c in ALL_ADREP_CODES if c not in normalised]
+    residual = max((1.0 - sum(normalised.values())) / max(len(unscored), 1), 0.001)
+    for code in unscored:
+        normalised[code] = residual
+    sorted_codes = sorted(normalised.items(), key=lambda x: x[1], reverse=True)
+    top_code, top_conf = sorted_codes[0]
+    top_5 = [{"class": c, "confidence": round(conf, 4)} for c, conf in sorted_codes[:5]]
+    return top_code, round(top_conf, 4), top_5
+@app.post("/predict")
+async def predict(request: IncidentRequest):
+    start = time.time()
+    entities = extract_entities(request.narrative)
+    event = build_event_dict(entities)
+    scores = score_adrep(event)
+    top_code, confidence, top_5 = scores_to_top5(scores)
+    return {
+        "model_id": MODEL_ID,
+        "display_name": MODEL_DISPLAY_NAME,
+        "prediction": {
+            "top_class": ADREP_DESCRIPTIONS.get(top_code, top_code),
+            "confidence": confidence,
+            "top_5": top_5,
+        },
+        "inference_time_ms": int((time.time() - start) * 1000),
+    }

requirements_api.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+fastapi==0.115.0
+uvicorn==0.34.0
+transformers==4.47.0
+torch==2.6.0
+pydantic==2.7.0
+huggingface_hub==0.27.0
+safetensors==0.5.3
+tokenizers==0.21.0