Spaces:
Build error
Build error
| """ | |
| Subcategory Classification Service for Gapura AI | |
| Predicts Terminal_Area_Category and Apron_Area_Category from report text | |
| """ | |
| import os | |
| import logging | |
| import pickle | |
| from typing import List, Dict, Any, Optional, Tuple | |
| import re | |
| logger = logging.getLogger(__name__) | |
class SubcategoryClassifier:
    """
    Classifies reports into subcategories based on text content.

    Classification is keyword based: every category owns a list of lowercase
    English/Indonesian keywords, and a report is scored by how many of those
    keywords occur in its text (see ``_score_categories``).

    Terminal Area Categories:
    - Baggage/Special/Irregularities Handling
    - Passenger, Baggage & Document Profilling
    - Boarding Management
    - Procedure Competencies
    - Accuracy & Completeness of Service
    - Lack communication skills

    Apron Area Categories:
    - Procedure Competencies
    - Accurancy & Completeness of Service (Apron)
    - The Availability of GSE
    - Flight Document Handling
    - Preparation Before ETA
    - Safety Performance
    - Officer Competencies
    - Qualified Competencies (Apron)
    - Cleanliness of GSE
    - Prompt service and certainty

    NOTE: the category names (including their spelling) are returned verbatim
    as labels, so they are intentionally left untouched.
    """

    # Category name -> keywords; each keyword found in the text adds 1 point.
    TERMINAL_CATEGORIES = {
        "Baggage/Special/Irregularities Handling": [
            "bagasi",
            "baggage",
            "baggages",
            "koper",
            "lost baggage",
            "missing baggage",
            "baggage claim",
            "baggage handling",
            "special handling",
            "irregularities",
            "torn",
            "dented",
            "wet",
            "damaged baggage",
            "delayed baggage",
        ],
        "Passenger, Baggage & Document Profilling": [
            "passenger profiling",
            "document",
            "passport",
            "visa",
            "identification",
            "profilling",
            "screening",
            "security check",
            "dokumen",
            "penumpang",
            "wrong pax",
            "passenger wrong",
            "incorrect passenger",
        ],
        "Boarding Management": [
            "boarding",
            "gate",
            "embarkation",
            "boarding pass",
            "boarding card",
            "gate closure",
            "late boarding",
            "boarding delay",
            "wrong gate",
            "naik pesawat",
            "pintu",
            "kapasitas",
        ],
        "Procedure Competencies": [
            "procedure",
            "prosedur",
            "sop",
            "standard operating",
            "competency",
            "training",
            "skill",
            "knowledge",
            "tidak sesuai prosedur",
            "kelalaian",
            "negligence",
            "mistake",
            "human error",
        ],
        "Accuracy & Completeness of Service": [
            "accuracy",
            "completeness",
            "kelengkapan",
            "ketepatan",
            "incomplete",
            "missing information",
            "wrong information",
            "data entry",
            "input error",
            "administrasi",
            "dokumentasi",
            "documentation",
        ],
        "Lack communication skills": [
            "communication",
            "komunikasi",
            "language",
            "bahasa",
            "english",
            "tidak bisa komunikasi",
            "miscommunication",
            "tidak mengerti",
            "tidak paham",
            "instruction",
            "penjelasan",
        ],
    }

    APRON_CATEGORIES = {
        "Procedure Competencies": [
            "procedure",
            "prosedur",
            "sop",
            "load sheet",
            "instruction",
            "loading instruction",
            "unloading",
            "bertentangan",
            "tidak sesuai",
            "mistake",
            "error",
            "human error",
            "kelalaian",
        ],
        "Accurancy & Completeness of Service (Apron)": [
            "accuracy",
            "completeness",
            "wrong",
            "salah",
            "incorrect",
            "mismatch",
            "tidak sesuai",
            "different",
            "beda",
            "not match",
            "count",
            "jumlah",
            "pieces",
            "pcs",
            "weight",
            "berat",
        ],
        "The Availability of GSE": [
            "gse",
            "ground support",
            "equipment",
            "forklift",
            "pallet",
            "container",
            "uld",
            "belt loader",
            "stairs",
            "towing",
            "peralatan",
            "alat",
            "broken",
            "rusak",
            "tidak tersedia",
        ],
        "Flight Document Handling": [
            "document",
            "dokumen",
            "manifest",
            "load sheet",
            "awb",
            "air waybill",
            "flight plan",
            "notoc",
            "dangerous goods",
            "paperwork",
            "dokumentasi",
            # Fixed: this keyword used to start with a stray space, so it
            # could never match at the beginning of the text or right after
            # punctuation.
            "kelengkapan dokumen",
        ],
        "Preparation Before ETA": [
            "preparation",
            "persiapan",
            "before eta",
            "standby",
            "ready",
            "belum siap",
            "not ready",
            "late preparation",
            "tardiness",
            "timing",
            "schedule",
            "jadwal",
        ],
        "Safety Performance": [
            "safety",
            "keselamatan",
            "danger",
            "bahaya",
            "hazard",
            "incident",
            "accident",
            "injury",
            "cedera",
            "emergency",
            "unsafe",
            "risk",
            "potensi bahaya",
        ],
        "Officer Competencies": [
            "officer",
            "petugas",
            "staff",
            "staffing",
            "competency",
            "skill",
            "kemampuan",
            "pengalaman",
            "experience",
            "training",
            "pelatihan",
            "certification",
        ],
        "Qualified Competencies (Apron)": [
            "qualified",
            "certified",
            "bersertifikat",
            "license",
            "lisensi",
            "authorized",
            "terlatih",
            "trained",
            "qualification",
        ],
        "Cleanliness of GSE": [
            "cleanliness",
            "kebersihan",
            "dirty",
            "kotor",
            "clean",
            "bersih",
            "hygiene",
            "sanitation",
            "maintain",
        ],
        "Prompt service and certainty": [
            "prompt",
            "cepat",
            "quick",
            "fast",
            "slow",
            "lambat",
            "delay",
            "terlambat",
            "waiting time",
            "tunggu",
            "response time",
        ],
    }

    # Issue type -> categories (terminal and/or apron) that receive a score
    # boost when a report carries that issue type.
    ISSUE_TYPE_MAPPING = {
        "Pax Handling": [
            "Passenger, Baggage & Document Profilling",
            "Boarding Management",
        ],
        "Baggage Handling": ["Baggage/Special/Irregularities Handling"],
        "Cargo Problems": [
            "Accurancy & Completeness of Service (Apron)",
            "The Availability of GSE",
        ],
        "GSE": ["The Availability of GSE", "Cleanliness of GSE"],
        "Operation": ["Procedure Competencies", "Preparation Before ETA"],
        "Flight Document Handling": ["Flight Document Handling"],
        "Procedure Competencies": ["Procedure Competencies", "Officer Competencies"],
    }

    # Points added to a category that is mapped to the report's issue type.
    _ISSUE_TYPE_BOOST = 2
    # Confidence is BASE + share_of_best_category * SPAN, capped at MAX.
    _BASE_CONFIDENCE = 0.5
    _CONFIDENCE_SPAN = 0.45
    _MAX_CONFIDENCE = 0.95

    def __init__(self):
        """Create the classifier and try to load the optional pickled model."""
        # These attributes are filled by ``_load_model`` when a model file is
        # present; none of the methods in this class read them afterwards.
        self.model = None
        self.vectorizer = None
        self.terminal_label_encoder = None
        self.apron_label_encoder = None
        self._load_model()

    def _load_model(self):
        """
        Load the trained model if available.

        Looks for ``models/subcategory/classifier.pkl`` two directory levels
        above this file. A missing file is silently ignored; any failure while
        reading it is logged as a warning and leaves the attributes as they
        were, so construction never fails because of the model.
        """
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        model_path = os.path.join(base_dir, "models", "subcategory", "classifier.pkl")
        if not os.path.exists(model_path):
            return

        try:
            with open(model_path, "rb") as f:
                # SECURITY: unpickling executes arbitrary code contained in
                # the file. This is only safe while the model file is a
                # trusted artifact shipped with the application.
                model_data = pickle.load(f)
            self.model = model_data.get("model")
            self.vectorizer = model_data.get("vectorizer")
            self.terminal_label_encoder = model_data.get("terminal_encoder")
            self.apron_label_encoder = model_data.get("apron_encoder")
            logger.info("Subcategory classifier loaded")
        except Exception as e:
            # Deliberately best-effort: a broken model file must not prevent
            # the keyword-based classification from working.
            logger.warning("Failed to load subcategory model: %s", e)

    def classify(
        self,
        report: Optional[str],
        area: Optional[str] = None,
        issue_type: Optional[str] = None,
        root_cause: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Classify report into subcategory.

        Args:
            report: Report text. ``None`` is treated as empty text.
            area: Area type. ``"Terminal Area"`` and ``"Apron Area"`` select
                the matching category set; any other value (including
                ``None``) triggers auto-detection.
            issue_type: Issue category, used to boost mapped subcategories.
            root_cause: Root cause text, appended to the report text.

        Returns:
            Dict with ``subcategory``, ``confidence`` and ``all_scores``.
            In auto-detection mode the dict additionally contains
            ``detected_area``.
        """
        # Treat a missing report as empty text; formatting None directly
        # would inject the literal word "none" into the text being matched.
        report_text = "" if report is None else report
        combined_text = f"{report_text} {root_cause or ''}".lower()

        if area == "Terminal Area":
            return self._classify_terminal(combined_text, issue_type)
        if area == "Apron Area":
            return self._classify_apron(combined_text, issue_type)

        # Auto-detect area: classify against both sets and keep the more
        # confident result. Ties go to the apron result.
        terminal_result = self._classify_terminal(combined_text, issue_type)
        apron_result = self._classify_apron(combined_text, issue_type)
        if terminal_result["confidence"] > apron_result["confidence"]:
            return {**terminal_result, "detected_area": "Terminal Area"}
        return {**apron_result, "detected_area": "Apron Area"}

    def _score_categories(
        self,
        text: str,
        categories: Dict[str, List[str]],
        issue_type: Optional[str],
        default_category: str,
    ) -> Dict[str, Any]:
        """
        Score ``text`` against ``categories`` and build the result dict.

        Args:
            text: Lowercased text to search.
            categories: Mapping of category name to its keywords.
            issue_type: Optional issue type; categories mapped to it in
                ``ISSUE_TYPE_MAPPING`` get a fixed score boost.
            default_category: Category reported when every score is zero.

        Returns:
            Dict with ``subcategory``, ``confidence`` (rounded to 2 digits)
            and ``all_scores`` (each category's share of the total score).
        """
        # Resolved once instead of on every loop iteration.
        boosted = self.ISSUE_TYPE_MAPPING.get(issue_type, ()) if issue_type else ()

        scores = {}
        for category, keywords in categories.items():
            # NOTE(review): matching is plain substring containment, so short
            # keywords also hit inside longer words (e.g. "uld" inside
            # "should"). Left unchanged to preserve existing scoring.
            score = sum(1 for kw in keywords if kw in text)
            if category in boosted:
                score += self._ISSUE_TYPE_BOOST
            scores[category] = score

        # ``or 1`` avoids a division by zero when nothing matched.
        total = sum(scores.values()) or 1
        all_scores = {k: round(v / total, 2) for k, v in scores.items()}

        if max(scores.values()) == 0:
            return {
                "subcategory": default_category,
                "confidence": self._BASE_CONFIDENCE,
                "all_scores": all_scores,
            }

        # On ties, max() returns the first category in definition order.
        best_category = max(scores, key=scores.get)
        share = scores[best_category] / total
        confidence = min(
            self._MAX_CONFIDENCE,
            self._BASE_CONFIDENCE + share * self._CONFIDENCE_SPAN,
        )
        return {
            "subcategory": best_category,
            "confidence": round(confidence, 2),
            "all_scores": all_scores,
        }

    def _classify_terminal(
        self, text: str, issue_type: Optional[str] = None
    ) -> Dict[str, Any]:
        """Classify into Terminal Area category (expects lowercased text)."""
        return self._score_categories(
            text,
            self.TERMINAL_CATEGORIES,
            issue_type,
            "Baggage/Special/Irregularities Handling",
        )

    def _classify_apron(
        self, text: str, issue_type: Optional[str] = None
    ) -> Dict[str, Any]:
        """Classify into Apron Area category (expects lowercased text)."""
        return self._score_categories(
            text,
            self.APRON_CATEGORIES,
            issue_type,
            "Procedure Competencies",
        )

    def classify_batch(self, records: List[Dict]) -> List[Dict[str, Any]]:
        """
        Classify multiple records.

        Each record is a dict read through the keys ``Report``, ``Area``,
        ``Irregularity_Complain_Category`` and ``Root_Caused``; missing keys
        are tolerated. Results are returned in input order.
        """
        return [
            self.classify(
                report=record.get("Report", ""),
                area=record.get("Area"),
                issue_type=record.get("Irregularity_Complain_Category"),
                root_cause=record.get("Root_Caused"),
            )
            for record in records
        ]

    def get_available_categories(
        self, area: Optional[str] = None
    ) -> Dict[str, List[str]]:
        """
        Get list of available categories.

        Returns only the ``terminal`` or ``apron`` list when ``area`` is
        ``"Terminal Area"`` or ``"Apron Area"``; otherwise returns both.
        """
        if area == "Terminal Area":
            return {"terminal": list(self.TERMINAL_CATEGORIES.keys())}
        if area == "Apron Area":
            return {"apron": list(self.APRON_CATEGORIES.keys())}
        return {
            "terminal": list(self.TERMINAL_CATEGORIES.keys()),
            "apron": list(self.APRON_CATEGORIES.keys()),
        }
# Module-wide classifier instance; stays None until first requested.
_subcategory_classifier: Optional[SubcategoryClassifier] = None


def get_subcategory_classifier() -> SubcategoryClassifier:
    """
    Return the shared SubcategoryClassifier, creating it on first use.

    Later calls return the same instance that the first call created.
    """
    global _subcategory_classifier
    if _subcategory_classifier is not None:
        return _subcategory_classifier
    _subcategory_classifier = SubcategoryClassifier()
    return _subcategory_classifier