"""
Support Ticket Classifier — Prediction Module

Loads trained SetFit category model + calibration + keyword urgency rules.
"""
import pickle
import re
from pathlib import Path

import numpy as np

# The rule-based helpers (clean_text, get_urgency) do not need the ML stack,
# so a missing/broken setfit install is only reported when the model is
# actually loaded (see TicketClassifier.load).
try:
    from setfit import SetFitModel
except ImportError as _exc:
    SetFitModel = None
    _SETFIT_IMPORT_ERROR = _exc
else:
    _SETFIT_IMPORT_ERROR = None


# ── Text cleaning (must match Phase 1 exactly) ────────────────────────
# Patterns are precompiled but applied in the same order as before, so the
# cleaned output is unchanged.
_PLACEHOLDER_RE = re.compile(r"\{[^}]+\}")
_URL_RE = re.compile(r"http\S+|www\.\S+")
_EMAIL_RE = re.compile(r"\S+@\S+")
_DISALLOWED_RE = re.compile(r"[^a-z0-9\s.,!?'\-]")
_WHITESPACE_RE = re.compile(r"\s+")


def clean_text(text: str) -> str:
    """Normalise raw ticket text into the form fed to the category model.

    Lowercases the text, removes ``{...}`` placeholders, URLs and e-mail-like
    tokens, replaces every character outside ``a-z0-9``, whitespace and
    ``.,!?'-`` with a space, then collapses whitespace runs.

    Args:
        text: Raw ticket text.

    Returns:
        The cleaned text, or ``""`` when *text* is not a string.
    """
    if not isinstance(text, str):
        return ""
    text = text.lower()
    text = _PLACEHOLDER_RE.sub("", text)
    text = _URL_RE.sub("", text)
    text = _EMAIL_RE.sub("", text)
    text = _DISALLOWED_RE.sub(" ", text)
    text = _WHITESPACE_RE.sub(" ", text)
    return text.strip()


# ── Keyword-based urgency rules ───────────────────────────────────────
HIGH_KEYWORDS = [
    "urgent", "asap", "immediately", "emergency", "critical",
    "can't access", "cannot access", "locked out", "account hacked",
    "charged twice", "double charged", "unauthorized charge",
    "service down", "not working", "completely broken", "data loss",
    "refund immediately", "cancel immediately", "fraud",
    "security breach", "down all day"
]

LOW_KEYWORDS = [
    "just wondering", "question about", "when will", "how do i",
    "could you explain", "i would like to know", "curious about",
    "general question", "update my", "change my"
]

# Typographic apostrophes are mapped to the ASCII one so that phrases such as
# "can’t access" still hit the "can't access" rule.
_APOSTROPHES = str.maketrans({"\u2019": "'", "\u2018": "'", "\u02bc": "'"})


def get_urgency(text: str) -> str:
    """Classify ticket urgency from keyword rules.

    Matching is a case-insensitive substring search on the text after
    apostrophe and whitespace normalisation. High-urgency keywords take
    precedence over low-urgency ones.

    Args:
        text: Raw ticket text.

    Returns:
        ``"high"``, ``"low"`` or ``"medium"``. ``"medium"`` is returned when
        no keyword matches or when *text* is not a string (mirroring how
        ``clean_text`` tolerates non-string input).
    """
    if not isinstance(text, str):
        return "medium"
    # Collapse whitespace so multi-word keywords match across line breaks
    # and repeated spaces.
    t = " ".join(text.lower().translate(_APOSTROPHES).split())
    if any(kw in t for kw in HIGH_KEYWORDS):
        return "high"
    if any(kw in t for kw in LOW_KEYWORDS):
        return "low"
    return "medium"


# ── Classifier (lazy-loaded singleton) ────────────────────────────────
class TicketClassifier:
    """Process-wide singleton wrapping the category model and calibrator.

    Every ``TicketClassifier()`` call returns the same instance. Model
    artifacts are loaded on the first ``load()`` or ``predict()`` call.
    """

    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._loaded = False
        return cls._instance

    def load(self, model_dir="."):
        """Load the model, calibrator and label mappings once.

        Subsequent calls are no-ops once loading has succeeded.

        Args:
            model_dir: Directory containing ``category_model``,
                ``calibration.pkl`` and ``label_mappings.pkl``. The default
                resolves them relative to the current working directory.

        Raises:
            ImportError: If ``setfit`` could not be imported.
        """
        if self._loaded:
            return
        if SetFitModel is None:
            raise ImportError(
                "setfit is required to load the category model"
            ) from _SETFIT_IMPORT_ERROR

        base = Path(model_dir)

        print("Loading category model...")
        self.cat_model = SetFitModel.from_pretrained(str(base / "category_model"))

        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # These artifacts must come from a trusted training run only.
        print("Loading calibration...")
        with open(base / "calibration.pkl", "rb") as f:
            self.platt_scaler = pickle.load(f)

        print("Loading label mappings...")
        with open(base / "label_mappings.pkl", "rb") as f:
            mappings = pickle.load(f)
        self.cat_encoder = mappings["category"]

        # Only flag as loaded after every artifact was read successfully, so
        # a failed load can be retried.
        self._loaded = True
        print("✅ Model ready.")

    def predict(self, ticket_text: str) -> dict:
        """Predict category, calibrated confidence and urgency for a ticket.

        Args:
            ticket_text: Raw ticket text. Non-string input is cleaned to an
                empty string for the model and yields ``"medium"`` urgency.

        Returns:
            A dict with keys ``"category"`` (decoded label), ``"confidence"``
            (calibrated probability of that label, rounded to 4 decimals)
            and ``"urgency"`` (``"high"``, ``"medium"`` or ``"low"``).
        """
        if not self._loaded:
            self.load()

        cleaned = clean_text(ticket_text)

        # Category + calibrated confidence
        raw_probs = np.array(self.cat_model.predict_proba([cleaned]))
        cal_probs = self.platt_scaler.predict_proba(raw_probs)
        cat_idx = int(np.argmax(cal_probs[0]))
        category = self.cat_encoder.inverse_transform([cat_idx])[0]
        confidence = round(float(cal_probs[0][cat_idx]), 4)

        # Urgency from keyword rules (uses the raw text, not the cleaned one)
        urgency = get_urgency(ticket_text)

        return {
            "category": category,
            "confidence": confidence,
            "urgency": urgency
        }


# Convenience function
_classifier = TicketClassifier()


def predict_ticket(text: str) -> dict:
    """Classify *text* with the shared ``TicketClassifier`` instance."""
    return _classifier.predict(text)


# Quick local test
if __name__ == "__main__":
    samples = [
        "I was charged twice and need a refund immediately",
        "How do I update my billing address?",
        "The app is completely broken, I cannot access my account",
        "Just wondering when my subscription renews",
    ]
    for s in samples:
        result = predict_ticket(s)
        print(f"\nTicket : {s}")
        print(f"Category : {result['category']} ({result['confidence']*100:.1f}%)")
        print(f"Urgency : {result['urgency']}")