faysal725's picture
folder is added
5a863b3 verified
Raw
History Blame Contribute Delete
3.94 kB
"""
Support Ticket Classifier — Prediction Module
Loads trained SetFit category model + calibration + keyword urgency rules.
"""
import re
import pickle
import numpy as np
from setfit import SetFitModel
# ── Text cleaning (must match Phase 1 exactly) ────────────────────────
def clean_text(text: str) -> str:
    """Normalise raw ticket text with a fixed sequence of regex rewrites.

    Non-string input yields an empty string. Otherwise the text is
    lower-cased, the substitutions below are applied in order, and the
    result is stripped of leading/trailing whitespace.

    The section comment in this file states that this cleaning must match
    Phase 1 exactly, so the patterns and their order must not be changed.
    """
    if not isinstance(text, str):
        return ""

    # (pattern, replacement) pairs; order matters.
    substitutions = (
        (r"\{[^}]+\}", ""),            # brace-delimited placeholders
        (r"http\S+|www\.\S+", ""),     # URLs
        (r"\S+@\S+", ""),              # tokens containing "@" (e-mail-like)
        (r"[^a-z0-9\s.,!?'\-]", " "),  # anything outside the allowed charset
        (r"\s+", " "),                 # collapse whitespace runs
    )

    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
# ── Keyword-based urgency rules ───────────────────────────────────────
# Substrings that mark a ticket as "high" urgency when found in the
# lower-cased ticket text. All entries are lower-case.
HIGH_KEYWORDS = [
    # explicit urgency words
    "urgent",
    "asap",
    "immediately",
    "emergency",
    "critical",
    # account access
    "can't access",
    "cannot access",
    "locked out",
    "account hacked",
    # billing problems
    "charged twice",
    "double charged",
    "unauthorized charge",
    # outages and breakage
    "service down",
    "not working",
    "completely broken",
    "data loss",
    # demands and security
    "refund immediately",
    "cancel immediately",
    "fraud",
    "security breach",
    "down all day",
]
# Substrings that mark a ticket as "low" urgency when found in the
# lower-cased ticket text. All entries are lower-case.
LOW_KEYWORDS = [
    "just wondering",
    "question about",
    "when will",
    "how do i",
    "could you explain",
    "i would like to know",
    "curious about",
    "general question",
    "update my",
    "change my",
]
def get_urgency(text: str) -> str:
    """Derive a ticket's urgency from keyword rules.

    The text is lower-cased and scanned for substrings. A hit on any entry
    of ``HIGH_KEYWORDS`` wins over any hit on ``LOW_KEYWORDS``; text that
    matches neither list is ``"medium"``.

    Args:
        text: Raw ticket text. Non-string input is treated like empty text
            (as ``clean_text`` does) instead of raising ``AttributeError``.

    Returns:
        ``"high"``, ``"low"`` or ``"medium"``.
    """
    if not isinstance(text, str):
        return "medium"

    # Fold the typographic apostrophe (U+2019) to ASCII so that text such as
    # "can’t access" still matches the "can't access" keyword.
    t = text.lower().replace("\u2019", "'")

    if any(kw in t for kw in HIGH_KEYWORDS):
        return "high"
    if any(kw in t for kw in LOW_KEYWORDS):
        return "low"
    return "medium"
# ── Classifier (lazy-loaded singleton) ────────────────────────────────
class TicketClassifier:
    """Shared ticket classifier whose artifacts are loaded on first use.

    ``__new__`` always returns the same instance, so constructing the class
    is cheap and never touches the disk; the model files are only read by
    ``load()``, which ``predict()`` calls on demand.
    """

    # The one shared instance; created by the first construction.
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._loaded = False
        return cls._instance

    def load(self):
        """Load the category model, calibrator and label mappings once.

        Does nothing if a previous call already completed. The pickle files
        are opened with relative paths, so they are resolved against the
        current working directory. ``"category_model"`` is passed to
        ``SetFitModel.from_pretrained`` as-is (presumably a local directory
        next to the pickles; verify against the deployment layout).
        """
        if self._loaded:
            return

        print("Loading category model...")
        self.cat_model = SetFitModel.from_pretrained("category_model")

        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only ship calibration.pkl / label_mappings.pkl produced by
        # your own training pipeline.
        print("Loading calibration...")
        with open("calibration.pkl", "rb") as f:
            self.platt_scaler = pickle.load(f)

        print("Loading label mappings...")
        with open("label_mappings.pkl", "rb") as f:
            mappings = pickle.load(f)
        self.cat_encoder = mappings["category"]

        # Set last so a failed load is retried on the next call.
        self._loaded = True
        print("✅ Model ready.")

    def predict(self, ticket_text: str) -> dict:
        """Classify one ticket.

        Args:
            ticket_text: Raw ticket text. Non-string input is tolerated: it
                is cleaned to an empty string and gets keyword urgency as if
                it were empty text.

        Returns:
            A dict with keys ``"category"`` (label decoded by the category
            encoder), ``"confidence"`` (probability of that label after the
            calibrator, rounded to 4 decimals) and ``"urgency"`` (result of
            ``get_urgency`` on the raw text).
        """
        if not self._loaded:
            self.load()

        cleaned = clean_text(ticket_text)

        # Category + calibrated confidence: the model's raw probabilities are
        # passed through the loaded calibrator before picking the arg-max.
        raw_probs = np.array(self.cat_model.predict_proba([cleaned]))
        cal_probs = self.platt_scaler.predict_proba(raw_probs)
        cat_idx = int(np.argmax(cal_probs[0]))
        category = self.cat_encoder.inverse_transform([cat_idx])[0]
        confidence = round(float(cal_probs[0][cat_idx]), 4)

        # Urgency comes from keyword rules on the *raw* text. clean_text
        # accepts non-string input, so substitute empty text here rather
        # than letting get_urgency fail on `.lower()`.
        urgency = get_urgency(ticket_text if isinstance(ticket_text, str) else "")

        return {
            "category": category,
            "confidence": confidence,
            "urgency": urgency,
        }
# Convenience function
# Module-level handle on the shared classifier. Constructing it reads no
# model files; those are loaded on the first prediction.
_classifier = TicketClassifier()


def predict_ticket(text: str) -> dict:
    """Classify one ticket using the module's shared classifier.

    Returns whatever ``TicketClassifier.predict`` returns for *text*.
    """
    prediction = _classifier.predict(text)
    return prediction
# Quick local test
if __name__ == "__main__":
    # Smoke test: run a few hand-written tickets through the classifier and
    # print the prediction for each.
    samples = (
        "I was charged twice and need a refund immediately",
        "How do I update my billing address?",
        "The app is completely broken, I cannot access my account",
        "Just wondering when my subscription renews",
    )
    for s in samples:
        result = predict_ticket(s)
        print(
            f"\nTicket : {s}",
            f"Category : {result['category']} ({result['confidence']*100:.1f}%)",
            f"Urgency : {result['urgency']}",
            sep="\n",
        )