"""
Intelligent Ticket Auto-Routing System — Hugging Face Spaces App
================================================================
Converts support tickets into structured routing decisions:
• Multi-label tag classification
• Department routing (hybrid: tag-voting + semantic similarity)
• Priority prediction
• Duplicate detection via FAISS
"""

import csv
import os
import tempfile  # moved into the top import block (was mid-module; PEP 8)
import time
import uuid
from datetime import datetime
from pathlib import Path

import faiss
import gradio as gr
import joblib
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# ── Paths ────────────────────────────────────────────────────────────────────
APP_DIR = Path(__file__).resolve().parent
MODEL_DIR = APP_DIR / "Models"
DATA_DIR = APP_DIR / "Datasets"

# Log lives in the OS temp dir — the app directory may be read-only on Spaces.
LOG_PATH = os.path.join(tempfile.gettempdir(), "routing_evaluation_log.csv")

# ── Load Models ──────────────────────────────────────────────────────────────
print("Loading SBERT model...")
sbert = SentenceTransformer(
    "sentence-transformers/all-mpnet-base-v2", cache_folder="/data"
)

print("Loading classifiers...")
tag_model = joblib.load(MODEL_DIR / "sbert_classifier.pkl")
tag_calibrators = joblib.load(MODEL_DIR / "tag_calibrators.pkl")
priority_bundle = joblib.load(MODEL_DIR / "tuned_priority_model.pkl")
# The tuned-priority artifact may be either a bare estimator or a
# {"model": ...} bundle — accept both.
priority_model = (
    priority_bundle["model"]
    if isinstance(priority_bundle, dict) and "model" in priority_bundle
    else priority_bundle
)
priority_encoder = joblib.load(MODEL_DIR / "priority_encoder.pkl")
hf_scaler = joblib.load(MODEL_DIR / "hf_scaler.pkl")
tag_binarizer = joblib.load(MODEL_DIR / "mlb_tag_binarizer.pkl")
tag_list = list(tag_binarizer.classes_)
dept_prototypes = joblib.load(MODEL_DIR / "department_prototypes.pkl")
print(f"[OK] Tags: {len(tag_list)}, Departments: {len(dept_prototypes)}")

# ── Load Dataset & Build FAISS Index ─────────────────────────────────────────
print("Loading dataset and embeddings...")
df = pd.read_csv(DATA_DIR / "Domain-A_Dataset_Clean.csv")
embeddings = np.load(MODEL_DIR / "db_embeddings.npy").astype("float32")
# Inner product over L2-normalized vectors == cosine similarity.
index = faiss.IndexFlatIP(embeddings.shape[1])
faiss.normalize_L2(embeddings)
index.add(embeddings)
print(f"[OK] FAISS index: {index.ntotal} vectors")

# ── Duplicate Detection ──────────────────────────────────────────────────────
DUP_THRESHOLD = 0.7623  # cosine-similarity cut-off for flagging a duplicate

# Texts aligned 1:1 with index rows. .tolist() already returns a list —
# the original wrapped it in a redundant list(...).
submitted_texts = df["text"].astype(str).tolist()


def check_duplicate(query_emb):
    """Check whether *query_emb* duplicates a ticket already in the index.

    Returns (is_duplicate, matched_text_or_None, best_score).
    """
    q = query_emb.astype("float32").reshape(1, -1).copy()
    faiss.normalize_L2(q)  # normalize so the inner product is cosine sim
    scores, ids = index.search(q, 20)  # only the top hit is used below
    best_idx = int(ids[0][0])
    best_score = float(scores[0][0])
    if best_score >= DUP_THRESHOLD:
        matched = (
            submitted_texts[best_idx]
            if best_idx < len(submitted_texts)
            else "(unknown)"
        )
        return True, matched, best_score
    return False, None, best_score


def register_ticket(query_emb, text):
    """Add a new ticket embedding (and its text) to the FAISS index."""
    v = query_emb.astype("float32").reshape(1, -1).copy()
    faiss.normalize_L2(v)
    index.add(v)
    submitted_texts.append(text)


# ── Tag Prediction ───────────────────────────────────────────────────────────
def predict_tags(text, emb):
    """Return (top-5 tag indices, their calibrated probs, all calibrated probs)."""
    raw_probs = np.asarray(tag_model.predict_proba([emb])[0], dtype=float)
    calibrated = np.array(raw_probs, dtype=float)
    for i, cal in enumerate(tag_calibrators):
        if cal is None:  # no calibrator fitted for this tag — keep raw prob
            continue
        calibrated[i] = float(
            cal.predict(np.asarray([raw_probs[i]], dtype=float))[0]
        )
    top_idx = calibrated.argsort()[-5:][::-1]  # top-5, highest first
    return top_idx, calibrated[top_idx], calibrated


# ── Priority Prediction ──────────────────────────────────────────────────────
def extract_features(text):
    """Hand-crafted lexical features for the priority model.

    The order and definitions must stay exactly as hf_scaler / the priority
    model were trained on. In particular, the keyword checks below are
    substring containment (so "no" also matches "note") — do NOT "fix"
    this; the model was fit on these features.
    """
    words = text.split()
    lowered = text.lower()  # hoisted: was recomputed for every keyword
    return [
        len(text),
        len(words),
        len(set(words)) / (len(words) + 1),
        np.mean([len(w) for w in words]) if words else 0,
        sum(w in lowered for w in ["urgent", "critical", "down"]),
        sum(w in lowered for w in ["not", "cannot", "no"]),
    ]


def predict_priority(text, emb):
    """Predict the priority label from the SBERT embedding + lexical features."""
    features = extract_features(text)
    features_scaled = hf_scaler.transform([features])
    x = np.hstack([emb.reshape(1, -1), features_scaled])
    pred_idx = int(priority_model.predict(x)[0])
    return str(priority_encoder.classes_[pred_idx])


# ── Routing Engine ───────────────────────────────────────────────────────────
def route_ticket(emb, text):
    """Decide how to route one ticket.

    Returns (mode, department, priority, hybrid_score, needs_review).
    """
    tag_idx, top_probs, all_probs = predict_tags(text, emb)
    vote_score = np.mean(top_probs)

    # Closest department prototype by cosine similarity — one batched
    # sklearn call instead of one call per department; np.argmax keeps
    # the first maximum, matching the original strict-'>' loop on ties.
    # (Assumes at least one prototype; the count is printed at startup.)
    depts = list(dept_prototypes)
    sims = cosine_similarity([emb], [dept_prototypes[d] for d in depts])[0]
    best = int(np.argmax(sims))
    best_dept, best_sim = depts[best], float(sims[best])

    hybrid = 0.7 * vote_score + 0.3 * best_sim
    # Adaptive threshold from the tag-probability distribution, clamped.
    threshold = np.clip(
        np.mean(all_probs) + np.std(all_probs), 0.45, 0.70
    )

    if hybrid >= threshold:
        mode, review = "AUTO_ROUTE", False
    elif vote_score >= 0.40 and hybrid >= 0.40:
        mode, review = "AUTO_ROUTE_VOTE", False
    elif best_sim >= 0.65:
        mode, review = "AUTO_ROUTE_SEMANTIC", False
    elif hybrid >= 0.30:
        mode, review = "AUTO_ROUTE_LOW_CONF", True
    else:
        mode, review = "HUMAN_REVIEW", True

    priority = predict_priority(text, emb)
    return mode, best_dept, priority, hybrid, review


# ── Logging ──────────────────────────────────────────────────────────────────
LOG_COLUMNS = [
    "ticket_id", "timestamp", "ticket_text", "duplicate_flag",
    "duplicate_score", "routing_mode", "department",
    "department_confidence", "priority", "priority_confidence",
    "selected_tags", "routing_score", "prediction_latency_ms",
    "explanation",
]


def _ensure_log_header():
    """Create the CSV log with its header row if it does not exist yet."""
    if not os.path.exists(LOG_PATH):
        with open(LOG_PATH, "w", newline="", encoding="utf-8") as f:
            csv.writer(f).writerow(LOG_COLUMNS)


def _append_log(row_dict):
    """Append one routing decision; missing columns are written as ''."""
    _ensure_log_header()
    with open(LOG_PATH, "a", newline="", encoding="utf-8") as f:
        csv.writer(f).writerow([row_dict.get(c, "") for c in LOG_COLUMNS])


# ── Main Processing Pipeline ─────────────────────────────────────────────────
def process_ticket(text):
    """Run the full pipeline for one ticket text and return the result dict."""
    t0 = time.time()
    ticket_id = str(uuid.uuid4())[:8]
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    emb = sbert.encode(text)

    # Duplicate detection
is_dup, dup_text, dup_score = check_duplicate(emb) # Routing mode, dept, priority, conf, review = route_ticket(emb, text) latency_ms = round((time.time() - t0) * 1000, 2) # Tags for logging tag_idx, top_probs, _ = predict_tags(text, emb) tag_summary = ", ".join( f"{tag_list[idx]} ({top_probs[j]:.2f})" for j, idx in enumerate(tag_idx[:3]) ) if is_dup: routing_mode = "DUPLICATE_CHAIN" explanation = ( f"Duplicate detected (score={dup_score:.4f}). " f"Original: {str(dup_text)[:100]}" ) result = { "ticket_id": ticket_id, "status": "⚠️ DUPLICATE", "route": "DUPLICATE_CHAIN", "department": dept, "priority": priority, "confidence": round(float(dup_score), 3), "review": False, "tags": tag_summary, "message": f"Duplicate of: {str(dup_text)[:200]}", "latency": latency_ms, } else: routing_mode = mode explanation = ( f"Ticket routed to {dept} because predicted tags " f"[{tag_summary}] map to the {dept} department. " f"Routing mode: {mode}, Score: {conf:.3f}" ) result = { "ticket_id": ticket_id, "status": "✅ NOT DUPLICATE", "route": mode, "department": dept, "priority": priority, "confidence": round(float(conf), 3), "review": review, "tags": tag_summary, "message": "Ticket processed successfully", "latency": latency_ms, } # Register & log register_ticket(emb, text) _append_log({ "ticket_id": ticket_id, "timestamp": timestamp, "ticket_text": text, "duplicate_flag": is_dup, "duplicate_score": round(float(dup_score), 4), "routing_mode": routing_mode, "department": dept, "department_confidence": round(float(conf), 4), "priority": priority, "priority_confidence": "", "selected_tags": tag_summary, "routing_score": round(float(conf), 4), "prediction_latency_ms": latency_ms, "explanation": explanation, }) return result # ── Gradio UI Handler ──────────────────────────────────────────────────────── def ui_process(text): if not text or not text.strip(): return ( "⚠️ Please enter ticket text", "", "", "", "", "", "", "", "" ) r = process_ticket(text.strip()) # Confidence bar (visual) 
conf_pct = int(r["confidence"] * 100) # Review badge review_badge = "🔴 Yes — Manual review recommended" if r["review"] else "🟢 No" # Priority with emoji priority_map = { "critical": "🔴 Critical", "high": "🟠 High", "medium": "🟡 Medium", "low": "🟢 Low", } priority_display = priority_map.get( r["priority"].lower(), r["priority"] ) # Route mode with emoji route_map = { "AUTO_ROUTE": "⚡ Auto-Routed", "AUTO_ROUTE_VOTE": "⚡ Auto-Routed (Tag Vote)", "AUTO_ROUTE_SEMANTIC": "⚡ Auto-Routed (Semantic)", "AUTO_ROUTE_LOW_CONF": "⚠️ Auto-Routed (Low Confidence)", "HUMAN_REVIEW": "🧑💼 Human Review Required", "DUPLICATE_CHAIN": "🔗 Duplicate Chain", } route_display = route_map.get(r["route"], r["route"]) # Department display dept_display = r["department"].replace("_", " ") return ( r["status"], f"🎫 {r['ticket_id']}", route_display, f"🏢 {dept_display}", priority_display, f"{conf_pct}%", r["tags"], review_badge, r["message"], ) # ── Custom CSS ─────────────────────────────────────────────────────────────── CSS = """ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap'); * { font-family: 'Inter', sans-serif !important; } .gradio-container { max-width: 960px !important; margin: 0 auto !important; } /* Header */ .app-header { text-align: center; padding: 1.5rem 1rem; background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 50%, #a855f7 100%); border-radius: 16px; margin-bottom: 1.5rem; box-shadow: 0 8px 32px rgba(79, 70, 229, 0.3); } .app-header h1 { color: white !important; font-size: 1.75rem !important; font-weight: 700 !important; margin: 0 !important; letter-spacing: -0.02em; } .app-header p { color: rgba(255,255,255,0.85) !important; font-size: 0.95rem !important; margin: 0.4rem 0 0 0 !important; } /* Cards */ .result-card { background: linear-gradient(145deg, rgba(255,255,255,0.05), rgba(255,255,255,0.02)); border: 1px solid rgba(255,255,255,0.1); border-radius: 12px; padding: 0.25rem; } /* Status indicators */ .status-box textarea, 
.status-box input {
    font-weight: 600 !important;
    font-size: 1rem !important;
}

/* Submit button */
.submit-btn {
    background: linear-gradient(135deg, #4f46e5, #7c3aed) !important;
    border: none !important;
    color: white !important;
    font-weight: 600 !important;
    font-size: 1rem !important;
    padding: 0.75rem 2rem !important;
    border-radius: 10px !important;
    box-shadow: 0 4px 16px rgba(79, 70, 229, 0.4) !important;
    transition: all 0.3s ease !important;
}
.submit-btn:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 24px rgba(79, 70, 229, 0.5) !important;
}

/* Clear button */
.clear-btn {
    border: 1px solid rgba(255,255,255,0.2) !important;
    border-radius: 10px !important;
    font-weight: 500 !important;
}

/* Stats footer */
.stats-row {
    text-align: center;
    padding: 0.75rem;
    background: rgba(79, 70, 229, 0.08);
    border-radius: 10px;
    margin-top: 0.5rem;
    font-size: 0.85rem;
    color: #a5b4fc;
}

footer { display: none !important; }
"""

# ── Example Tickets ──────────────────────────────────────────────────────────
# One single-element row per example — presumably consumed by a gr.Examples
# bound to the lone ticket-text input; verify against the component wiring.
EXAMPLES = [
    ["My laptop screen is flickering and sometimes goes completely black. I've tried restarting but the issue persists after login."],
    ["I cannot access the company VPN from my home network. It keeps showing authentication failed error even though my password is correct."],
    ["We need to upgrade our database server as the current one is running out of storage space and response times have increased significantly."],
    ["I was charged twice for my last month's subscription. Please process a refund for the duplicate charge."],
    ["The email server has been down since this morning. No one in the office can send or receive emails. This is critical!"],
    ["Can you provide training materials for the new CRM software that was deployed last week?"],
]

# ── Build UI ─────────────────────────────────────────────────────────────────
with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="indigo", neutral_hue="slate"), title="Ticket Auto-Routing System") as app:

    # Header
    gr.HTML("""
AI-powered ticket classification, routing, priority prediction & duplicate detection