Spaces:

Eklavya73
/

Intelligent_Ticket_Auto-Routing_System

Running

App Files Files Community

Eklavya73 committed on 5 days ago

Commit

3fbebe2

verified ·

1 Parent(s): 42b486e

Upload 13 files

Browse files

Files changed (14) hide show

.gitattributes +1 -0
.gitignore +5 -0
Datasets/Domain-A_Dataset_Clean.csv +3 -0
Models/db_embeddings.npy +3 -0
Models/department_prototypes.pkl +3 -0
Models/hf_scaler.pkl +3 -0
Models/mlb_tag_binarizer.pkl +3 -0
Models/priority_encoder.pkl +3 -0
Models/sbert_classifier.pkl +3 -0
Models/tag_calibrators.pkl +3 -0
Models/tuned_priority_model.pkl +3 -0
README.md +29 -6
app.py +492 -0
requirements.txt +9 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Datasets/Domain-A_Dataset_Clean.csv filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+__pycache__/
+*.pyc
+.gradio/
+*.log
+/tmp/

Datasets/Domain-A_Dataset_Clean.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a75145c90f0d6dad33433e7fa996dae9941da0cda2065b2015b327e368a19c91
+size 20014738

Models/db_embeddings.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e22107cb0072e10e333853ec7e3c4fc376b53334b39288d74d7e2fe150646cc
+size 135659648

Models/department_prototypes.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc14a6630d89e49213096a44ed0a5b80141849c32e84f6a797f7ca00e869bf51
+size 31682

Models/hf_scaler.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:78585451f4491cf6d972debc92abb9115a1e36cf656045f1749d08d828011371
+size 759

Models/mlb_tag_binarizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae4f9a6a39c6cd339154d5761b7d4d2d9cf9b2e9be0148bb4a95495a6e1a8057
+size 1774

Models/priority_encoder.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5cb01a7e21469b628fc277b2a60c89367615ef9199a15a270019f98eb8413f7f
+size 567

Models/sbert_classifier.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fcc0bcf076a4ef374c2e507c3680d349ae62933b3e00cead9fb2fdb9e294c00a
+size 22859821

Models/tag_calibrators.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d35216a0cda5ac83785f722581d63c085dc28e570dc957a025c54a16456c28f
+size 16329

Models/tuned_priority_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ffcb930933a60b6f7269efd5fa0d71a99567083cb986694f558bf4e37d2856d
+size 10923685

README.md CHANGED Viewed

@@ -1,13 +1,36 @@
 ---
 title: Intelligent Ticket Auto-Routing System
-emoji: 🐨
-colorFrom: gray
-colorTo: yellow
 sdk: gradio
-sdk_version: 6.10.0
 app_file: app.py
 pinned: false
-short_description: 'A Domain-Adaptive Multi-Label and  Duplicate- Aware Routing '
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Intelligent Ticket Auto-Routing System
+emoji: 🎫
+colorFrom: indigo
+colorTo: purple
 sdk: gradio
+sdk_version: 5.23.0
 app_file: app.py
 pinned: false
+license: mit
 ---
+# 🎫 Intelligent Ticket Auto-Routing System
+An AI-powered support ticket routing system that automatically:
+- **Classifies** tickets with multi-label tags
+- **Routes** them to the correct department
+- **Predicts** priority level
+- **Detects** duplicate tickets using FAISS semantic search
+## How It Works
+1. Enter a support ticket description
+2. The system encodes it using Sentence-BERT (`all-mpnet-base-v2`)
+3. A calibrated classifier predicts relevant tags
+4. Department routing uses a hybrid of tag-voting + semantic similarity to department prototypes
+5. Priority is predicted using text features + embeddings
+6. FAISS index checks for duplicate tickets in the database
+## Tech Stack
+- **Sentence-BERT** for semantic embeddings
+- **FAISS** for fast similarity search
+- **Scikit-learn** classifiers with isotonic calibration
+- **Gradio** for the interactive UI

app.py ADDED Viewed

	@@ -0,0 +1,492 @@

+"""
+Intelligent Ticket Auto-Routing System — Hugging Face Spaces App
+================================================================
+Converts support tickets into structured routing decisions:
+  • Multi-label tag classification
+  • Department routing (hybrid: tag-voting + semantic similarity)
+  • Priority prediction
+  • Duplicate detection via FAISS
+"""
+import csv
+import os
+import tempfile
+import time
+import uuid
+from datetime import datetime
+from pathlib import Path
+import faiss
+import gradio as gr
+import joblib
+import numpy as np
+import pandas as pd
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+# ── Paths ────────────────────────────────────────────────────────────────────
+APP_DIR = Path(__file__).resolve().parent
+MODEL_DIR = APP_DIR / "Models"
+DATA_DIR = APP_DIR / "Datasets"
+import tempfile
+LOG_PATH = os.path.join(tempfile.gettempdir(), "routing_evaluation_log.csv")
+# ── Load Models ──────────────────────────────────────────────────────────────
+print("Loading SBERT model...")
+sbert = SentenceTransformer("all-mpnet-base-v2")  # sentence encoder applied to every incoming ticket in process_ticket
+print("Loading classifiers...")
+tag_model = joblib.load(MODEL_DIR / "sbert_classifier.pkl")  # NOTE: joblib.load unpickles its input; only load trusted artifacts
+tag_calibrators = joblib.load(MODEL_DIR / "tag_calibrators.pkl")  # iterated per tag position in predict_tags; entries may be None
+priority_bundle = joblib.load(MODEL_DIR / "tuned_priority_model.pkl")
+priority_model = (  # the artifact is either a bare model or a dict that wraps it under "model"
+    priority_bundle["model"]
+    if isinstance(priority_bundle, dict) and "model" in priority_bundle
+    else priority_bundle
+)
+priority_encoder = joblib.load(MODEL_DIR / "priority_encoder.pkl")  # classes_ maps a predicted index to its priority label
+hf_scaler = joblib.load(MODEL_DIR / "hf_scaler.pkl")  # scales the hand-crafted features produced by extract_features
+tag_binarizer = joblib.load(MODEL_DIR / "mlb_tag_binarizer.pkl")
+tag_list = list(tag_binarizer.classes_)  # tag names, looked up by the indices predict_tags returns
+dept_prototypes = joblib.load(MODEL_DIR / "department_prototypes.pkl")  # iterated as {department: prototype vector} in route_ticket
+print(f"[OK] Tags: {len(tag_list)}, Departments: {len(dept_prototypes)}")
+# ── Load Dataset & Build FAISS Index ─────────────────────────────────────────
+print("Loading dataset and embeddings...")
+df = pd.read_csv(DATA_DIR / "Domain-A_Dataset_Clean.csv")
+embeddings = np.load(MODEL_DIR / "db_embeddings.npy").astype("float32")  # FAISS operates on float32 arrays
+index = faiss.IndexFlatIP(embeddings.shape[1])  # exact (brute-force) inner-product index
+faiss.normalize_L2(embeddings)  # in-place; with unit-length rows the inner product equals cosine similarity
+index.add(embeddings)
+print(f"[OK] FAISS index: {index.ntotal} vectors")
+# ── Duplicate Detection ──────────────────────────────────────────────────────
+DUP_THRESHOLD = 0.7623  # minimum top-1 similarity for check_duplicate to flag a duplicate
+submitted_texts = list(df["text"].astype(str).tolist())  # ticket text by FAISS row id; assumes df rows align with db_embeddings.npy rows — TODO confirm
+def check_duplicate(query_emb):
+    """Check if query is a duplicate of any ticket in the index."""
+    q = query_emb.astype("float32").reshape(1, -1).copy()
+    faiss.normalize_L2(q)
+    D, I = index.search(q, 20)
+    best_idx = int(I[0][0])
+    best_score = float(D[0][0])
+    if best_score >= DUP_THRESHOLD:
+        matched = (
+            submitted_texts[best_idx]
+            if best_idx < len(submitted_texts)
+            else "(unknown)"
+        )
+        return True, matched, best_score
+    return False, None, best_score
+def register_ticket(query_emb, text):
+    """Add a new ticket to the FAISS index."""
+    v = query_emb.astype("float32").reshape(1, -1).copy()
+    faiss.normalize_L2(v)
+    index.add(v)
+    submitted_texts.append(text)
+# ── Tag Prediction ───────────────────────────────────────────────────────────
+def predict_tags(text, emb):
+    raw_probs = np.asarray(tag_model.predict_proba([emb])[0], dtype=float)
+    calibrated = np.array(raw_probs, dtype=float)
+    for i, cal in enumerate(tag_calibrators):
+        if cal is None:
+            continue
+        calibrated[i] = float(
+            cal.predict(np.asarray([raw_probs[i]], dtype=float))[0]
+        )
+    top_idx = calibrated.argsort()[-5:][::-1]
+    return top_idx, calibrated[top_idx], calibrated
+# ── Priority Prediction ─────────────────────────────────────────────────────
+def extract_features(text):
+    words = text.split()
+    return [
+        len(text),
+        len(words),
+        len(set(words)) / (len(words) + 1),
+        np.mean([len(w) for w in words]) if words else 0,
+        sum(w in text.lower() for w in ["urgent", "critical", "down"]),
+        sum(w in text.lower() for w in ["not", "cannot", "no"]),
+    ]
+def predict_priority(text, emb):
+    features = extract_features(text)
+    features_scaled = hf_scaler.transform([features])
+    x = np.hstack([emb.reshape(1, -1), features_scaled])
+    pred_idx = int(priority_model.predict(x)[0])
+    return str(priority_encoder.classes_[pred_idx])
+# ── Routing Engine ───────────────────────────────────────────────────────────
+def route_ticket(emb, text):
+    tag_idx, top_probs, all_probs = predict_tags(text, emb)
+    vote_score = np.mean(top_probs)
+    best_dept, best_sim = None, -1
+    for dept, proto in dept_prototypes.items():
+        sim = cosine_similarity([emb], [proto])[0][0]
+        if sim > best_sim:
+            best_sim = sim
+            best_dept = dept
+    hybrid = 0.7 * vote_score + 0.3 * best_sim
+    threshold = np.clip(
+        np.mean(all_probs) + np.std(all_probs), 0.45, 0.70
+    )
+    if hybrid >= threshold:
+        mode, review = "AUTO_ROUTE", False
+    elif vote_score >= 0.40 and hybrid >= 0.40:
+        mode, review = "AUTO_ROUTE_VOTE", False
+    elif best_sim >= 0.65:
+        mode, review = "AUTO_ROUTE_SEMANTIC", False
+    elif hybrid >= 0.30:
+        mode, review = "AUTO_ROUTE_LOW_CONF", True
+    else:
+        mode, review = "HUMAN_REVIEW", True
+    priority = predict_priority(text, emb)
+    return mode, best_dept, priority, hybrid, review
+# ── Logging ──────────────────────────────────────────────────────────────────
+LOG_COLUMNS = [  # CSV header row and column order used by _ensure_log_header and _append_log
+    "ticket_id", "timestamp", "ticket_text", "duplicate_flag",
+    "duplicate_score", "routing_mode", "department",
+    "department_confidence", "priority", "priority_confidence",
+    "selected_tags", "routing_score", "prediction_latency_ms", "explanation",
+]
+def _ensure_log_header():
+    if not os.path.exists(LOG_PATH):
+        with open(LOG_PATH, "w", newline="", encoding="utf-8") as f:
+            csv.writer(f).writerow(LOG_COLUMNS)
+def _append_log(row_dict):
+    _ensure_log_header()
+    with open(LOG_PATH, "a", newline="", encoding="utf-8") as f:
+        csv.writer(f).writerow([row_dict.get(c, "") for c in LOG_COLUMNS])
+# ── Main Processing Pipeline ────────────────────────────────────────────────
+def process_ticket(text):
+    t0 = time.time()
+    ticket_id = str(uuid.uuid4())[:8]
+    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    emb = sbert.encode(text)
+    # Duplicate detection
+    is_dup, dup_text, dup_score = check_duplicate(emb)
+    # Routing
+    mode, dept, priority, conf, review = route_ticket(emb, text)
+    latency_ms = round((time.time() - t0) * 1000, 2)
+    # Tags for logging
+    tag_idx, top_probs, _ = predict_tags(text, emb)
+    tag_summary = ", ".join(
+        f"{tag_list[idx]} ({top_probs[j]:.2f})"
+        for j, idx in enumerate(tag_idx[:3])
+    )
+    if is_dup:
+        routing_mode = "DUPLICATE_CHAIN"
+        explanation = (
+            f"Duplicate detected (score={dup_score:.4f}). "
+            f"Original: {str(dup_text)[:100]}"
+        )
+        result = {
+            "ticket_id": ticket_id,
+            "status": "⚠️ DUPLICATE",
+            "route": "DUPLICATE_CHAIN",
+            "department": dept,
+            "priority": priority,
+            "confidence": round(float(dup_score), 3),
+            "review": False,
+            "tags": tag_summary,
+            "message": f"Duplicate of: {str(dup_text)[:200]}",
+            "latency": latency_ms,
+        }
+    else:
+        routing_mode = mode
+        explanation = (
+            f"Ticket routed to {dept} because predicted tags "
+            f"[{tag_summary}] map to the {dept} department. "
+            f"Routing mode: {mode}, Score: {conf:.3f}"
+        )
+        result = {
+            "ticket_id": ticket_id,
+            "status": "✅ NOT DUPLICATE",
+            "route": mode,
+            "department": dept,
+            "priority": priority,
+            "confidence": round(float(conf), 3),
+            "review": review,
+            "tags": tag_summary,
+            "message": "Ticket processed successfully",
+            "latency": latency_ms,
+        }
+    # Register & log
+    register_ticket(emb, text)
+    _append_log({
+        "ticket_id": ticket_id,
+        "timestamp": timestamp,
+        "ticket_text": text,
+        "duplicate_flag": is_dup,
+        "duplicate_score": round(float(dup_score), 4),
+        "routing_mode": routing_mode,
+        "department": dept,
+        "department_confidence": round(float(conf), 4),
+        "priority": priority,
+        "priority_confidence": "",
+        "selected_tags": tag_summary,
+        "routing_score": round(float(conf), 4),
+        "prediction_latency_ms": latency_ms,
+        "explanation": explanation,
+    })
+    return result
+# ── Gradio UI Handler ────────────────────────────────────────────────────────
+def ui_process(text):
+    if not text or not text.strip():
+        return (
+            "⚠️ Please enter ticket text",
+            "", "", "", "", "", "", "", ""
+        )
+    r = process_ticket(text.strip())
+    # Confidence bar (visual)
+    conf_pct = int(r["confidence"] * 100)
+    # Review badge
+    review_badge = "🔴 Yes — Manual review recommended" if r["review"] else "🟢 No"
+    # Priority with emoji
+    priority_map = {
+        "critical": "🔴 Critical",
+        "high": "🟠 High",
+        "medium": "🟡 Medium",
+        "low": "🟢 Low",
+    }
+    priority_display = priority_map.get(
+        r["priority"].lower(), r["priority"]
+    )
+    # Route mode with emoji
+    route_map = {
+        "AUTO_ROUTE": "⚡ Auto-Routed",
+        "AUTO_ROUTE_VOTE": "⚡ Auto-Routed (Tag Vote)",
+        "AUTO_ROUTE_SEMANTIC": "⚡ Auto-Routed (Semantic)",
+        "AUTO_ROUTE_LOW_CONF": "⚠️ Auto-Routed (Low Confidence)",
+        "HUMAN_REVIEW": "🧑‍💼 Human Review Required",
+        "DUPLICATE_CHAIN": "🔗 Duplicate Chain",
+    }
+    route_display = route_map.get(r["route"], r["route"])
+    # Department display
+    dept_display = r["department"].replace("_", " ")
+    return (
+        r["status"],
+        f"🎫 {r['ticket_id']}",
+        route_display,
+        f"🏢 {dept_display}",
+        priority_display,
+        f"{conf_pct}%",
+        r["tags"],
+        review_badge,
+        r["message"],
+    )
+# ── Custom CSS ───────────────────────────────────────────────────────────────
+CSS = """
+@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
+* { font-family: 'Inter', sans-serif !important; }
+.gradio-container {
+    max-width: 960px !important;
+    margin: 0 auto !important;
+}
+/* Header */
+.app-header {
+    text-align: center;
+    padding: 1.5rem 1rem;
+    background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 50%, #a855f7 100%);
+    border-radius: 16px;
+    margin-bottom: 1.5rem;
+    box-shadow: 0 8px 32px rgba(79, 70, 229, 0.3);
+}
+.app-header h1 {
+    color: white !important;
+    font-size: 1.75rem !important;
+    font-weight: 700 !important;
+    margin: 0 !important;
+    letter-spacing: -0.02em;
+}
+.app-header p {
+    color: rgba(255,255,255,0.85) !important;
+    font-size: 0.95rem !important;
+    margin: 0.4rem 0 0 0 !important;
+}
+/* Cards */
+.result-card {
+    background: linear-gradient(145deg, rgba(255,255,255,0.05), rgba(255,255,255,0.02));
+    border: 1px solid rgba(255,255,255,0.1);
+    border-radius: 12px;
+    padding: 0.25rem;
+}
+/* Status indicators */
+.status-box textarea, .status-box input {
+    font-weight: 600 !important;
+    font-size: 1rem !important;
+}
+/* Submit button */
+.submit-btn {
+    background: linear-gradient(135deg, #4f46e5, #7c3aed) !important;
+    border: none !important;
+    color: white !important;
+    font-weight: 600 !important;
+    font-size: 1rem !important;
+    padding: 0.75rem 2rem !important;
+    border-radius: 10px !important;
+    box-shadow: 0 4px 16px rgba(79, 70, 229, 0.4) !important;
+    transition: all 0.3s ease !important;
+}
+.submit-btn:hover {
+    transform: translateY(-2px) !important;
+    box-shadow: 0 6px 24px rgba(79, 70, 229, 0.5) !important;
+}
+/* Clear button */
+.clear-btn {
+    border: 1px solid rgba(255,255,255,0.2) !important;
+    border-radius: 10px !important;
+    font-weight: 500 !important;
+}
+/* Stats footer */
+.stats-row {
+    text-align: center;
+    padding: 0.75rem;
+    background: rgba(79, 70, 229, 0.08);
+    border-radius: 10px;
+    margin-top: 0.5rem;
+    font-size: 0.85rem;
+    color: #a5b4fc;
+}
+footer { display: none !important; }
+"""
+# ── Example Tickets ──────────────────────────────────────────────────────────
+EXAMPLES = [  # one inner list per example row; each row holds the single ticket-text input passed to gr.Examples
+    ["My laptop screen is flickering and sometimes goes completely black. I've tried restarting but the issue persists after login."],
+    ["I cannot access the company VPN from my home network. It keeps showing authentication failed error even though my password is correct."],
+    ["We need to upgrade our database server as the current one is running out of storage space and response times have increased significantly."],
+    ["I was charged twice for my last month's subscription. Please process a refund for the duplicate charge."],
+    ["The email server has been down since this morning. No one in the office can send or receive emails. This is critical!"],
+    ["Can you provide training materials for the new CRM software that was deployed last week?"],
+]
+# ── Build UI ─────────────────────────────────────────────────────────────────
+with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="indigo", neutral_hue="slate"), title="Ticket Auto-Routing System") as app:  # `app` is the object launched at the bottom of the file
+    # Header
+    gr.HTML("""
+        <div class="app-header">
+            <h1>🎫 Intelligent Ticket Auto-Routing System</h1>
+            <p>AI-powered ticket classification, routing, priority prediction & duplicate detection</p>
+        </div>
+    """)
+    with gr.Row():
+        # ── Left: Input ──
+        with gr.Column(scale=1):
+            ticket_input = gr.Textbox(
+                label="📝 Ticket Description",
+                placeholder="Describe the support issue in detail...",
+                lines=6,
+                max_lines=12,
+            )
+            with gr.Row():
+                submit_btn = gr.Button("🚀 Process Ticket", variant="primary", elem_classes=["submit-btn"])
+                clear_btn = gr.ClearButton(value="🗑️ Clear", elem_classes=["clear-btn"])  # targets are attached below via clear_btn.add
+            gr.Examples(  # clicking an example fills ticket_input
+                examples=EXAMPLES,
+                inputs=ticket_input,
+                label="💡 Try these examples",
+            )
+        # ── Right: Results ──
+        with gr.Column(scale=1):
+            with gr.Group(elem_classes=["result-card"]):
+                dup_status = gr.Textbox(label="🔍 Duplicate Status", interactive=False, elem_classes=["status-box"])
+                ticket_id = gr.Textbox(label="🆔 Ticket ID", interactive=False)
+            with gr.Group(elem_classes=["result-card"]):
+                with gr.Row():
+                    route_mode = gr.Textbox(label="🛤️ Routing Mode", interactive=False)
+                    department = gr.Textbox(label="🏢 Department", interactive=False)
+                with gr.Row():
+                    priority = gr.Textbox(label="⚡ Priority", interactive=False)
+                    confidence = gr.Textbox(label="📊 Confidence", interactive=False)
+            with gr.Group(elem_classes=["result-card"]):
+                tags = gr.Textbox(label="🏷️ Predicted Tags", interactive=False)
+                needs_review = gr.Textbox(label="👀 Needs Review", interactive=False)
+                message = gr.Textbox(label="💬 Details", interactive=False, lines=2)
+    gr.HTML(f"""
+        <div class="stats-row">
+            📊 Database: <strong>{index.ntotal:,}</strong> tickets indexed
+            &nbsp;•&nbsp;
+            🏷️ <strong>{len(tag_list)}</strong> tag categories
+            &nbsp;•&nbsp;
+            🏢 <strong>{len(dept_prototypes)}</strong> departments
+        </div>
+    """)  # counts are interpolated once while the UI is built; tickets registered later do not update this footer
+    # ── Wire events ──
+    outputs = [dup_status, ticket_id, route_mode, department, priority, confidence, tags, needs_review, message]  # order must match the 9-tuple returned by ui_process
+    submit_btn.click(fn=ui_process, inputs=ticket_input, outputs=outputs)
+    ticket_input.submit(fn=ui_process, inputs=ticket_input, outputs=outputs)  # same handler as the button click
+    clear_btn.add([ticket_input] + outputs)  # the clear button resets the input and every result box
+# ── Launch ───────────────────────────────────────────────────────────────────
+if __name__ == "__main__":  # start the Gradio server only when the file is executed as a script
+    app.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+gradio>=5.0.0
+sentence-transformers>=2.2.0
+faiss-cpu>=1.7.0
+scikit-learn==1.5.1
+numpy>=1.24.0
+pandas>=2.0.0
+joblib>=1.3.0
+xgboost>=2.0.0
+lightgbm>=4.0.0