# Eklavya73 — "Update app.py" (commit e7ec69a, verified)
"""
Intelligent Ticket Auto-Routing System — Hugging Face Spaces App
================================================================
Converts support tickets into structured routing decisions:
  • Multi-label tag classification
  • Department routing (hybrid: tag-voting + semantic similarity)
  • Priority prediction
  • Duplicate detection via FAISS
"""
import csv
import os
import time
import uuid
from datetime import datetime
from pathlib import Path
import faiss
import gradio as gr
import joblib
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# ── Paths ────────────────────────────────────────────────────────────────────
import tempfile

# Model artifacts and datasets are resolved relative to this script's folder.
APP_DIR = Path(__file__).resolve().parent
MODEL_DIR = APP_DIR.joinpath("Models")
DATA_DIR = APP_DIR.joinpath("Datasets")

# The evaluation log is written to the system temp directory (plain str path).
LOG_PATH = os.path.join(
    tempfile.gettempdir(),
    "routing_evaluation_log.csv",
)
# ── Load Models ──────────────────────────────────────────────────────────────
# NOTE: joblib.load unpickles its input, so only trusted artifacts belong in
# MODEL_DIR.
print("Loading SBERT model...")
sbert = SentenceTransformer(
    "sentence-transformers/all-mpnet-base-v2",
    cache_folder="/data",
)

print("Loading classifiers...")
tag_model = joblib.load(MODEL_DIR / "sbert_classifier.pkl")
tag_calibrators = joblib.load(MODEL_DIR / "tag_calibrators.pkl")

# The priority artifact is either the estimator itself or a dict that wraps
# the estimator under the "model" key.
priority_bundle = joblib.load(MODEL_DIR / "tuned_priority_model.pkl")
if isinstance(priority_bundle, dict) and "model" in priority_bundle:
    priority_model = priority_bundle["model"]
else:
    priority_model = priority_bundle

priority_encoder = joblib.load(MODEL_DIR / "priority_encoder.pkl")
hf_scaler = joblib.load(MODEL_DIR / "hf_scaler.pkl")

# Tag names come from the fitted binarizer's classes_ attribute.
tag_binarizer = joblib.load(MODEL_DIR / "mlb_tag_binarizer.pkl")
tag_list = list(tag_binarizer.classes_)

dept_prototypes = joblib.load(MODEL_DIR / "department_prototypes.pkl")
print(f"[OK] Tags: {len(tag_list)}, Departments: {len(dept_prototypes)}")
# ── Load Dataset & Build FAISS Index ─────────────────────────────────────────
print("Loading dataset and embeddings...")
df = pd.read_csv(DATA_DIR / "Domain-A_Dataset_Clean.csv")

# Stored ticket embeddings, cast to float32 and L2-normalised in place so that
# the inner-product index below scores by cosine similarity.
embeddings = np.load(MODEL_DIR / "db_embeddings.npy").astype("float32")
faiss.normalize_L2(embeddings)

index = faiss.IndexFlatIP(embeddings.shape[1])
index.add(embeddings)
print(f"[OK] FAISS index: {index.ntotal} vectors")
# ── Duplicate Detection ──────────────────────────────────────────────────────
# Minimum nearest-neighbour score at which a ticket is reported as a duplicate.
DUP_THRESHOLD = 0.7623

# Ticket texts looked up by FAISS row id; new tickets are appended at runtime.
# NOTE(review): presumably row i of the CSV matches embedding i — confirm.
submitted_texts = df["text"].astype(str).tolist()
def check_duplicate(query_emb):
    """Check whether the query matches a ticket already in the index.

    Args:
        query_emb: Embedding of the incoming ticket (NumPy array). It is cast
            to float32, reshaped to one row and L2-normalised on a copy, so
            the caller's array is left untouched.

    Returns:
        A ``(is_duplicate, matched_text, score)`` tuple. ``matched_text`` is
        ``None`` when the ticket is not a duplicate, and ``"(unknown)"`` when
        the matching row has no stored text. ``score`` is the inner product
        with the nearest neighbour, or ``0.0`` when FAISS returned no
        neighbour at all.
    """
    query = query_emb.astype("float32").reshape(1, -1).copy()
    faiss.normalize_L2(query)

    # Only the nearest neighbour is ever inspected, so request exactly one.
    scores, ids = index.search(query, 1)
    best_idx = int(ids[0][0])

    if best_idx < 0:
        # FAISS labels missing results with -1 (e.g. an empty index). Without
        # this guard a negative id would silently index from the end of
        # submitted_texts.
        return False, None, 0.0

    best_score = float(scores[0][0])
    if best_score < DUP_THRESHOLD:
        return False, None, best_score

    matched = (
        submitted_texts[best_idx]
        if best_idx < len(submitted_texts)
        else "(unknown)"
    )
    return True, matched, best_score
def register_ticket(query_emb, text):
    """Store a processed ticket so later tickets can be matched against it.

    The embedding is copied, cast to float32, L2-normalised and appended to
    the FAISS index; ``text`` is appended to ``submitted_texts``.
    """
    row = query_emb.astype("float32")
    row = row.reshape(1, -1).copy()
    faiss.normalize_L2(row)
    index.add(row)
    submitted_texts.append(text)
# ── Tag Prediction ───────────────────────────────────────────────────────────
def predict_tags(text, emb):
    """Score every tag for one ticket and pick the five strongest.

    Args:
        text: Ticket text. Not used by this function; kept for the callers'
            signature.
        emb: Embedding of the ticket, passed to ``tag_model.predict_proba``.

    Returns:
        ``(top_idx, top_probs, calibrated)``: indices of the five
        highest-scoring tags in descending order, their calibrated scores,
        and the full calibrated score vector.
    """
    raw = np.asarray(tag_model.predict_proba([emb])[0], dtype=float)

    # Start from the raw scores; positions whose calibrator is None keep them.
    calibrated = raw.copy()
    for pos, calibrator in enumerate(tag_calibrators):
        if calibrator is not None:
            single_score = np.asarray([raw[pos]], dtype=float)
            calibrated[pos] = float(calibrator.predict(single_score)[0])

    ascending = np.argsort(calibrated)
    top_idx = ascending[-5:][::-1]
    top_probs = calibrated[top_idx]
    return top_idx, top_probs, calibrated
# ── Priority Prediction ─────────────────────────────────────────────────────
def extract_features(text):
    """Compute the six hand-crafted features used by the priority model.

    Args:
        text: Ticket text.

    Returns:
        A list, in this order:
        character count, word count, unique-word ratio (unique words divided
        by ``word count + 1``), mean word length (``0`` when there are no
        words), number of urgency keywords present, number of negation
        keywords present.

    Keyword checks are case-insensitive *substring* tests that count each
    keyword at most once (so "cannot" also counts as "not" and "no"). That
    behaviour is kept as is because the values feed ``hf_scaler`` and the
    priority model unchanged.
    """
    words = text.split()
    word_count = len(words)
    # Lower-case once instead of once per keyword.
    lowered = text.lower()

    return [
        len(text),
        word_count,
        len(set(words)) / (word_count + 1),
        np.mean([len(w) for w in words]) if words else 0,
        sum(keyword in lowered for keyword in ("urgent", "critical", "down")),
        sum(keyword in lowered for keyword in ("not", "cannot", "no")),
    ]
def predict_priority(text, emb):
    """Predict the priority label of a ticket.

    The model input is the ticket embedding (as one row) followed by the
    scaled hand-crafted features from ``extract_features``.

    Returns:
        The predicted class name from ``priority_encoder.classes_`` as ``str``.
    """
    handcrafted = hf_scaler.transform([extract_features(text)])
    model_input = np.concatenate([emb.reshape(1, -1), handcrafted], axis=1)
    class_idx = int(priority_model.predict(model_input)[0])
    return str(priority_encoder.classes_[class_idx])
# ── Routing Engine ───────────────────────────────────────────────────────────
def route_ticket(emb, text):
    """Choose a department, routing mode and priority for one ticket.

    The department is the prototype with the highest cosine similarity to
    ``emb``. The hybrid score is ``0.7 * mean(top tag scores) + 0.3 * best
    similarity``; it is compared against a per-ticket threshold
    (``mean + std`` of all tag scores, clipped to ``[0.45, 0.70]``) and a
    series of fixed fallbacks to select the routing mode.

    Returns:
        ``(mode, department, priority, hybrid_score, needs_review)``.
    """
    _, top_probs, all_probs = predict_tags(text, emb)
    vote_score = np.mean(top_probs)

    # Nearest department prototype; the first one wins on ties.
    best_dept = None
    best_sim = -1
    for dept, proto in dept_prototypes.items():
        sim = cosine_similarity([emb], [proto])[0][0]
        if sim > best_sim:
            best_dept, best_sim = dept, sim

    hybrid = 0.7 * vote_score + 0.3 * best_sim
    spread = np.mean(all_probs) + np.std(all_probs)
    threshold = np.clip(spread, 0.45, 0.70)

    # Decision ladder, most confident case first.
    if hybrid >= threshold:
        decision = ("AUTO_ROUTE", False)
    elif vote_score >= 0.40 and hybrid >= 0.40:
        decision = ("AUTO_ROUTE_VOTE", False)
    elif best_sim >= 0.65:
        decision = ("AUTO_ROUTE_SEMANTIC", False)
    elif hybrid >= 0.30:
        decision = ("AUTO_ROUTE_LOW_CONF", True)
    else:
        decision = ("HUMAN_REVIEW", True)
    mode, review = decision

    priority = predict_priority(text, emb)
    return mode, best_dept, priority, hybrid, review
# ── Logging ──────────────────────────────────────────────────────────────────
# Column order of the CSV evaluation log; rows are written in this order.
LOG_COLUMNS = [
    "ticket_id",
    "timestamp",
    "ticket_text",
    "duplicate_flag",
    "duplicate_score",
    "routing_mode",
    "department",
    "department_confidence",
    "priority",
    "priority_confidence",
    "selected_tags",
    "routing_score",
    "prediction_latency_ms",
    "explanation",
]
def _ensure_log_header():
    """Create the CSV log with its header row if it does not exist yet.

    The file is opened in exclusive-create mode (``"x"``), so checking for
    the file and creating it happen in one step. A separate existence check
    followed by ``open(..., "w")`` could truncate a log that another request
    created in between.
    """
    try:
        with open(LOG_PATH, "x", newline="", encoding="utf-8") as f:
            csv.writer(f).writerow(LOG_COLUMNS)
    except FileExistsError:
        # The log is already there; leave its contents untouched.
        pass
def _append_log(row_dict):
    """Append one row to the CSV log.

    Values are taken from ``row_dict`` in ``LOG_COLUMNS`` order; a missing
    key is written as an empty string.
    """
    _ensure_log_header()
    ordered_values = [row_dict.get(column, "") for column in LOG_COLUMNS]
    with open(LOG_PATH, "a", newline="", encoding="utf-8") as log_file:
        writer = csv.writer(log_file)
        writer.writerow(ordered_values)
# ── Main Processing Pipeline ────────────────────────────────────────────────
def process_ticket(text):
    """Run the full pipeline for one ticket: embed, dedupe, route, store, log.

    Args:
        text: Ticket text.

    Returns:
        A dict with the keys ``ticket_id``, ``status``, ``route``,
        ``department``, ``priority``, ``confidence``, ``review``, ``tags``,
        ``message`` and ``latency``. For duplicates, ``route`` is
        ``"DUPLICATE_CHAIN"``, ``confidence`` is the duplicate score and
        ``review`` is ``False``; otherwise they come from ``route_ticket``.

    Side effects:
        The ticket is added to the FAISS index (also when it is flagged as a
        duplicate) and one row is appended to the CSV log.
    """
    t0 = time.time()
    ticket_id = str(uuid.uuid4())[:8]
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    emb = sbert.encode(text)

    # Duplicate detection
    is_dup, dup_text, dup_score = check_duplicate(emb)

    # Routing
    mode, dept, priority, conf, review = route_ticket(emb, text)
    # Latency covers embedding, duplicate search and routing only.
    latency_ms = round((time.time() - t0) * 1000, 2)

    # Top three tags, for display and logging
    tag_idx, top_probs, _ = predict_tags(text, emb)
    tag_summary = ", ".join(
        f"{tag_list[idx]} ({top_probs[j]:.2f})"
        for j, idx in enumerate(tag_idx[:3])
    )

    # Only these fields differ between the duplicate and non-duplicate case.
    if is_dup:
        routing_mode = "DUPLICATE_CHAIN"
        status = "⚠️ DUPLICATE"
        shown_confidence = dup_score
        needs_review = False
        message = f"Duplicate of: {str(dup_text)[:200]}"
        explanation = (
            f"Duplicate detected (score={dup_score:.4f}). "
            f"Original: {str(dup_text)[:100]}"
        )
    else:
        routing_mode = mode
        status = "✅ NOT DUPLICATE"
        shown_confidence = conf
        needs_review = review
        message = "Ticket processed successfully"
        explanation = (
            f"Ticket routed to {dept} because predicted tags "
            f"[{tag_summary}] map to the {dept} department. "
            f"Routing mode: {mode}, Score: {conf:.3f}"
        )

    result = {
        "ticket_id": ticket_id,
        "status": status,
        "route": routing_mode,
        "department": dept,
        "priority": priority,
        "confidence": round(float(shown_confidence), 3),
        "review": needs_review,
        "tags": tag_summary,
        "message": message,
        "latency": latency_ms,
    }

    # Register & log
    register_ticket(emb, text)
    _append_log({
        "ticket_id": ticket_id,
        "timestamp": timestamp,
        "ticket_text": text,
        "duplicate_flag": is_dup,
        "duplicate_score": round(float(dup_score), 4),
        "routing_mode": routing_mode,
        "department": dept,
        "department_confidence": round(float(conf), 4),
        "priority": priority,
        "priority_confidence": "",
        "selected_tags": tag_summary,
        "routing_score": round(float(conf), 4),
        "prediction_latency_ms": latency_ms,
        "explanation": explanation,
    })
    return result
# ── Gradio UI Handler ────────────────────────────────────────────────────────
def ui_process(text):
    """Gradio handler: process a ticket and format the result for display.

    Args:
        text: Raw textbox content; ``None``, empty or whitespace-only input
            is rejected without running the pipeline.

    Returns:
        A 9-tuple of strings: duplicate status, ticket id, routing mode,
        department, priority, confidence, tags, review flag, details.
    """
    if not text or not text.strip():
        return ("⚠️ Please enter ticket text",) + ("",) * 8

    r = process_ticket(text.strip())

    # round(), not int(): 0.57 * 100 is 56.99999999999999 in floating point
    # and would otherwise be shown as 56%.
    conf_pct = round(r["confidence"] * 100)

    # Review badge
    review_badge = "🔴 Yes — Manual review recommended" if r["review"] else "🟢 No"

    # Priority with emoji; unknown labels are shown as they are
    priority_map = {
        "critical": "🔴 Critical",
        "high": "🟠 High",
        "medium": "🟡 Medium",
        "low": "🟢 Low",
    }
    priority_display = priority_map.get(r["priority"].lower(), r["priority"])

    # Route mode with emoji; unknown modes are shown as they are
    route_map = {
        "AUTO_ROUTE": "⚡ Auto-Routed",
        "AUTO_ROUTE_VOTE": "⚡ Auto-Routed (Tag Vote)",
        "AUTO_ROUTE_SEMANTIC": "⚡ Auto-Routed (Semantic)",
        "AUTO_ROUTE_LOW_CONF": "⚠️ Auto-Routed (Low Confidence)",
        "HUMAN_REVIEW": "🧑‍💼 Human Review Required",
        "DUPLICATE_CHAIN": "🔗 Duplicate Chain",
    }
    route_display = route_map.get(r["route"], r["route"])

    # The department is None when no prototype matched; show a placeholder
    # instead of failing on None.replace().
    department = r["department"]
    if department is None:
        dept_display = "Unassigned"
    else:
        dept_display = str(department).replace("_", " ")

    return (
        r["status"],
        f"🎫 {r['ticket_id']}",
        route_display,
        f"🏢 {dept_display}",
        priority_display,
        f"{conf_pct}%",
        r["tags"],
        review_badge,
        r["message"],
    )
# ── Custom CSS ───────────────────────────────────────────────────────────────
# Stylesheet handed to gr.Blocks(css=CSS). The class selectors below are the
# ones attached in the UI section through elem_classes ("result-card",
# "status-box", "submit-btn", "clear-btn") or written into the gr.HTML
# snippets ("app-header", "stats-row"). The final rule hides the page footer.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
* { font-family: 'Inter', sans-serif !important; }
.gradio-container {
max-width: 960px !important;
margin: 0 auto !important;
}
/* Header */
.app-header {
text-align: center;
padding: 1.5rem 1rem;
background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 50%, #a855f7 100%);
border-radius: 16px;
margin-bottom: 1.5rem;
box-shadow: 0 8px 32px rgba(79, 70, 229, 0.3);
}
.app-header h1 {
color: white !important;
font-size: 1.75rem !important;
font-weight: 700 !important;
margin: 0 !important;
letter-spacing: -0.02em;
}
.app-header p {
color: rgba(255,255,255,0.85) !important;
font-size: 0.95rem !important;
margin: 0.4rem 0 0 0 !important;
}
/* Cards */
.result-card {
background: linear-gradient(145deg, rgba(255,255,255,0.05), rgba(255,255,255,0.02));
border: 1px solid rgba(255,255,255,0.1);
border-radius: 12px;
padding: 0.25rem;
}
/* Status indicators */
.status-box textarea, .status-box input {
font-weight: 600 !important;
font-size: 1rem !important;
}
/* Submit button */
.submit-btn {
background: linear-gradient(135deg, #4f46e5, #7c3aed) !important;
border: none !important;
color: white !important;
font-weight: 600 !important;
font-size: 1rem !important;
padding: 0.75rem 2rem !important;
border-radius: 10px !important;
box-shadow: 0 4px 16px rgba(79, 70, 229, 0.4) !important;
transition: all 0.3s ease !important;
}
.submit-btn:hover {
transform: translateY(-2px) !important;
box-shadow: 0 6px 24px rgba(79, 70, 229, 0.5) !important;
}
/* Clear button */
.clear-btn {
border: 1px solid rgba(255,255,255,0.2) !important;
border-radius: 10px !important;
font-weight: 500 !important;
}
/* Stats footer */
.stats-row {
text-align: center;
padding: 0.75rem;
background: rgba(79, 70, 229, 0.08);
border-radius: 10px;
margin-top: 0.5rem;
font-size: 0.85rem;
color: #a5b4fc;
}
footer { display: none !important; }
"""
# ── Example Tickets ──────────────────────────────────────────────────────────
# Sample tickets handed to gr.Examples; each entry is a one-element list
# because the ticket textbox is the only input component.
EXAMPLES = [
    [ticket_text]
    for ticket_text in (
        "My laptop screen is flickering and sometimes goes completely black. I've tried restarting but the issue persists after login.",
        "I cannot access the company VPN from my home network. It keeps showing authentication failed error even though my password is correct.",
        "We need to upgrade our database server as the current one is running out of storage space and response times have increased significantly.",
        "I was charged twice for my last month's subscription. Please process a refund for the duplicate charge.",
        "The email server has been down since this morning. No one in the office can send or receive emails. This is critical!",
        "Can you provide training materials for the new CRM software that was deployed last week?",
    )
]
# ── Build UI ─────────────────────────────────────────────────────────────────
# Two-column layout: ticket input, buttons and examples on the left; the nine
# read-only result fields on the right, in the same order as the tuple
# returned by ui_process. Emoji in the labels were restored from mis-encoded
# text.
with gr.Blocks(
    css=CSS,
    theme=gr.themes.Soft(primary_hue="indigo", neutral_hue="slate"),
    title="Ticket Auto-Routing System",
) as app:
    # Header
    gr.HTML("""
    <div class="app-header">
        <h1>🎫 Intelligent Ticket Auto-Routing System</h1>
        <p>AI-powered ticket classification, routing, priority prediction &amp; duplicate detection</p>
    </div>
    """)

    with gr.Row():
        # ── Left: Input ──
        with gr.Column(scale=1):
            ticket_input = gr.Textbox(
                label="📝 Ticket Description",
                placeholder="Describe the support issue in detail...",
                lines=6,
                max_lines=12,
            )
            with gr.Row():
                submit_btn = gr.Button("🚀 Process Ticket", variant="primary", elem_classes=["submit-btn"])
                clear_btn = gr.ClearButton(value="🗑️ Clear", elem_classes=["clear-btn"])
            gr.Examples(
                examples=EXAMPLES,
                inputs=ticket_input,
                label="💡 Try these examples",
            )

        # ── Right: Results ──
        with gr.Column(scale=1):
            with gr.Group(elem_classes=["result-card"]):
                dup_status = gr.Textbox(label="🔍 Duplicate Status", interactive=False, elem_classes=["status-box"])
                ticket_id = gr.Textbox(label="🆔 Ticket ID", interactive=False)
            with gr.Group(elem_classes=["result-card"]):
                with gr.Row():
                    route_mode = gr.Textbox(label="🛤️ Routing Mode", interactive=False)
                    department = gr.Textbox(label="🏢 Department", interactive=False)
                with gr.Row():
                    priority = gr.Textbox(label="⚡ Priority", interactive=False)
                    confidence = gr.Textbox(label="📊 Confidence", interactive=False)
            with gr.Group(elem_classes=["result-card"]):
                tags = gr.Textbox(label="🏷️ Predicted Tags", interactive=False)
                needs_review = gr.Textbox(label="👀 Needs Review", interactive=False)
                message = gr.Textbox(label="💬 Details", interactive=False, lines=2)

    # Counts are read once, when the UI is built.
    gr.HTML(f"""
    <div class="stats-row">
        📊 Database: <strong>{index.ntotal:,}</strong> tickets indexed
        &nbsp;•&nbsp;
        🏷️ <strong>{len(tag_list)}</strong> tag categories
        &nbsp;•&nbsp;
        🏢 <strong>{len(dept_prototypes)}</strong> departments
    </div>
    """)

    # ── Wire events ──
    # Button click and Enter in the textbox run the same handler; the clear
    # button resets the input and every output field.
    outputs = [dup_status, ticket_id, route_mode, department, priority, confidence, tags, needs_review, message]
    submit_btn.click(fn=ui_process, inputs=ticket_input, outputs=outputs)
    ticket_input.submit(fn=ui_process, inputs=ticket_input, outputs=outputs)
    clear_btn.add([ticket_input] + outputs)
# ── Launch ───────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    app.launch(**launch_options)