TuShar2309 committed on
Commit
7d1de06
·
verified ·
1 Parent(s): bf61ac4

Updated File

Browse files
Files changed (1) hide show
  1. app.py +257 -256
app.py CHANGED
@@ -1,256 +1,257 @@
1
- """
2
- IT Ticket Classifier - HuggingFace Spaces App
3
- Gradio interface for classifying IT support tickets
4
- """
5
-
6
- import gradio as gr
7
- import torch
8
- import torch.nn as nn
9
- from transformers import DistilBertModel, AutoTokenizer
10
- from huggingface_hub import hf_hub_download
11
- import re
12
- import os
13
-
14
- # Configuration
15
- HF_REPO_ID = "TuShar2309/ticket-classifier"
16
- MODEL_FILENAME = "ticket_classifier.pt"
17
-
18
- CLASS_NAMES = [
19
- "Access Management", "Backup", "Database", "Email",
20
- "General Inquiry", "Hardware", "Network", "Other",
21
- "Printing", "Security", "Software", "Storage"
22
- ]
23
-
24
- # Category descriptions for display
25
- CATEGORY_INFO = {
26
- "Access Management": "πŸ” Login, permissions, MFA, account issues",
27
- "Backup": "πŸ’Ύ Backup and restore operations",
28
- "Database": "πŸ—„οΈ SQL, database connectivity, queries",
29
- "Email": "πŸ“§ Outlook, calendar, mailbox issues",
30
- "General Inquiry": "❓ How-to questions, policies",
31
- "Hardware": "πŸ’» Laptop, monitor, keyboard, mouse",
32
- "Network": "🌐 WiFi, VPN, internet connectivity",
33
- "Other": "πŸ“‹ Miscellaneous requests",
34
- "Printing": "πŸ–¨οΈ Printers, scanning, print queue",
35
- "Security": "πŸ”’ Threats, malware, security incidents",
36
- "Software": "πŸ“¦ Application issues, installations",
37
- "Storage": "πŸ“ OneDrive, SharePoint, file storage"
38
- }
39
-
40
-
41
- class TicketPreprocessor:
42
- def __init__(self):
43
- self._email = re.compile(r'\b[\w.-]+@[\w.-]+\.\w+\b')
44
-
45
- def clean(self, text):
46
- return ' '.join(self._email.sub('[EMAIL]', str(text or '')).lower().split())
47
-
48
- def combine(self, subject, description):
49
- return f"[SUBJECT] {self.clean(subject)} [SEP] [DESCRIPTION] {self.clean(description)}"
50
-
51
-
52
- class TicketClassifier(nn.Module):
53
- def __init__(self, num_classes, model_name="distilbert-base-uncased", dropout=0.3):
54
- super().__init__()
55
- self.bert = DistilBertModel.from_pretrained(model_name)
56
- self.classifier = nn.Sequential(
57
- nn.Dropout(dropout),
58
- nn.Linear(768, 256),
59
- nn.GELU(),
60
- nn.Dropout(dropout),
61
- nn.Linear(256, num_classes)
62
- )
63
-
64
- def forward(self, input_ids, attention_mask):
65
- outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
66
- return self.classifier(outputs.last_hidden_state[:, 0, :])
67
-
68
- def predict_proba(self, input_ids, attention_mask):
69
- logits = self.forward(input_ids, attention_mask)
70
- return torch.softmax(logits, dim=-1)
71
-
72
-
73
- # Load model
74
- print("Loading model...")
75
- device = "cuda" if torch.cuda.is_available() else "cpu"
76
- print(f"Device: {device}")
77
-
78
- try:
79
- model_path = hf_hub_download(repo_id=HF_REPO_ID, filename=MODEL_FILENAME)
80
- print(f"Model downloaded: {model_path}")
81
-
82
- tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
83
- model = TicketClassifier(num_classes=len(CLASS_NAMES))
84
-
85
- checkpoint = torch.load(model_path, map_location=device)
86
- if 'model_state_dict' in checkpoint:
87
- model.load_state_dict(checkpoint['model_state_dict'])
88
- else:
89
- model.load_state_dict(checkpoint)
90
-
91
- model.to(device)
92
- model.eval()
93
- MODEL_LOADED = True
94
- print("Model loaded successfully!")
95
- except Exception as e:
96
- print(f"Error loading model: {e}")
97
- MODEL_LOADED = False
98
-
99
- preprocessor = TicketPreprocessor()
100
-
101
-
102
- def classify_ticket(subject, description):
103
- """Classify a ticket and return results."""
104
- if not subject and not description:
105
- return "⚠️ Please enter a subject or description", "", ""
106
-
107
- if not MODEL_LOADED:
108
- return "❌ Model not loaded", "", ""
109
-
110
- try:
111
- # Preprocess and tokenize
112
- combined = preprocessor.combine(subject, description)
113
- inputs = tokenizer(
114
- combined,
115
- return_tensors="pt",
116
- truncation=True,
117
- max_length=256,
118
- padding='max_length'
119
- ).to(device)
120
-
121
- # Predict
122
- with torch.no_grad():
123
- probs = model.predict_proba(inputs['input_ids'], inputs['attention_mask'])[0]
124
-
125
- probs_np = probs.cpu().numpy()
126
- top_indices = probs_np.argsort()[::-1]
127
-
128
- # Primary prediction
129
- primary_idx = top_indices[0]
130
- primary_cat = CLASS_NAMES[primary_idx]
131
- primary_conf = probs_np[primary_idx] * 100
132
-
133
- # Status
134
- if primary_conf >= 80:
135
- status = "βœ… **High Confidence** - Auto-route recommended"
136
- elif primary_conf >= 60:
137
- status = "⚠️ **Medium Confidence** - Review suggested"
138
- else:
139
- status = "πŸ” **Low Confidence** - Human review required"
140
-
141
- # Format primary result
142
- primary_result = f"""
143
- ## {CATEGORY_INFO.get(primary_cat, primary_cat)}
144
-
145
- ### Predicted Category: **{primary_cat}**
146
- ### Confidence: **{primary_conf:.1f}%**
147
-
148
- {status}
149
- """
150
-
151
- # Format alternatives
152
- alternatives = "### Other Possibilities:\n\n"
153
- for i in range(1, min(4, len(top_indices))):
154
- idx = top_indices[i]
155
- cat = CLASS_NAMES[idx]
156
- conf = probs_np[idx] * 100
157
- alternatives += f"- **{cat}**: {conf:.1f}%\n"
158
-
159
- # Confidence bar
160
- conf_display = f"{'β–ˆ' * int(primary_conf / 5)}{'β–‘' * (20 - int(primary_conf / 5))} {primary_conf:.1f}%"
161
-
162
- return primary_result, alternatives, conf_display
163
-
164
- except Exception as e:
165
- return f"❌ Error: {str(e)}", "", ""
166
-
167
-
168
- # Example tickets
169
- examples = [
170
- ["VPN not connecting", "Cannot connect to corporate VPN from home, getting timeout error"],
171
- ["Suspicious email received", "Got an email asking for my password, looks like phishing"],
172
- ["Need SharePoint access", "Just joined the marketing team, need access to the team SharePoint"],
173
- ["Laptop screen flickering", "My laptop screen has been flickering intermittently since yesterday"],
174
- ["Outlook not receiving emails", "Haven't received any emails in Outlook for the past 3 hours"],
175
- ["How to reset password", "What is the process to reset my Active Directory password?"],
176
- ["Printer not working", "Print jobs stuck in queue and won't print"],
177
- ["SQL query slow", "Database query that used to take 2 seconds now takes 10 minutes"],
178
- ]
179
-
180
-
181
- # Create Gradio interface
182
- with gr.Blocks(
183
- title="IT Ticket Classifier",
184
- theme=gr.themes.Soft(primary_hue="green", secondary_hue="blue"),
185
- css="""
186
- .gradio-container { max-width: 900px !important; }
187
- .primary-result { font-size: 1.2em; }
188
- """
189
- ) as demo:
190
- gr.Markdown("""
191
- # 🎫 IT Service Desk Ticket Classifier
192
-
193
- **Powered by DistilBERT** | Classifies tickets into 12 IT support categories
194
-
195
- Enter a ticket subject and description below to get the predicted category.
196
- """)
197
-
198
- with gr.Row():
199
- with gr.Column(scale=1):
200
- subject_input = gr.Textbox(
201
- label="πŸ“‹ Ticket Subject",
202
- placeholder="e.g., VPN not connecting",
203
- lines=1
204
- )
205
- description_input = gr.Textbox(
206
- label="πŸ“ Ticket Description",
207
- placeholder="e.g., Cannot connect to corporate VPN from home, getting timeout error after 30 seconds...",
208
- lines=4
209
- )
210
- classify_btn = gr.Button("πŸ” Classify Ticket", variant="primary", size="lg")
211
-
212
- with gr.Column(scale=1):
213
- primary_output = gr.Markdown(label="Primary Prediction")
214
- confidence_output = gr.Textbox(label="Confidence", interactive=False)
215
- alternatives_output = gr.Markdown(label="Alternatives")
216
-
217
- classify_btn.click(
218
- fn=classify_ticket,
219
- inputs=[subject_input, description_input],
220
- outputs=[primary_output, alternatives_output, confidence_output]
221
- )
222
-
223
- gr.Examples(
224
- examples=examples,
225
- inputs=[subject_input, description_input],
226
- outputs=[primary_output, alternatives_output, confidence_output],
227
- fn=classify_ticket,
228
- cache_examples=False
229
- )
230
-
231
- gr.Markdown("""
232
- ---
233
- ### πŸ“Š Supported Categories
234
-
235
- | Category | Description |
236
- |----------|-------------|
237
- | Access Management | Login, permissions, MFA |
238
- | Backup | Backup and restore |
239
- | Database | SQL, queries, DB issues |
240
- | Email | Outlook, calendar |
241
- | General Inquiry | How-to questions |
242
- | Hardware | Devices, laptops |
243
- | Network | WiFi, VPN, internet |
244
- | Other | Miscellaneous |
245
- | Printing | Printers, scanning |
246
- | Security | Threats, incidents |
247
- | Software | Applications |
248
- | Storage | OneDrive, SharePoint |
249
-
250
- ---
251
- **Model**: DistilBERT fine-tuned on 5,760 IT support tickets
252
- """)
253
-
254
-
255
- if __name__ == "__main__":
256
- demo.launch()
 
 
1
+ """
2
+ IT Ticket Classifier - HuggingFace Spaces App
3
+ Gradio interface for classifying IT support tickets
4
+ """
5
+
6
+ import gradio as gr
7
+ import torch
8
+ import torch.nn as nn
9
+ from transformers import DistilBertModel, AutoTokenizer
10
+ from huggingface_hub import hf_hub_download
11
+ import re
12
+ import os
13
+ import numpy as np
14
+
15
+ # Configuration
16
+ HF_REPO_ID = "TuShar2309/ticket-classifier"
17
+ MODEL_FILENAME = "ticket_classifier.pt"
18
+
19
+ CLASS_NAMES = [
20
+ "Access Management", "Backup", "Database", "Email",
21
+ "General Inquiry", "Hardware", "Network", "Other",
22
+ "Printing", "Security", "Software", "Storage"
23
+ ]
24
+
25
+ # Category descriptions for display
26
+ CATEGORY_INFO = {
27
+ "Access Management": "πŸ” Login, permissions, MFA, account issues",
28
+ "Backup": "πŸ’Ύ Backup and restore operations",
29
+ "Database": "πŸ—„οΈ SQL, database connectivity, queries",
30
+ "Email": "πŸ“§ Outlook, calendar, mailbox issues",
31
+ "General Inquiry": "❓ How-to questions, policies",
32
+ "Hardware": "πŸ’» Laptop, monitor, keyboard, mouse",
33
+ "Network": "🌐 WiFi, VPN, internet connectivity",
34
+ "Other": "πŸ“‹ Miscellaneous requests",
35
+ "Printing": "πŸ–¨οΈ Printers, scanning, print queue",
36
+ "Security": "πŸ”’ Threats, malware, security incidents",
37
+ "Software": "πŸ“¦ Application issues, installations",
38
+ "Storage": "πŸ“ OneDrive, SharePoint, file storage"
39
+ }
40
+
41
+
42
+ class TicketPreprocessor:
43
+ def __init__(self):
44
+ self._email = re.compile(r'\b[\w.-]+@[\w.-]+\.\w+\b')
45
+
46
+ def clean(self, text):
47
+ return ' '.join(self._email.sub('[EMAIL]', str(text or '')).lower().split())
48
+
49
+ def combine(self, subject, description):
50
+ return f"[SUBJECT] {self.clean(subject)} [SEP] [DESCRIPTION] {self.clean(description)}"
51
+
52
+
53
+ class TicketClassifier(nn.Module):
54
+ def __init__(self, num_classes, model_name="distilbert-base-uncased", dropout=0.3):
55
+ super().__init__()
56
+ self.bert = DistilBertModel.from_pretrained(model_name)
57
+ self.classifier = nn.Sequential(
58
+ nn.Dropout(dropout),
59
+ nn.Linear(768, 256),
60
+ nn.GELU(),
61
+ nn.Dropout(dropout),
62
+ nn.Linear(256, num_classes)
63
+ )
64
+
65
+ def forward(self, input_ids, attention_mask):
66
+ outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
67
+ return self.classifier(outputs.last_hidden_state[:, 0, :])
68
+
69
+ def predict_proba(self, input_ids, attention_mask):
70
+ logits = self.forward(input_ids, attention_mask)
71
+ return torch.softmax(logits, dim=-1)
72
+
73
+
74
+ # Load model
75
+ print("Loading model...")
76
+ device = "cuda" if torch.cuda.is_available() else "cpu"
77
+ print(f"Device: {device}")
78
+
79
+ try:
80
+ model_path = hf_hub_download(repo_id=HF_REPO_ID, filename=MODEL_FILENAME)
81
+ print(f"Model downloaded: {model_path}")
82
+
83
+ tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
84
+ model = TicketClassifier(num_classes=len(CLASS_NAMES))
85
+
86
+ checkpoint = torch.load(model_path, map_location=device)
87
+ if 'model_state_dict' in checkpoint:
88
+ model.load_state_dict(checkpoint['model_state_dict'])
89
+ else:
90
+ model.load_state_dict(checkpoint)
91
+
92
+ model.to(device)
93
+ model.eval()
94
+ MODEL_LOADED = True
95
+ print("Model loaded successfully!")
96
+ except Exception as e:
97
+ print(f"Error loading model: {e}")
98
+ MODEL_LOADED = False
99
+
100
+ preprocessor = TicketPreprocessor()
101
+
102
+
103
+ def classify_ticket(subject, description):
104
+ """Classify a ticket and return results."""
105
+ if not subject and not description:
106
+ return "⚠️ Please enter a subject or description", "", ""
107
+
108
+ if not MODEL_LOADED:
109
+ return "❌ Model not loaded", "", ""
110
+
111
+ try:
112
+ # Preprocess and tokenize
113
+ combined = preprocessor.combine(subject, description)
114
+ inputs = tokenizer(
115
+ combined,
116
+ return_tensors="pt",
117
+ truncation=True,
118
+ max_length=256,
119
+ padding='max_length'
120
+ ).to(device)
121
+
122
+ # Predict
123
+ with torch.no_grad():
124
+ probs = model.predict_proba(inputs['input_ids'], inputs['attention_mask'])[0]
125
+
126
+ probs_np = probs.cpu().numpy()
127
+ top_indices = probs_np.argsort()[::-1]
128
+
129
+ # Primary prediction
130
+ primary_idx = top_indices[0]
131
+ primary_cat = CLASS_NAMES[primary_idx]
132
+ primary_conf = probs_np[primary_idx] * 100
133
+
134
+ # Status
135
+ if primary_conf >= 80:
136
+ status = "βœ… **High Confidence** - Auto-route recommended"
137
+ elif primary_conf >= 60:
138
+ status = "⚠️ **Medium Confidence** - Review suggested"
139
+ else:
140
+ status = "πŸ” **Low Confidence** - Human review required"
141
+
142
+ # Format primary result
143
+ primary_result = f"""
144
+ ## {CATEGORY_INFO.get(primary_cat, primary_cat)}
145
+
146
+ ### Predicted Category: **{primary_cat}**
147
+ ### Confidence: **{primary_conf:.1f}%**
148
+
149
+ {status}
150
+ """
151
+
152
+ # Format alternatives
153
+ alternatives = "### Other Possibilities:\n\n"
154
+ for i in range(1, min(4, len(top_indices))):
155
+ idx = top_indices[i]
156
+ cat = CLASS_NAMES[idx]
157
+ conf = probs_np[idx] * 100
158
+ alternatives += f"- **{cat}**: {conf:.1f}%\n"
159
+
160
+ # Confidence bar
161
+ conf_display = f"{'β–ˆ' * int(primary_conf / 5)}{'β–‘' * (20 - int(primary_conf / 5))} {primary_conf:.1f}%"
162
+
163
+ return primary_result, alternatives, conf_display
164
+
165
+ except Exception as e:
166
+ return f"❌ Error: {str(e)}", "", ""
167
+
168
+
169
+ # Example tickets
170
+ examples = [
171
+ ["VPN not connecting", "Cannot connect to corporate VPN from home, getting timeout error"],
172
+ ["Suspicious email received", "Got an email asking for my password, looks like phishing"],
173
+ ["Need SharePoint access", "Just joined the marketing team, need access to the team SharePoint"],
174
+ ["Laptop screen flickering", "My laptop screen has been flickering intermittently since yesterday"],
175
+ ["Outlook not receiving emails", "Haven't received any emails in Outlook for the past 3 hours"],
176
+ ["How to reset password", "What is the process to reset my Active Directory password?"],
177
+ ["Printer not working", "Print jobs stuck in queue and won't print"],
178
+ ["SQL query slow", "Database query that used to take 2 seconds now takes 10 minutes"],
179
+ ]
180
+
181
+
182
+ # Create Gradio interface
183
+ with gr.Blocks(
184
+ title="IT Ticket Classifier",
185
+ theme=gr.themes.Soft(primary_hue="green", secondary_hue="blue"),
186
+ css="""
187
+ .gradio-container { max-width: 900px !important; }
188
+ .primary-result { font-size: 1.2em; }
189
+ """
190
+ ) as demo:
191
+ gr.Markdown("""
192
+ # 🎫 IT Service Desk Ticket Classifier
193
+
194
+ **Powered by DistilBERT** | Classifies tickets into 12 IT support categories
195
+
196
+ Enter a ticket subject and description below to get the predicted category.
197
+ """)
198
+
199
+ with gr.Row():
200
+ with gr.Column(scale=1):
201
+ subject_input = gr.Textbox(
202
+ label="πŸ“‹ Ticket Subject",
203
+ placeholder="e.g., VPN not connecting",
204
+ lines=1
205
+ )
206
+ description_input = gr.Textbox(
207
+ label="πŸ“ Ticket Description",
208
+ placeholder="e.g., Cannot connect to corporate VPN from home, getting timeout error after 30 seconds...",
209
+ lines=4
210
+ )
211
+ classify_btn = gr.Button("πŸ” Classify Ticket", variant="primary", size="lg")
212
+
213
+ with gr.Column(scale=1):
214
+ primary_output = gr.Markdown(label="Primary Prediction")
215
+ confidence_output = gr.Textbox(label="Confidence", interactive=False)
216
+ alternatives_output = gr.Markdown(label="Alternatives")
217
+
218
+ classify_btn.click(
219
+ fn=classify_ticket,
220
+ inputs=[subject_input, description_input],
221
+ outputs=[primary_output, alternatives_output, confidence_output]
222
+ )
223
+
224
+ gr.Examples(
225
+ examples=examples,
226
+ inputs=[subject_input, description_input],
227
+ outputs=[primary_output, alternatives_output, confidence_output],
228
+ fn=classify_ticket,
229
+ cache_examples=False
230
+ )
231
+
232
+ gr.Markdown("""
233
+ ---
234
+ ### πŸ“Š Supported Categories
235
+
236
+ | Category | Description |
237
+ |----------|-------------|
238
+ | Access Management | Login, permissions, MFA |
239
+ | Backup | Backup and restore |
240
+ | Database | SQL, queries, DB issues |
241
+ | Email | Outlook, calendar |
242
+ | General Inquiry | How-to questions |
243
+ | Hardware | Devices, laptops |
244
+ | Network | WiFi, VPN, internet |
245
+ | Other | Miscellaneous |
246
+ | Printing | Printers, scanning |
247
+ | Security | Threats, incidents |
248
+ | Software | Applications |
249
+ | Storage | OneDrive, SharePoint |
250
+
251
+ ---
252
+ **Model**: DistilBERT fine-tuned on 5,760 IT support tickets
253
+ """)
254
+
255
+
256
+ if __name__ == "__main__":
257
+ demo.launch()