|
|
--- |
|
|
language: en |
|
|
tags: |
|
|
- sentiment-analysis |
|
|
- it-support |
|
|
- help-desk |
|
|
- customer-feedback |
|
|
- roberta |
|
|
- regression |
|
|
license: cc-by-4.0 |
|
|
datasets: |
|
|
- custom |
|
|
metrics: |
|
|
- accuracy |
|
|
- mse |
|
|
- mae |
|
|
pipeline_tag: text-classification |
|
|
--- |
|
|
|
|
|
# IT Help Desk Agent Sentiment Analysis |
|
|
|
|
|
## Model Description |
|
|
|
|
|
This is a fine-tuned RoBERTa model specifically designed for sentiment analysis of IT help desk and technical support interactions. The model performs **regression-based sentiment analysis**, outputting continuous sentiment scores from -1 (very negative) to +1 (very positive), which are then classified into three categories: |
|
|
|
|
|
- **Negative** (score ≤ -0.33): Dissatisfied customers, complaints, frustrations |
|
|
- **Neutral** (score strictly between -0.33 and 0.33): Informational requests, neutral feedback
|
|
- **Positive** (score ≥ 0.33): Satisfied customers, praise, positive experiences |
|
|
|
|
|
## Intended Use |
|
|
|
|
|
This model is specifically optimized for analyzing: |
|
|
- IT support ticket feedback |
|
|
- Help desk interaction sentiment |
|
|
- Technical support chat transcripts |
|
|
- Customer service feedback in IT contexts |
|
|
- Agent performance evaluation through customer sentiment |
|
|
|
|
|
## Model Architecture |
|
|
|
|
|
- **Base Model**: RoBERTa-base |
|
|
- **Architecture**: Custom regression head with dropout layers |
|
|
- **Output**: Continuous sentiment score (-1 to +1) with discrete classification |
|
|
- **Max Sequence Length**: 512 tokens |
|
|
|
|
|
## Usage |
|
|
|
|
|
```python |
|
|
# Simple usage example for KameronB/help-desk-agent-sentiment |
|
|
import torch |
|
|
import torch.nn as nn |
|
|
from transformers import RobertaTokenizer, RobertaModel |
|
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
# Define the model architecture |
|
|
class RobertaForSentimentRegression(nn.Module):
    """RoBERTa encoder with a small MLP head that regresses a sentiment score.

    The head ends in ``Tanh``, so every predicted score lies in [-1, 1].

    Args:
        model_name: Checkpoint name or path handed to
            ``RobertaModel.from_pretrained``.
        dropout_rate: Dropout probability used on the pooled representation
            and between the linear layers of the head.
    """

    def __init__(self, model_name='roberta-base', dropout_rate=0.3):
        super().__init__()
        self.roberta = RobertaModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(dropout_rate)

        # Read the encoder width from its config instead of assuming 768, so
        # the head also fits encoders of a different size. For 'roberta-base'
        # this is still 768, which keeps published weights loadable.
        hidden_size = self.roberta.config.hidden_size

        # Layer order is kept as-is: the Sequential indices are part of the
        # state-dict keys of saved checkpoints.
        self.regression_head = nn.Sequential(
            nn.Linear(hidden_size, 256), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(256, 64), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(64, 1), nn.Tanh(),
        )

    def forward(self, input_ids, attention_mask):
        """Return one sentiment score per input sequence.

        Args:
            input_ids: Token ids, forwarded to the encoder unchanged
                (assumed shape ``(batch, seq_len)`` -- confirm with caller).
            attention_mask: Mask forwarded to the encoder unchanged.

        Returns:
            Tensor of scores with the head's trailing size-1 dimension
            squeezed away.
        """
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        # The hidden state at position 0 serves as the sequence summary.
        pooled_output = self.dropout(outputs.last_hidden_state[:, 0, :])
        return self.regression_head(pooled_output).squeeze(-1)
|
|
|
|
|
|
|
|
# Load model and tokenizer from Hugging Face
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = RobertaTokenizer.from_pretrained('KameronB/help-desk-agent-sentiment')
model = RobertaForSentimentRegression()

# Download and load model weights
model_path = hf_hub_download(
    repo_id="KameronB/help-desk-agent-sentiment",
    filename="roberta_sentiment_regression.pth",
)
# torch.load is pickle-based; weights_only=True limits unpickling to tensors
# and plain containers so a downloaded file cannot run arbitrary code.
# (The weights_only argument requires PyTorch 1.13 or newer.)
state_dict = torch.load(model_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)

# Move to the target device and switch to inference mode (disables dropout)
model.to(device).eval()
|
|
|
|
|
# Prediction function |
|
|
def predict_sentiment(text):
    """Score a single text and map the score to a sentiment label.

    Uses the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        text (str): The text to score.

    Returns:
        tuple: ``(score, label)`` where ``label`` is ``'Negative'`` for
        scores <= -0.33, ``'Positive'`` for scores >= 0.33 and
        ``'Neutral'`` otherwise.
    """
    # Tokenize to a fixed 512-token window, truncating longer inputs
    batch = tokenizer(
        text,
        truncation=True,
        padding='max_length',
        max_length=512,
        return_tensors='pt',
    )
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)

    # Inference only: no gradients are needed
    with torch.no_grad():
        score = model(input_ids, attention_mask).item()

    if score <= -0.33:
        label = 'Negative'
    elif score >= 0.33:
        label = 'Positive'
    else:
        label = 'Neutral'
    return score, label
|
|
|
|
|
# Example usage |
|
|
examples = [
    "The technician was amazing and fixed my issue quickly!",
    "I waited hours and the agent was unhelpful.",
    "I submitted a ticket yesterday about password reset.",
    "The technician was absolutely amazing and fixed my issue in minutes!",
    "I waited on hold for 2 hours and the agent was completely unhelpful.",
    "I called the help desk at 3 PM about my password reset.",
    "The support team is the worst I've ever dealt with - completely incompetent!",
    "Sarah was very professional and walked me through the solution step by step.",
    "My ticket was submitted yesterday and I received confirmation.",
    "This is the most frustrating experience I've ever had with tech support!",
    "The agent was okay, nothing special but got the job done.",
    "Mike is an absolute legend! Best tech support ever!",
    "I'm not sure if the issue is completely resolved yet.",
    "It is a bit rainy outside today",
]

# map() is lazy, so each text is scored right before its result is printed
for text, (score, sentiment) in zip(examples, map(predict_sentiment, examples)):
    print(f"Text: {text}")
    print(f"Sentiment: {sentiment} (Score: {score:.3f})")
|
|
|
|
|
``` |
|
|
|
|
|
|
|
|
To score a longer, multi-sentence piece of text, use the function below. It scores each sentence separately and returns their length-weighted average as the overall sentiment.
|
|
|
|
|
```python |
|
|
# Import NLTK and fetch the sentence-tokenizer data
# (NLTK itself must already be installed, e.g. `pip install nltk`)
import nltk
from nltk.tokenize import sent_tokenize

# Recent NLTK releases read the 'punkt_tab' resource instead of the pickled
# 'punkt' models, so request both to work across NLTK versions.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource, quiet=True)
|
|
|
|
|
def predict_paragraph_sentiment(text):
    """
    Predict sentiment for a paragraph by analyzing individual sentences
    and calculating a weighted average based on sentence length.

    Each sentence is scored with ``predict_sentiment``. Its weight is its
    character count divided by the total character count of all scored
    sentences, so the weights sum to 1.

    Args:
        text (str): The paragraph text to analyze

    Returns:
        tuple: (weighted_score, overall_sentiment, sentence_details)
            ``sentence_details`` is a list of dicts with the keys
            'sentence', 'score', 'sentiment', 'length' and 'weight'.
            Returns ``(0.0, 'Neutral', [])`` when no non-empty sentence
            is found.
    """
    # Break text into sentences using NLTK, dropping whitespace-only pieces
    stripped = (sentence.strip() for sentence in sent_tokenize(text))
    sentences = [sentence for sentence in stripped if sentence]

    if not sentences:
        return 0.0, 'Neutral', []

    # Normalise by the characters actually scored. Dividing by len(text)
    # would also count whitespace between sentences, making the weights sum
    # to less than 1 and pulling the result toward 0.
    total_chars = sum(len(sentence) for sentence in sentences)

    # Analyze each sentence
    sentence_results = []
    for sentence in sentences:
        score, sentiment = predict_sentiment(sentence)
        sentence_results.append({
            'sentence': sentence,
            'score': score,
            'sentiment': sentiment,
            'length': len(sentence),
            'weight': len(sentence) / total_chars,
        })

    # Calculate weighted average
    weighted_score = sum(
        result['score'] * result['weight'] for result in sentence_results
    )

    # Determine overall sentiment (same thresholds as single-text prediction)
    if weighted_score <= -0.33:
        overall_sentiment = 'Negative'
    elif weighted_score >= 0.33:
        overall_sentiment = 'Positive'
    else:
        overall_sentiment = 'Neutral'

    return weighted_score, overall_sentiment, sentence_results
|
|
|
|
|
# Test the paragraph sentiment function |
|
|
test_paragraph = """
The IT support experience was mixed today. The initial wait time was absolutely terrible - I was on hold for over an hour!
However, once I got through to Mike, he was fantastic. He quickly diagnosed the issue with my VPN connection and walked me
through the solution step by step. The whole resolution took about 15 minutes once we started working on it.
While the wait was frustrating, I'm satisfied with the technical support I received.
"""

# Horizontal rule reused around the report
divider = "=" * 80

print("=== Paragraph Sentiment Analysis Example ===")
print(f"Text: {test_paragraph.strip()}")
print("\n" + divider)

weighted_score, overall_sentiment, sentence_details = predict_paragraph_sentiment(test_paragraph)

# Overall figures first, then one entry per sentence
print("\nOVERALL RESULTS:")
print(f"Weighted Score: {weighted_score:.3f}")
print(f"Overall Sentiment: {overall_sentiment}")

print("\nSENTENCE BREAKDOWN:")
for i, detail in enumerate(sentence_details, start=1):
    print(f"{i}. \"{detail['sentence']}\"")
    print(f" Score: {detail['score']:.3f} | Sentiment: {detail['sentiment']} | Weight: {detail['weight']:.3f}")
    print()

print(divider)
|
|
|
|
|
``` |
|
|
|
|
|
To reduce the model's memory footprint, you can quantize it with torchao.
The int8 weight-only configuration below is suitable for running the model on a laptop or other small device:
|
|
|
|
|
```python |
|
|
from torchao.quantization import quantize_, Int8WeightOnlyConfig |
|
|
|
|
|
# Put the model in inference mode and move it to the CPU
model.eval()
model.to("cpu")

# In-place: converts Linear layers to int8 weights
int8_config = Int8WeightOnlyConfig()
quantize_(model, int8_config)
|
|
``` |
|
|
|
|
|
|