Vivek-Sham
/

deberta-multitask-sentiment-analysis

@@ -1,144 +1,148 @@
-import torch
-import torch.nn as nn
-from transformers import DebertaV2Tokenizer , DebertaV2Model
-from typing import Dict, Any
-import joblib
-import os
-# Define the EndpointHandler class
-class EndpointHandler:
-    def __init__(self, model_path =""):
-        # Load tokenizer and model from the Hugging Face repository
-        self.tokenizer = DebertaV2Tokenizer.from_pretrained(model_path)
-        # Initialize the custom multitask DeBERTa model with pre-defined label counts
-        self.model = MultitaskDebertaModel(num_emotion_labels=8, num_polarity_labels=4, num_hate_speech_labels=2)
-        self.model = self.model.from_pretrained(model_path)
-        # Use GPU if available
-        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        self.model.to(self.device)
-        self.model.eval()
-        # Load the encoder files directly from the root of the repository
-        self.emotion_encoder = joblib.load(os.path.join(model_path, 'emotion_encoder.pkl'))
-        self.polarity_encoder = joblib.load(os.path.join(model_path, 'polarity_encoder.pkl'))
-        self.hate_speech_encoder = joblib.load(os.path.join(model_path, 'hate_speech_encoder.pkl'))
-    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        # Preprocess input: extract the text from the request payload
-        text = data.get('inputs')
-        # Tokenize the input text
-        inputs = self.tokenizer(text, return_tensors='pt', max_length=256, truncation=True, padding=True)
-        if 'token_type_ids' in inputs:
-            del inputs['token_type_ids']
-        inputs = {key: val.to(self.device) for key, val in inputs.items()}
-        # Run the input through the model
-        with torch.no_grad():
-            outputs = self.model(**inputs)
-            emotion_logits = outputs.get('emotion')
-            polarity_logits = outputs.get('polarity')
-            hate_speech_logits = outputs.get('hate_speech')
-        # Decode predictions from logits using argmax and the label encoders
-        emotion_preds = torch.argmax(emotion_logits, dim=1).cpu().numpy()
-        polarity_preds = torch.argmax(polarity_logits, dim=1).cpu().numpy()
-        hate_speech_preds = torch.argmax(hate_speech_logits, dim=1).cpu().numpy()
-        # Inverse transform the predictions to get human-readable labels
-        decoded_emotions = self.emotion_encoder.inverse_transform(emotion_preds)
-        decoded_polarities = self.polarity_encoder.inverse_transform(polarity_preds)
-        decoded_hate_speech = self.hate_speech_encoder.inverse_transform(hate_speech_preds)
-        # Return the decoded results as a dictionary
-        return {
-            "emotions": decoded_emotions,
-            "polarities": decoded_polarities,
-            "hate_speech": decoded_hate_speech
-        }
-# Define your custom multitask model architecture here
-class MultitaskDebertaModel(nn.Module):
-    def __init__(self, num_emotion_labels, num_polarity_labels, num_hate_speech_labels):
-        super(MultitaskDebertaModel, self).__init__()
-        self.deberta = DebertaV2Model.from_pretrained('microsoft/deberta-v3-base')
-        # Freeze the first 5 layers of DeBERTa to speed up training and inference
-        for param in self.deberta.encoder.layer[:5]:
-            for p in param.parameters():
-                p.requires_grad = False
-        # LSTM layers for each task
-        self.emotion_lstm = nn.LSTM(768, 128, bidirectional=True, batch_first=True)
-        self.polarity_lstm = nn.LSTM(768, 128, bidirectional=True, batch_first=True)
-        self.hate_speech_lstm = nn.LSTM(768, 128, bidirectional=True, batch_first=True)
-        # Attention layers for each task
-        self.emotion_attention = nn.MultiheadAttention(embed_dim=256, num_heads=8, batch_first=True)
-        self.polarity_attention = nn.MultiheadAttention(embed_dim=256, num_heads=8, batch_first=True)
-        self.hate_speech_attention = nn.MultiheadAttention(embed_dim=256, num_heads=8, batch_first=True)
-        # Dense layers for each task after attention
-        self.emotion_dense = nn.Linear(256, 128)
-        self.polarity_dense = nn.Linear(256, 128)
-        self.hate_speech_dense = nn.Linear(256, 128)
-        # Fusion layer that combines the task-specific features and the CLS token
-        self.fusion_dense = nn.Linear(128 + 128 + 128 + 768, 128)
-        # Task-specific classifiers
-        self.emotion_classifier = nn.Linear(128, num_emotion_labels)
-        self.polarity_classifier = nn.Linear(128, num_polarity_labels)
-        self.hate_speech_classifier = nn.Linear(128, num_hate_speech_labels)
-        # Regularization layers: layer normalization and dropout
-        self.layer_norm = nn.LayerNorm(128)
-        self.dropout = nn.Dropout(p=0.3)
-        self.relu = nn.ReLU()
-    def forward(self, input_ids, attention_mask):
-        # Extract DeBERTa outputs
-        deberta_outputs = self.deberta(input_ids, attention_mask=attention_mask)
-        sequence_output = deberta_outputs.last_hidden_state
-        cls_output = sequence_output[:, 0, :]  # CLS token output
-        # Task-specific LSTM outputs
-        emotion_lstm_output, _ = self.emotion_lstm(sequence_output)
-        polarity_lstm_output, _ = self.polarity_lstm(sequence_output)
-        hate_speech_lstm_output, _ = self.hate_speech_lstm(sequence_output)
-        # Task-specific attention outputs
-        emotion_attention_output, _ = self.emotion_attention(emotion_lstm_output, emotion_lstm_output, emotion_lstm_output)
-        polarity_attention_output, _ = self.polarity_attention(polarity_lstm_output, polarity_lstm_output, polarity_lstm_output)
-        hate_speech_attention_output, _ = self.hate_speech_attention(hate_speech_lstm_output, hate_speech_lstm_output, hate_speech_lstm_output)
-        # Pool the attention outputs
-        emotion_features = torch.mean(emotion_attention_output, dim=1)
-        polarity_features = torch.mean(polarity_attention_output, dim=1)
-        hate_speech_features = torch.mean(hate_speech_attention_output, dim=1)
-        # Apply dense layers after pooling
-        emotion_features = self.relu(self.emotion_dense(emotion_features))
-        polarity_features = self.relu(self.polarity_dense(polarity_features))
-        hate_speech_features = self.relu(self.hate_speech_dense(hate_speech_features))
-        # Combine all features (task-specific features + CLS token)
-        combined_features = torch.cat([emotion_features, polarity_features, hate_speech_features, cls_output], dim=-1)
-        combined_features = self.relu(self.fusion_dense(combined_features))
-        # Apply layer normalization and dropout
-        combined_features = self.layer_norm(combined_features)
-        combined_features = self.dropout(combined_features)
-        # Task-specific logits
-        emotion_logits = self.emotion_classifier(combined_features)
-        polarity_logits = self.polarity_classifier(combined_features)
-        hate_speech_logits = self.hate_speech_classifier(combined_features)
-        return {
-            'emotion': emotion_logits,
-            'polarity': polarity_logits,
-            'hate_speech': hate_speech_logits
-        }

+import torch
+import torch.nn as nn
+from transformers import DebertaV2Tokenizer , DebertaV2Model
+from typing import Dict, Any
+import joblib
+import os
+# Define the EndpointHandler class
+class EndpointHandler:
+    """Inference handler that serves the multitask DeBERTa model.
+
+    On construction it loads the tokenizer, the model weights and three label
+    encoders from ``model_path``. Calling the instance with a request payload
+    returns the decoded emotion, polarity and hate-speech labels.
+    """
+
+    def __init__(self, model_path=""):
+        """Load the tokenizer, model weights and label encoders.
+
+        Args:
+            model_path: Directory holding the tokenizer files,
+                ``pytorch_model.bin`` and the three ``*_encoder.pkl`` files.
+        """
+        self.tokenizer = DebertaV2Tokenizer.from_pretrained(model_path)
+        # Pick the device first so the checkpoint can be mapped onto it.
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        # Label counts are fixed here and must match the trained checkpoint.
+        self.model = MultitaskDebertaModel(num_emotion_labels=8, num_polarity_labels=4, num_hate_speech_labels=2)
+        # map_location lets a checkpoint that was saved from a CUDA device be
+        # loaded on a CPU-only host instead of raising at deserialization.
+        state_dict = torch.load(os.path.join(model_path, 'pytorch_model.bin'), map_location=self.device)
+        self.model.load_state_dict(state_dict)
+        self.model.to(self.device)
+        # eval() disables dropout so predictions are deterministic.
+        self.model.eval()
+        # NOTE(review): joblib.load unpickles arbitrary objects; only point
+        # model_path at a repository whose contents are trusted.
+        self.emotion_encoder = joblib.load(os.path.join(model_path, 'emotion_encoder.pkl'))
+        self.polarity_encoder = joblib.load(os.path.join(model_path, 'polarity_encoder.pkl'))
+        self.hate_speech_encoder = joblib.load(os.path.join(model_path, 'hate_speech_encoder.pkl'))
+
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        """Classify the text found under ``data['inputs']``.
+
+        Args:
+            data: Request payload; the text to classify is read from the
+                ``'inputs'`` key and passed to the tokenizer as-is.
+
+        Returns:
+            A dict with the keys ``"emotions"``, ``"polarities"`` and
+            ``"hate_speech"``, each holding a list with one decoded label per
+            input row.
+
+        Raises:
+            ValueError: If the payload has no ``'inputs'`` entry.
+        """
+        text = data.get('inputs')
+        if text is None:
+            raise ValueError("Request payload must contain an 'inputs' field.")
+        encoded = self.tokenizer(text, return_tensors='pt', max_length=256, truncation=True, padding=True)
+        # token_type_ids is dropped because it is not forwarded to the model.
+        inputs = {key: val.to(self.device) for key, val in encoded.items() if key != 'token_type_ids'}
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+        return {
+            "emotions": self._decode(self.emotion_encoder, outputs.get('emotion')),
+            "polarities": self._decode(self.polarity_encoder, outputs.get('polarity')),
+            "hate_speech": self._decode(self.hate_speech_encoder, outputs.get('hate_speech'))
+        }
+
+    @staticmethod
+    def _decode(encoder, logits):
+        """Turn a batch of logits into a plain list of decoded labels."""
+        class_ids = torch.argmax(logits, dim=1).cpu().numpy()
+        labels = encoder.inverse_transform(class_ids)
+        # Convert array results to built-in Python values so the response
+        # can be serialized as JSON.
+        return labels.tolist() if hasattr(labels, 'tolist') else list(labels)
+# Define your custom multitask model architecture here
+class MultitaskDebertaModel(nn.Module):
+    """DeBERTa encoder with emotion, polarity and hate-speech heads.
+
+    Each task runs its own BiLSTM, self-attention and dense layer over the
+    encoder output. The three pooled task vectors are concatenated with the
+    first-token vector, fused into one shared representation, and classified
+    by three linear heads.
+    """
+
+    def __init__(self, num_emotion_labels, num_polarity_labels, num_hate_speech_labels):
+        """Build the encoder and the task-specific layers.
+
+        Args:
+            num_emotion_labels: Output size of the emotion classifier.
+            num_polarity_labels: Output size of the polarity classifier.
+            num_hate_speech_labels: Output size of the hate-speech classifier.
+        """
+        super().__init__()
+        self.deberta = DebertaV2Model.from_pretrained('microsoft/deberta-v3-base')
+        # Freeze the first 5 encoder layers so they are excluded from gradient updates.
+        for layer in self.deberta.encoder.layer[:5]:
+            for p in layer.parameters():
+                p.requires_grad = False
+        # One bidirectional LSTM per task: 768 in, 2 * 128 = 256 out per position.
+        self.emotion_lstm = nn.LSTM(768, 128, bidirectional=True, batch_first=True)
+        self.polarity_lstm = nn.LSTM(768, 128, bidirectional=True, batch_first=True)
+        self.hate_speech_lstm = nn.LSTM(768, 128, bidirectional=True, batch_first=True)
+        # One self-attention layer per task over the 256-wide LSTM output.
+        self.emotion_attention = nn.MultiheadAttention(embed_dim=256, num_heads=8, batch_first=True)
+        self.polarity_attention = nn.MultiheadAttention(embed_dim=256, num_heads=8, batch_first=True)
+        self.hate_speech_attention = nn.MultiheadAttention(embed_dim=256, num_heads=8, batch_first=True)
+        # Dense layers applied to each task's pooled attention output.
+        self.emotion_dense = nn.Linear(256, 128)
+        self.polarity_dense = nn.Linear(256, 128)
+        self.hate_speech_dense = nn.Linear(256, 128)
+        # Fusion layer: three 128-wide task vectors plus the 768-wide first-token vector.
+        self.fusion_dense = nn.Linear(128 + 128 + 128 + 768, 128)
+        # Task-specific classifiers on top of the shared fused representation.
+        self.emotion_classifier = nn.Linear(128, num_emotion_labels)
+        self.polarity_classifier = nn.Linear(128, num_polarity_labels)
+        self.hate_speech_classifier = nn.Linear(128, num_hate_speech_labels)
+        # Regularization layers: layer normalization and dropout.
+        self.layer_norm = nn.LayerNorm(128)
+        self.dropout = nn.Dropout(p=0.3)
+        self.relu = nn.ReLU()
+
+    def forward(self, input_ids, attention_mask):
+        """Compute the logits of all three tasks.
+
+        Args:
+            input_ids: Token ids passed to the DeBERTa encoder.
+            attention_mask: Attention mask passed to the DeBERTa encoder.
+
+        Returns:
+            A dict with the keys ``'emotion'``, ``'polarity'`` and
+            ``'hate_speech'``, each holding that task's logits.
+        """
+        deberta_outputs = self.deberta(input_ids, attention_mask=attention_mask)
+        sequence_output = deberta_outputs.last_hidden_state
+        cls_output = sequence_output[:, 0, :]  # first-token (CLS) vector
+        # Task-specific LSTM outputs.
+        emotion_lstm_output, _ = self.emotion_lstm(sequence_output)
+        polarity_lstm_output, _ = self.polarity_lstm(sequence_output)
+        hate_speech_lstm_output, _ = self.hate_speech_lstm(sequence_output)
+        # Task-specific self-attention (query, key and value are the same tensor).
+        emotion_attention_output, _ = self.emotion_attention(emotion_lstm_output, emotion_lstm_output, emotion_lstm_output)
+        polarity_attention_output, _ = self.polarity_attention(polarity_lstm_output, polarity_lstm_output, polarity_lstm_output)
+        hate_speech_attention_output, _ = self.hate_speech_attention(hate_speech_lstm_output, hate_speech_lstm_output, hate_speech_lstm_output)
+        # Mean-pool over the sequence dimension.
+        # NOTE(review): attention_mask is only given to the encoder, so padded
+        # positions also contribute to the LSTM, attention and mean here.
+        # Kept as-is so outputs stay consistent with existing checkpoints.
+        emotion_features = torch.mean(emotion_attention_output, dim=1)
+        polarity_features = torch.mean(polarity_attention_output, dim=1)
+        hate_speech_features = torch.mean(hate_speech_attention_output, dim=1)
+        # Dense projection of each pooled task vector.
+        emotion_features = self.relu(self.emotion_dense(emotion_features))
+        polarity_features = self.relu(self.polarity_dense(polarity_features))
+        hate_speech_features = self.relu(self.hate_speech_dense(hate_speech_features))
+        # Fuse the task vectors with the first-token vector.
+        combined_features = torch.cat([emotion_features, polarity_features, hate_speech_features, cls_output], dim=-1)
+        combined_features = self.relu(self.fusion_dense(combined_features))
+        combined_features = self.layer_norm(combined_features)
+        combined_features = self.dropout(combined_features)
+        # Every head reads the same fused representation.
+        return {
+            'emotion': self.emotion_classifier(combined_features),
+            'polarity': self.polarity_classifier(combined_features),
+            'hate_speech': self.hate_speech_classifier(combined_features)
+        }
+
+    def load_model(self, model_path):
+        """Load model weights from the file at ``model_path`` into this module.
+
+        Args:
+            model_path: Path to a state_dict file readable by ``torch.load``.
+        """
+        # Deserialize onto the CPU so a checkpoint saved from a CUDA device
+        # also loads on a CPU-only host; load_state_dict then copies the
+        # values into this module's existing parameters.
+        self.load_state_dict(torch.load(model_path, map_location='cpu'))