Upload 10 files
- models/__pycache__/bert_model.cpython-311.pyc +0 -0
- models/__pycache__/deberta_model.cpython-311.pyc +0 -0
- models/__pycache__/parallel_bert_deberta.cpython-311.pyc +0 -0
- models/__pycache__/roberta_model.cpython-311.pyc +0 -0
- models/__pycache__/text_and_metadata_model.cpython-311.pyc +0 -0
- models/bert_model.py +59 -0
- models/deberta_model.py +55 -0
- models/parallel_bert_deberta.py +119 -0
- models/roberta_model.py +56 -0
- models/text_and_metadata_model.py +72 -0
models/__pycache__/bert_model.cpython-311.pyc
ADDED
Binary file (3.29 kB).
models/__pycache__/deberta_model.cpython-311.pyc
ADDED
Binary file (3.15 kB).
models/__pycache__/parallel_bert_deberta.cpython-311.pyc
ADDED
Binary file (6.45 kB).
models/__pycache__/roberta_model.cpython-311.pyc
ADDED
Binary file (3.18 kB).
models/__pycache__/text_and_metadata_model.cpython-311.pyc
ADDED
Binary file (4.09 kB).
models/bert_model.py
ADDED
@@ -0,0 +1,59 @@
+# models/bert_model.py
+
+import torch
+import torch.nn as nn
+from transformers import BertModel
+from config import DROPOUT_RATE, BERT_MODEL_NAME  # Import BERT_MODEL_NAME from config
+
+class BertMultiOutputModel(nn.Module):
+    """
+    BERT-based model for multi-output classification.
+    It uses a pre-trained BERT model as its backbone and adds a dropout layer
+    followed by separate linear classification heads for each target label.
+    """
+    # Statically set tokenizer name for easy access in main.py
+    tokenizer_name = BERT_MODEL_NAME
+
+    def __init__(self, num_labels):
+        """
+        Initializes the BertMultiOutputModel.
+
+        Args:
+            num_labels (list): A list where each element is the number of classes
+                               for a corresponding label column.
+        """
+        super(BertMultiOutputModel, self).__init__()
+        # Load the pre-trained BERT model.
+        # BertModel provides contextual embeddings and a pooled output for classification.
+        self.bert = BertModel.from_pretrained(BERT_MODEL_NAME)
+        self.dropout = nn.Dropout(DROPOUT_RATE)  # Dropout layer for regularization
+
+        # Create a list of classification heads, one for each label column.
+        # Each head is a linear layer mapping BERT's pooled output size to the number of classes for that label.
+        self.classifiers = nn.ModuleList([
+            nn.Linear(self.bert.config.hidden_size, n_classes) for n_classes in num_labels
+        ])
+
+    def forward(self, input_ids, attention_mask):
+        """
+        Performs the forward pass of the model.
+
+        Args:
+            input_ids (torch.Tensor): Tensor of token IDs (from tokenizer).
+            attention_mask (torch.Tensor): Tensor indicating attention (from tokenizer).
+
+        Returns:
+            list: A list of logit tensors, one for each classification head.
+                  Each tensor has shape (batch_size, num_classes_for_that_label).
+        """
+        # Pass input_ids and attention_mask through BERT.
+        # .pooler_output typically represents the hidden state of the [CLS] token,
+        # processed through a linear layer and tanh activation, often used for classification.
+        pooled_output = self.bert(input_ids=input_ids, attention_mask=attention_mask).pooler_output
+
+        # Apply dropout for regularization
+        pooled_output = self.dropout(pooled_output)
+
+        # Pass the pooled output through each classification head.
+        # The result is a list of logits (raw scores before softmax/sigmoid) for each label.
+        return [classifier(pooled_output) for classifier in self.classifiers]
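Note: all five model files import their hyperparameters and checkpoint names from a shared config module that is not part of this commit. A minimal sketch of what config.py is assumed to provide; the constant names come from the imports above, the values are illustrative placeholders only:

# config.py (assumed) -- names taken from the imports in this commit, values are placeholders
DROPOUT_RATE = 0.3                            # dropout probability shared by all models
BERT_MODEL_NAME = "bert-base-uncased"         # any BERT checkpoint on the Hugging Face Hub
DEBERTA_MODEL_NAME = "microsoft/deberta-base"
ROBERTA_MODEL_NAME = "roberta-base"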
models/deberta_model.py
ADDED
@@ -0,0 +1,55 @@
+# models/deberta_model.py
+
+import torch.nn as nn
+from transformers import DebertaModel
+from config import DROPOUT_RATE, DEBERTA_MODEL_NAME  # Import DEBERTA_MODEL_NAME
+
+class DebertaMultiOutputModel(nn.Module):
+    """
+    DeBERTa-based model for multi-output classification.
+    Similar structure to the BERT model, using a pre-trained DeBERTa model
+    as the backbone for text feature extraction.
+    """
+    # Statically set tokenizer name for easy access in main.py
+    tokenizer_name = DEBERTA_MODEL_NAME
+
+    def __init__(self, num_labels):
+        """
+        Initializes the DebertaMultiOutputModel.
+
+        Args:
+            num_labels (list): A list where each element is the number of classes
+                               for a corresponding label column.
+        """
+        super(DebertaMultiOutputModel, self).__init__()
+        # Load the pre-trained DeBERTa model.
+        # Note: unlike BertModel, DebertaModel does not expose a 'pooler_output', so we pool manually.
+        self.deberta = DebertaModel.from_pretrained(DEBERTA_MODEL_NAME)
+        self.dropout = nn.Dropout(DROPOUT_RATE)  # Dropout layer for regularization
+
+        # Create classification heads for each label column.
+        # Each head maps DeBERTa's hidden size to the number of classes for that label.
+        self.classifiers = nn.ModuleList([
+            nn.Linear(self.deberta.config.hidden_size, n_classes) for n_classes in num_labels
+        ])
+
+    def forward(self, input_ids, attention_mask):
+        """
+        Performs the forward pass of the model.
+
+        Args:
+            input_ids (torch.Tensor): Tensor of token IDs.
+            attention_mask (torch.Tensor): Tensor indicating attention.
+
+        Returns:
+            list: A list of logit tensors, one for each classification head.
+        """
+        # Pass input_ids and attention_mask through DeBERTa.
+        # DebertaModel returns only last_hidden_state, so take the first ([CLS]) token as the pooled representation.
+        pooled_output = self.deberta(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state[:, 0]
+
+        # Apply dropout
+        pooled_output = self.dropout(pooled_output)
+
+        # Pass the pooled output through each classification head.
+        return [classifier(pooled_output) for classifier in self.classifiers]
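For orientation, a minimal usage sketch of the single-backbone models above (BERT shown; the DeBERTa and RoBERTa variants are called the same way). The label layout and example sentences are invented for illustration and do not come from this repository:

# Hypothetical smoke test for the multi-output classifiers
from transformers import AutoTokenizer
from models.bert_model import BertMultiOutputModel

num_labels = [4, 2, 5]  # invented: three label columns with 4, 2 and 5 classes
model = BertMultiOutputModel(num_labels)
tokenizer = AutoTokenizer.from_pretrained(BertMultiOutputModel.tokenizer_name)

batch = tokenizer(["an example sentence", "another one"],
                  padding=True, truncation=True, return_tensors="pt")
logits = model(batch["input_ids"], batch["attention_mask"])
# logits is a list of three tensors with shapes (2, 4), (2, 2) and (2, 5)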
models/parallel_bert_deberta.py
ADDED
@@ -0,0 +1,119 @@
+# models/parallel_bert_deberta.py
+
+import torch
+import torch.nn as nn
+from transformers import BertModel, DebertaModel
+from config import DROPOUT_RATE, BERT_MODEL_NAME, DEBERTA_MODEL_NAME  # Import model names
+
+class Attention(nn.Module):
+    """
+    Simple Attention layer to compute a context vector from a sequence of hidden states.
+    It learns a single weight for each hidden state in the sequence, then uses softmax
+    to normalize these weights and compute a weighted sum of the hidden states.
+    """
+    def __init__(self, hidden_size):
+        """
+        Initializes the Attention layer.
+
+        Args:
+            hidden_size (int): The dimensionality of the input hidden states.
+        """
+        super(Attention, self).__init__()
+        # A linear layer to project each hidden state to a single scalar (attention score)
+        self.attn = nn.Linear(hidden_size, 1)
+
+    def forward(self, encoder_output):
+        """
+        Performs the forward pass of the attention mechanism.
+
+        Args:
+            encoder_output (torch.Tensor): Tensor of hidden states from an encoder.
+                Shape: (batch_size, sequence_length, hidden_size)
+
+        Returns:
+            torch.Tensor: The context vector, a weighted sum of the hidden states.
+                Shape: (batch_size, hidden_size)
+        """
+        # Calculate normalized attention scores.
+        # self.attn(encoder_output) -> (batch_size, sequence_length, 1)
+        # .squeeze(-1) removes the last dimension, giving (batch_size, sequence_length)
+        attn_weights = torch.softmax(self.attn(encoder_output).squeeze(-1), dim=1)
+
+        # Compute the context vector as a weighted sum of encoder_output.
+        # attn_weights.unsqueeze(-1) adds a dimension for broadcasting: (batch_size, sequence_length, 1),
+        # which allows element-wise multiplication with encoder_output.
+        # torch.sum(..., dim=1) sums along the sequence_length dimension.
+        context_vector = torch.sum(attn_weights.unsqueeze(-1) * encoder_output, dim=1)
+        return context_vector
+
+class ParallelMultiOutputModel(nn.Module):
+    """
+    Hybrid model that leverages both BERT and DeBERTa in parallel.
+    It extracts features from both models, applies an attention mechanism to their outputs,
+    projects these attended features to a common dimension, concatenates them, and then
+    uses this combined representation for multi-output classification.
+    """
+    # Statically set tokenizer name to BERT's for this combined model
+    # (assuming BERT's tokenizer is compatible or primary for the combined input)
+    tokenizer_name = BERT_MODEL_NAME
+
+    def __init__(self, num_labels):
+        """
+        Initializes the ParallelMultiOutputModel.
+
+        Args:
+            num_labels (list): A list where each element is the number of classes
+                               for a corresponding label column.
+        """
+        super(ParallelMultiOutputModel, self).__init__()
+        # Load pre-trained BERT and DeBERTa models
+        self.bert = BertModel.from_pretrained(BERT_MODEL_NAME)
+        self.deberta = DebertaModel.from_pretrained(DEBERTA_MODEL_NAME)
+
+        # Initialize attention layers for each backbone model
+        self.attn_bert = Attention(self.bert.config.hidden_size)
+        self.attn_deberta = Attention(self.deberta.config.hidden_size)
+
+        # Projection layers to reduce dimensionality of the context vectors
+        # before concatenation. This helps manage the combined feature size.
+        self.proj_bert = nn.Linear(self.bert.config.hidden_size, 256)
+        self.proj_deberta = nn.Linear(self.deberta.config.hidden_size, 256)
+
+        self.dropout = nn.Dropout(DROPOUT_RATE)  # Dropout layer for regularization
+
+        # Define classification heads. The input feature size is the sum of
+        # the projected sizes from BERT and DeBERTa (256 + 256 = 512).
+        self.classifiers = nn.ModuleList([
+            nn.Linear(512, n_classes) for n_classes in num_labels
+        ])
+
+    def forward(self, input_ids, attention_mask):
+        """
+        Performs the forward pass of the parallel model.
+
+        Args:
+            input_ids (torch.Tensor): Tensor of token IDs.
+            attention_mask (torch.Tensor): Tensor indicating attention.
+
+        Returns:
+            list: A list of logit tensors, one for each classification head.
+        """
+        # Get the last hidden states (sequence of hidden states for all tokens)
+        # from both BERT and DeBERTa. These are the inputs to the attention layers.
+        bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state
+        deberta_output = self.deberta(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state
+
+        # Apply attention to get a single context vector from each model's output
+        context_bert = self.attn_bert(bert_output)
+        context_deberta = self.attn_deberta(deberta_output)
+
+        # Project the context vectors to their reduced dimensions
+        reduced_bert = self.proj_bert(context_bert)
+        reduced_deberta = self.proj_deberta(context_deberta)
+
+        # Concatenate the reduced feature vectors from both models
+        combined = torch.cat((reduced_bert, reduced_deberta), dim=1)
+        combined = self.dropout(combined)  # Apply dropout to the combined features
+
+        # Pass the combined features through each classification head
+        return [classifier(combined) for classifier in self.classifiers]
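All of these models return one logit tensor per label column, so training needs one loss term per head. The training loop is not part of this commit; a hedged sketch of how such a multi-output loss is typically combined (function name and summation strategy are assumptions, not taken from this repository):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()

def multi_output_loss(outputs, targets):
    # outputs: list of (batch_size, n_classes_i) logit tensors, one per classification head
    # targets: (batch_size, num_label_columns) tensor of integer class indices
    losses = [criterion(logits, targets[:, i]) for i, logits in enumerate(outputs)]
    return torch.stack(losses).sum()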
models/roberta_model.py
ADDED
@@ -0,0 +1,56 @@
+# models/roberta_model.py
+
+import torch
+import torch.nn as nn
+from transformers import RobertaModel
+from config import DROPOUT_RATE, ROBERTA_MODEL_NAME  # Import ROBERTA_MODEL_NAME
+
+class RobertaMultiOutputModel(nn.Module):
+    """
+    RoBERTa-based model for multi-output classification.
+    Uses a pre-trained RoBERTa model as its backbone. RoBERTa is an optimized
+    version of BERT, often performing better.
+    """
+    # Statically set tokenizer name for easy access in main.py
+    tokenizer_name = ROBERTA_MODEL_NAME
+
+    def __init__(self, num_labels):
+        """
+        Initializes the RobertaMultiOutputModel.
+
+        Args:
+            num_labels (list): A list where each element is the number of classes
+                               for a corresponding label column.
+        """
+        super(RobertaMultiOutputModel, self).__init__()
+        # Load the pre-trained RoBERTa model.
+        # RoBERTa's pooler_output corresponds to the hidden state of the
+        # first token (<s>), which is often used for sequence classification.
+        self.roberta = RobertaModel.from_pretrained(ROBERTA_MODEL_NAME)
+        self.dropout = nn.Dropout(DROPOUT_RATE)  # Dropout layer
+
+        # Create classification heads for each label column.
+        self.classifiers = nn.ModuleList([
+            nn.Linear(self.roberta.config.hidden_size, n_classes) for n_classes in num_labels
+        ])
+
+    def forward(self, input_ids, attention_mask):
+        """
+        Performs the forward pass of the model.
+
+        Args:
+            input_ids (torch.Tensor): Tensor of token IDs.
+            attention_mask (torch.Tensor): Tensor indicating attention.
+
+        Returns:
+            list: A list of logit tensors, one for each classification head.
+        """
+        # Pass input_ids and attention_mask through RoBERTa.
+        # .pooler_output is used for classification.
+        pooled_output = self.roberta(input_ids=input_ids, attention_mask=attention_mask).pooler_output
+
+        # Apply dropout
+        pooled_output = self.dropout(pooled_output)
+
+        # Pass the pooled output through each classification head.
+        return [classifier(pooled_output) for classifier in self.classifiers]
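Each model class carries its checkpoint name in the tokenizer_name class attribute, presumably so main.py can pair the right tokenizer with whichever model is selected. A sketch of that pattern; the MODEL_REGISTRY dict and build() helper are hypothetical, since main.py is not part of this commit:

from transformers import AutoTokenizer
from models.bert_model import BertMultiOutputModel
from models.roberta_model import RobertaMultiOutputModel
from models.deberta_model import DebertaMultiOutputModel

# Hypothetical registry; the real selection logic lives in main.py, not shown here.
MODEL_REGISTRY = {
    "bert": BertMultiOutputModel,
    "roberta": RobertaMultiOutputModel,
    "deberta": DebertaMultiOutputModel,
}

def build(model_key, num_labels):
    model_cls = MODEL_REGISTRY[model_key]
    tokenizer = AutoTokenizer.from_pretrained(model_cls.tokenizer_name)
    return model_cls(num_labels), tokenizer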
models/text_and_metadata_model.py
ADDED
@@ -0,0 +1,72 @@
+# models/text_and_metadata_model.py
+
+import torch
+import torch.nn as nn
+from transformers import BertModel  # Can be extended to RoBERTa, DeBERTa, etc.
+from config import DROPOUT_RATE, BERT_MODEL_NAME  # Import BERT_MODEL_NAME
+
+class BertWithMetadataModel(nn.Module):
+    """
+    Hybrid model that combines text features (extracted by BERT) with additional
+    numerical metadata features. The text features are processed by BERT,
+    metadata features by a simple MLP, and then their outputs are concatenated
+    before being fed into the final classification heads.
+    """
+    # Statically set tokenizer name
+    tokenizer_name = BERT_MODEL_NAME
+
+    def __init__(self, num_labels, metadata_dim):
+        """
+        Initializes the BertWithMetadataModel.
+
+        Args:
+            num_labels (list): A list where each element is the number of classes
+                               for a corresponding label column.
+            metadata_dim (int): The number of features in the numerical metadata.
+        """
+        super(BertWithMetadataModel, self).__init__()
+        # Load pre-trained BERT model for text processing
+        self.bert = BertModel.from_pretrained(BERT_MODEL_NAME)
+        self.dropout = nn.Dropout(DROPOUT_RATE)  # Dropout for BERT's output
+
+        # MLP for processing numerical metadata features
+        self.metadata_mlp = nn.Sequential(
+            nn.Linear(metadata_dim, 128),  # First linear layer
+            nn.ReLU(),                     # Activation function
+            nn.Dropout(DROPOUT_RATE),      # Dropout for metadata features
+            nn.Linear(128, 64)             # Second linear layer
+        )
+
+        # Calculate the total input feature size for the classification heads.
+        # This is the sum of BERT's pooled output size and the metadata MLP's output size.
+        combined_feature_size = self.bert.config.hidden_size + 64
+
+        # Create classification heads, one for each label column
+        self.classifiers = nn.ModuleList([
+            nn.Linear(combined_feature_size, n_classes) for n_classes in num_labels
+        ])
+
+    def forward(self, input_ids, attention_mask, metadata):
+        """
+        Performs the forward pass of the hybrid model.
+
+        Args:
+            input_ids (torch.Tensor): Tensor of token IDs for text.
+            attention_mask (torch.Tensor): Tensor indicating attention for text.
+            metadata (torch.Tensor): Tensor of numerical metadata features.
+
+        Returns:
+            list: A list of logit tensors, one for each classification head.
+        """
+        # Process text input through BERT
+        bert_pooled_output = self.bert(input_ids=input_ids, attention_mask=attention_mask).pooler_output
+        bert_pooled_output = self.dropout(bert_pooled_output)  # Apply dropout
+
+        # Process metadata through the MLP
+        metadata_output = self.metadata_mlp(metadata)
+
+        # Concatenate the processed text features and metadata features
+        combined_features = torch.cat((bert_pooled_output, metadata_output), dim=1)
+
+        # Pass the combined features through each classification head
+        return [classifier(combined_features) for classifier in self.classifiers]
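A minimal sketch of how the text-plus-metadata model is called; the metadata dimensionality, label layout, and feature values are invented for illustration (the real feature set is defined by the training pipeline, which is not part of this commit):

import torch
from transformers import AutoTokenizer
from models.text_and_metadata_model import BertWithMetadataModel

metadata_dim = 6  # invented number of numerical metadata features
model = BertWithMetadataModel(num_labels=[4, 2], metadata_dim=metadata_dim)
tokenizer = AutoTokenizer.from_pretrained(BertWithMetadataModel.tokenizer_name)

batch = tokenizer(["text with extra signals"], return_tensors="pt")
metadata = torch.randn(1, metadata_dim)  # shape (batch_size, metadata_dim), float features
logits = model(batch["input_ids"], batch["attention_mask"], metadata)
# logits: [tensor of shape (1, 4), tensor of shape (1, 2)]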