File size: 5,155 Bytes
cf1b3ae 9635ecd cf1b3ae 9635ecd ec349a6 9635ecd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
---
license: mit
pipeline_tag: text-classification
tags:
- argument-detection
- stance-detection
- multi-task-learning
language:
- en
base_model:
- answerdotai/ModernBERT-large
---
This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration.
---
## Model Description
This is a multi-task learning (MTL) model built on top of `answerdotai/ModernBERT-large`. The model is designed to perform two distinct text classification tasks using a shared feature representation, enhanced by a Mixture-of-Experts (MoE) layer.
The model can be used for:
1. **Argumentativeness Classification:** Classifying a text as either "Argumentative" or "Non-argumentative."
2. **Stance Classification:** Classifying the relationship between two claims as "Same-side" or "Opposing-side."
## How to use
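Because the model uses a custom architecture, you need to define the model classes shown below and load the weights with `from_pretrained` (provided by `PyTorchModelHubMixin`); the tokenizer and the backbone are loaded with the `transformers` library. The following code demonstrates how to make a prediction for each of the two tasks: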
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel
from huggingface_hub import PyTorchModelHubMixin
class MoELayer(nn.Module):
    """Mixture-of-Experts layer with top-k gating over a batch of feature vectors.

    Each expert is an independent feed-forward network
    (``Linear(input_dim, 2 * input_dim) -> ReLU -> Linear(2 * input_dim, input_dim)``).
    A linear gating network scores every expert per sample; the ``top_k``
    highest softmax probabilities select the experts and are used directly as
    mixing weights (they are not renormalised to sum to one).

    Args:
        input_dim: Size of the feature vectors fed to and produced by the layer.
        num_experts: Number of expert networks.
        top_k: Number of experts combined for each sample.
    """

    def __init__(self, input_dim, num_experts, top_k=2):
        super().__init__()
        self.num_experts = num_experts
        self.top_k = top_k
        # NOTE: the attribute names ``experts`` / ``gating_network`` and the
        # layout of each Sequential determine the state_dict keys, so they
        # must stay as they are for saved checkpoints to load.
        self.experts = nn.ModuleList(
            [
                nn.Sequential(
                    nn.Linear(input_dim, input_dim * 2),
                    nn.ReLU(),
                    nn.Linear(input_dim * 2, input_dim),
                )
                for _ in range(num_experts)
            ]
        )
        self.gating_network = nn.Linear(input_dim, num_experts)

    def forward(self, x):
        """Mix the outputs of the top-k experts selected for each sample.

        Args:
            x: 2-D tensor of shape ``(batch, input_dim)``.

        Returns:
            Tensor with the same shape as ``x``: for every row, the sum of the
            selected experts' outputs weighted by their gate probabilities.
            An empty batch yields an empty tensor.
        """
        gate_logits = self.gating_network(x)
        gate_probs = F.softmax(gate_logits, dim=-1)
        # Per sample: the top-k gate probabilities and the experts they belong to.
        topk_vals, topk_indices = torch.topk(gate_probs, self.top_k, dim=-1)

        output = torch.zeros_like(x)
        # Run each expert once on all samples routed to it instead of once per
        # sample; this is equivalent up to floating-point rounding and also
        # copes with an empty batch.
        for expert_id, expert in enumerate(self.experts):
            rows, slots = torch.where(topk_indices == expert_id)
            if rows.numel() == 0:
                continue
            weights = topk_vals[rows, slots].unsqueeze(-1)
            contribution = (weights * expert(x[rows])).to(output.dtype)
            # top-k indices are distinct within a row, so every row occurs at
            # most once in ``rows`` for a given expert.
            output.index_add_(0, rows, contribution)
        return output
class SentenceClassificationMoeMTLModel(
    nn.Module,
    PyTorchModelHubMixin,
):
    """Two-task classifier: ModernBERT-large encoder -> MoE layer -> per-task head.

    The encoder's first-token representation is passed through a shared
    ``MoELayer`` (8 experts, top-2 gating) and then through one of two
    classification heads, each producing 2 logits:

    * ``task_1_classifier`` is used when ``task == "arg"``;
    * ``task_2_classifier`` is used when ``task == "stance"``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.base_model = AutoModel.from_pretrained("answerdotai/ModernBERT-large")
        hidden_size = self.base_model.config.hidden_size
        self.moe_layer = MoELayer(input_dim=hidden_size, num_experts=8, top_k=2)
        # Both heads share one architecture but hold separate parameters.
        self.task_1_classifier = self._make_head(hidden_size)
        self.task_2_classifier = self._make_head(hidden_size)

    @staticmethod
    def _make_head(hidden_size):
        """Build one classification head: hidden_size -> 768 -> 2 logits."""
        return nn.Sequential(
            nn.Linear(in_features=hidden_size, out_features=768, bias=False),
            nn.GELU(),
            nn.LayerNorm(768, eps=1e-05, elementwise_affine=True),
            nn.Linear(768, 2),
        )

    def forward(self, task, input_ids, attention_mask):
        """Encode the inputs and apply the head selected by ``task``.

        Args:
            task: ``"arg"`` or ``"stance"``; selects the classification head.
            input_ids: Token ids forwarded to the base model.
            attention_mask: Attention mask forwarded to the base model.

        Returns:
            A tuple ``(x, cls_r)``. ``x`` holds the logits of the selected
            head; for any other ``task`` value no head is applied and ``x`` is
            the MoE layer output. ``cls_r`` is the encoder's hidden state at
            position 0 (presumably the [CLS] token), taken before the MoE layer.
        """
        encoder_out = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
        cls_r = encoder_out.last_hidden_state[:, 0]
        features = self.moe_layer(cls_r)
        if task == "arg":
            return self.task_1_classifier(features), cls_r
        if task == "stance":
            return self.task_2_classifier(features), cls_r
        return features, cls_r
model_name = "azza1625/argument-same-side-stance-classification"
# Device used for both the model and the encoded inputs; set to "cuda" to run on a GPU.
device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = SentenceClassificationMoeMTLModel.from_pretrained(model_name)
# Keep the model on the same device the inputs are moved to in classify_sequence.
model.to(device)
# Inference mode: disables dropout and other training-only behaviour.
model.eval()
def classify_sequence(seq, task, label_map):
    """Classify one input with the loaded model and return its label and confidence.

    Args:
        seq: The input. For ``task == 'stance'`` it is unpacked into the
            tokenizer's positional arguments (the example passes a list of
            two claims); for any other task it is passed as a single argument.
        task: Task name forwarded to the model (``'arg'`` or ``'stance'``).
        label_map: Mapping from predicted class index to a label.

    Returns:
        A tuple ``(label, confidence)`` where ``confidence`` is the softmax
        probability of the predicted class.
    """
    tokenizer_args = seq if task == 'stance' else (seq,)
    encoded = tokenizer(
        *tokenizer_args,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
    )
    encoded = encoded.to(device)

    with torch.no_grad():
        # The model also returns the first-token representation, unused here.
        logits, _ = model(task=task, **encoded)
        class_probs = torch.softmax(logits, dim=-1).squeeze()

    best_idx = class_probs.argmax().item()
    return label_map[best_idx], class_probs[best_idx].item()
# Task 1 example: is a single text argumentative?
text = "A fetus or embryo is not a person; therefore, abortion should not be considered murder."
label_map = {0: "Non-argumentative", 1: "Argumentative"}
label, confidence = classify_sequence(text, 'arg', label_map)
print(f"Prediction: {label} (Confidence: {confidence:.2f})")

# Task 2 example: are two claims on the same side or on opposing sides?
# The two claims are passed together as a list.
claim_1 = "A fetus or embryo is not a person; therefore, abortion should not be considered murder."
claim_2 = "Since death is the intention, such procedures should be considered murder."
label_map = {0: "Same-side", 1: "Opposing-side"}
label, confidence = classify_sequence([claim_1, claim_2], 'stance', label_map)
print(f"Prediction: {label} (Confidence: {confidence:.2f})")