---
license: mit
pipeline_tag: text-classification
tags:
- model_hub_mixin
- pytorch_model_hub_mixin
base_model:
- cross-encoder/stsb-roberta-base
---

This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration.

For full documentation of this model, please see the official [model card](https://huggingface.co/govtech/stsb-roberta-base-off-topic) from govtech, who built the model.

Mozilla AI has made it possible to load `govtech/stsb-roberta-base-off-topic` with `from_pretrained`. To do this, you'll need to first pull the `CrossEncoderWithMLP` model architecture from their model card and add `PyTorchModelHubMixin` as an inherited class. See this [article](https://huggingface.co/docs/hub/en/models-uploading#upload-a-pytorch-model-using-huggingfacehub) for details.

Then, you can do the following:

```python
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer
from huggingface_hub import PyTorchModelHubMixin


class CrossEncoderWithMLP(nn.Module, PyTorchModelHubMixin):
    def __init__(self, base_model, num_labels=2):
        super().__init__()

        # Existing cross-encoder model
        self.base_model = base_model
        # Hidden size of the base model
        hidden_size = base_model.config.hidden_size
        # MLP layers after combining the cross-encoders
        self.mlp = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),  # Input: a single sentence
            nn.ReLU(),
            nn.Linear(hidden_size // 2, hidden_size // 4),  # Reduce the size of the layer
            nn.ReLU()
        )
        # Classifier head
        self.classifier = nn.Linear(hidden_size // 4, num_labels)

    def forward(self, input_ids, attention_mask):
        # Encode the pair of sentences in one pass
        outputs = self.base_model(input_ids, attention_mask)
        pooled_output = outputs.pooler_output
        # Pass the pooled output through the MLP layers
        mlp_output = self.mlp(pooled_output)
        # Pass the final MLP output through the classifier
        logits = self.classifier(mlp_output)
        return logits


tokenizer = AutoTokenizer.from_pretrained("cross-encoder/stsb-roberta-base")
base_model = AutoModel.from_pretrained("cross-encoder/stsb-roberta-base")
off_topic = CrossEncoderWithMLP.from_pretrained(
    "mozilla-ai/stsb-roberta-base-off-topic", base_model=base_model
)


# Then you can build a predict function that utilizes the tokenizer
def predict(model, tokenizer, sentence1, sentence2, max_length=512, device="cpu"):
    model.to(device)
    model.eval()
    encoding = tokenizer(
        sentence1,
        sentence2,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_token_type_ids=False
    )
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        probabilities = torch.softmax(outputs, dim=1)
        predicted_label = torch.argmax(probabilities, dim=1).item()

    return predicted_label, probabilities.cpu().numpy()
```
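
As a quick sanity check, you can call `predict` on a sentence pair. This is a minimal usage sketch; the example sentences below are illustrative placeholders, and you should consult the official model card for the exact meaning of the returned label.

```python
# Minimal usage sketch; the sentence pair is a placeholder example.
label, probs = predict(
    off_topic,
    tokenizer,
    "Write a short poem about the ocean.",
    "Here is a recipe for chocolate cake.",
)
print(label, probs)  # See the official model card for the label mapping.
```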