Upload HarmFormer

Files changed:
- README.md +94 -3
- config.json +13 -0
- merges.txt +0 -0
- modeling.py +104 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +15 -0
- tokenizer.json +0 -0
- tokenizer_config.json +15 -0
- vocab.json +0 -0
README.md
CHANGED
@@ -1,3 +1,94 @@
# HarmFormer

HarmFormer is a finetuned `allenai/longformer-base-4096` trained to detect potentially harmful content in both long-text and short-text scenarios. It rates each input on three risk levels (Safe, Topical, Toxic) across five harm categories:

- H: Hate and Violence
- IH: Ideological Harm
- SE: Sexual Harm
- IL: Illegal Activities
- SI: Self-Inflicted Harm

HarmFormer is built to identify harmful content in text data (especially web pages), and can be used for content moderation, safety checks, and other applications where understanding the nature of a text's harmfulness is crucial.

More details about HarmFormer can be found in [our paper - Towards Safer Pretraining: Analyzing and Filtering Harmful Content in Webscale datasets for Responsible LLMs](https://arxiv.org/pdf/2505.02009).

## Model Details

- **Base Model:** allenai/longformer-base-4096
- **Number of Classes:** 5
- **Risk Levels per Class:** 3
- **Max Sequence Length:** 1024

## Usage

```python
from transformers import AutoTokenizer
from modeling import HarmFormer
import torch

# Load the model and tokenizer
model_path = "themendu/HarmFormer"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = HarmFormer.from_pretrained(model_path)

# Prepare input text
text = "Your text here"
inputs = tokenizer(
    text,
    add_special_tokens=True,
    max_length=1024,
    truncation=True,
    padding='max_length',
    return_attention_mask=True,
    return_tensors='pt',
)

# Run inference
with torch.no_grad():
    outputs = model(**inputs)

# Process outputs: stack the per-category logits into
# shape (batch, num_classes, num_risk_levels)
logits = torch.stack(outputs, dim=0).permute(1, 0, 2)
probabilities = torch.softmax(logits, dim=-1)
predictions = [
    [[round(prob, 3) for prob in class_probs] for class_probs in sample]
    for sample in probabilities.cpu().tolist()
]

print(predictions)
```
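
The result is a nested list of shape `(batch, 5 harm categories, 3 risk levels)`. As a minimal sketch of turning it into readable labels — the category order follows `class_map_dict` in `config.json`, and the Safe/Topical/Toxic index order is an assumption based on the description above:

```python
# Sketch: category order per config.json's class_map_dict;
# risk-level index order (Safe, Topical, Toxic) is an assumption.
categories = ["H", "IH", "SE", "IL", "SI"]
risk_levels = ["Safe", "Topical", "Toxic"]

for sample in predictions:
    for category, probs in zip(categories, sample):
        level = risk_levels[probs.index(max(probs))]  # argmax over risk levels
        print(f"{category}: {level} {probs}")
```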

### Batch Processing

For processing multiple texts at once:

```python
texts = ["Text 1", "Text 2", "Text 3"]
inputs = tokenizer(
    texts,
    add_special_tokens=True,
    max_length=1024,
    truncation=True,
    padding='max_length',
    return_attention_mask=True,
    return_tensors='pt',
)

with torch.no_grad():
    outputs = model(**inputs)

logits = torch.stack(outputs, dim=0).permute(1, 0, 2)
probabilities = torch.softmax(logits, dim=-1)
predictions = [
    [[round(prob, 3) for prob in class_probs] for class_probs in sample]
    for sample in probabilities.cpu().tolist()
]
```
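
For longer lists of texts, `modeling.py` also defines a `predict_batch` helper that tokenizes and runs the texts in fixed-size chunks so memory use stays bounded. A usage sketch:

```python
from modeling import predict_batch

# Processes the texts in chunks of 32 and returns the same nested
# (num_texts, 5, 3) probability structure as above.
predictions = predict_batch(model, tokenizer, texts, batch_size=32)
```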

## Citation

If you use this model in your research, please cite:

```
@misc{mendu2025saferpretraininganalyzingfiltering,
      title={Towards Safer Pretraining: Analyzing and Filtering Harmful Content in Webscale datasets for Responsible LLMs},
      author={Sai Krishna Mendu and Harish Yenala and Aditi Gulati and Shanu Kumar and Parag Agrawal},
      year={2025},
      eprint={2505.02009},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2505.02009},
}
```
config.json
ADDED
@@ -0,0 +1,13 @@
{
    "model_name": "allenai/longformer-base-4096",
    "num_classes": 5,
    "num_risk_levels": 3,
    "max_length": 1024,
    "class_map_dict": {
        "0": "H",
        "1": "IH",
        "2": "SE",
        "3": "IL",
        "4": "SI"
    }
}
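
`class_map_dict` maps each classification-head index to its harm-category code. A minimal sketch of reading the map at runtime, assuming the `themendu/HarmFormer` repo id from the model card:

```python
import json
from huggingface_hub import hf_hub_download

# Fetch config.json from the Hub and decode the head-index -> category map
config_path = hf_hub_download(repo_id="themendu/HarmFormer", filename="config.json")
with open(config_path) as f:
    class_map = json.load(f)["class_map_dict"]

print(class_map)  # {'0': 'H', '1': 'IH', '2': 'SE', '3': 'IL', '4': 'SI'}
```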
merges.txt
ADDED
The diff for this file is too large to render; see the raw diff.
modeling.py
ADDED
@@ -0,0 +1,104 @@
import torch
import torch.nn as nn
import os
import json
from transformers import LongformerModel, AutoModel, LongformerTokenizerFast, AutoTokenizer, PreTrainedModel

class HarmFormer(PreTrainedModel):
    def __init__(self, config):
        super(HarmFormer, self).__init__(config)
        self.num_classes = config.num_classes
        self.num_risk_levels = config.num_risk_levels

        # Base model
        self.base_model = AutoModel.from_config(config)

        # Classification heads: one small MLP per harm category
        hidden_size = self.base_model.config.hidden_size

        self.classifiers = nn.ModuleList([
            nn.Sequential(
                nn.Linear(hidden_size, 128),
                nn.ReLU(),
                nn.Linear(128, self.num_risk_levels)
            )
            for _ in range(self.num_classes)
        ])

    def forward(self, input_ids=None, attention_mask=None, **kwargs):
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs[1]  # Pooled [CLS] token output

        # Apply classifiers for each task
        logits = []
        for classifier in self.classifiers:
            logits.append(classifier(pooled_output))

        # List of num_classes tensors, each of shape (batch, num_risk_levels)
        return logits

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        # Imported up front: both the config and weight fallbacks below need it
        from huggingface_hub import hf_hub_download

        # Load config
        config_path = os.path.join(pretrained_model_name_or_path, "config.json")
        if os.path.exists(config_path):
            with open(config_path, 'r') as f:
                model_config = json.load(f)
        else:
            # Fall back to the HF Hub
            config_path = hf_hub_download(repo_id=pretrained_model_name_or_path, filename="config.json")
            with open(config_path, 'r') as f:
                model_config = json.load(f)

        # Create base model config
        from transformers import AutoConfig
        base_model_name = model_config.get("model_name", "allenai/longformer-base-4096")
        base_config = AutoConfig.from_pretrained(base_model_name)

        # Add our custom attributes
        base_config.num_classes = model_config.get("num_classes", 5)
        base_config.num_risk_levels = model_config.get("num_risk_levels", 3)
        base_config.architecture = model_config.get("architecture", "SingleFC")

        # Create model
        model = cls(base_config)

        # Load weights
        checkpoint_path = os.path.join(pretrained_model_name_or_path, "pytorch_model.bin")
        if os.path.exists(checkpoint_path):
            state_dict = torch.load(checkpoint_path, map_location="cpu")
        else:
            # Fall back to the HF Hub
            checkpoint_path = hf_hub_download(repo_id=pretrained_model_name_or_path, filename="pytorch_model.bin")
            state_dict = torch.load(checkpoint_path, map_location="cpu")

        model.load_state_dict(state_dict)
        model.eval()

        return model

def predict_batch(model, tokenizer, texts, batch_size=32):
    device = next(model.parameters()).device
    predictions = []

    # Process in batches to avoid OOM
    for i in range(0, len(texts), batch_size):
        batch_texts = texts[i:i+batch_size]
        inputs = tokenizer(
            batch_texts,
            add_special_tokens=True,
            max_length=1024,
            truncation=True,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
        ).to(device)

        with torch.no_grad():
            outputs = model(**inputs)
        logits = torch.stack(outputs, dim=0).permute(1, 0, 2)  # (batch_size, num_classes, num_risk_levels)
        probs = torch.softmax(logits, dim=-1)
        batch_preds = [[[round(prob, 3) for prob in class_probs] for class_probs in sample] for sample in probs.cpu().tolist()]
        predictions.extend(batch_preds)

    return predictions
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@

version https://git-lfs.github.com/spec/v1
oid sha256:ee60722788b387b958e635b628de34ebe5ebf9eb941dda2796ec045b2f84db24
size 596729622
special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
{
    "bos_token": "<s>",
    "cls_token": "<s>",
    "eos_token": "</s>",
    "mask_token": {
        "content": "<mask>",
        "lstrip": true,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    },
    "pad_token": "<pad>",
    "sep_token": "</s>",
    "unk_token": "<unk>"
}
tokenizer.json
ADDED
The diff for this file is too large to render; see the raw diff.
tokenizer_config.json
ADDED
@@ -0,0 +1,15 @@
{
    "add_prefix_space": false,
    "bos_token": "<s>",
    "clean_up_tokenization_spaces": true,
    "cls_token": "<s>",
    "eos_token": "</s>",
    "errors": "replace",
    "mask_token": "<mask>",
    "model_max_length": 4096,
    "pad_token": "<pad>",
    "sep_token": "</s>",
    "tokenizer_class": "LongformerTokenizer",
    "trim_offsets": true,
    "unk_token": "<unk>"
}
vocab.json
ADDED
The diff for this file is too large to render; see the raw diff.