themendu committed (verified)
Commit cac9916 · Parent(s): 60da4a7

Upload HarmFormer

Files changed (9)
  1. README.md +94 -3
  2. config.json +13 -0
  3. merges.txt +0 -0
  4. modeling.py +104 -0
  5. pytorch_model.bin +3 -0
  6. special_tokens_map.json +15 -0
  7. tokenizer.json +0 -0
  8. tokenizer_config.json +15 -0
  9. vocab.json +0 -0
README.md CHANGED
@@ -1,3 +1,94 @@
- ---
- license: mit
- ---
+ # HarmFormer
+
+ HarmFormer is a fine-tuned `allenai/longformer-base-4096` trained to detect potentially harmful content in both long-text and short-text scenarios. It scores input against five harm categories, each rated on one of three risk levels (Safe, Topical, Toxic):
+ - H: Hate and Violence
+ - IH: Ideological Harm
+ - SE: Sexual Harm
+ - IL: Illegal Activities
+ - SI: Self-Inflicted Harm
+
+ We built HarmFormer to detect harmful content in text data (especially web pages). It can be used for content moderation, safety checks, and other applications where understanding the nature of a text's harmfulness is crucial.
+
+ More details about HarmFormer can be found in [our paper - Towards Safer Pretraining: Analyzing and Filtering Harmful Content in Webscale datasets for Responsible LLMs](https://arxiv.org/pdf/2505.02009).
+
+ ## Model Details
+
+ - **Base Model:** allenai/longformer-base-4096
+ - **Number of Classes:** 5
+ - **Risk Levels per Class:** 3
+ - **Max Sequence Length:** 1024 tokens (the base Longformer supports up to 4096)
+
+ ## Usage
+
+ ```python
+ from transformers import AutoTokenizer
+ from modeling import HarmFormer  # modeling.py ships with this repository
+ import torch
+
+ # Load the model and tokenizer
+ model_path = "themendu/HarmFormer"
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ model = HarmFormer.from_pretrained(model_path)
+
+ # Prepare input text
+ text = "Your text here"
+ inputs = tokenizer(
+     text,
+     add_special_tokens=True,
+     max_length=1024,
+     truncation=True,
+     padding='max_length',
+     return_attention_mask=True,
+     return_tensors='pt',
+ )
+
+ # Run inference
+ with torch.no_grad():
+     outputs = model(**inputs)
+
+ # Process outputs: the model returns one logit tensor per harm category;
+ # stacking gives shape (batch_size, num_classes=5, num_risk_levels=3)
+ logits = torch.stack(outputs, dim=0).permute(1, 0, 2)
+ probabilities = torch.softmax(logits, dim=-1)
+ predictions = [[[round(prob, 3) for prob in class_probs] for class_probs in sample] for sample in probabilities.cpu().tolist()]
+
+ print(predictions)
+ ```
+
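+ To make the raw output easier to read, you can map each category's probabilities back to its label and most likely risk level. The following is a minimal sketch (not part of the repository); the category order follows `class_map_dict` in `config.json`, and the risk-level ordering (Safe, Topical, Toxic) is an assumption based on the description above:
+
+ ```python
+ # Hypothetical helper: decode one sample's (5 x 3) probabilities into labels
+ CATEGORIES = ["H", "IH", "SE", "IL", "SI"]   # head order from config.json's class_map_dict
+ RISK_LEVELS = ["Safe", "Topical", "Toxic"]   # assumed order of the three risk levels
+
+ def decode_prediction(sample_probs):
+     """Return {category: (most likely risk level, its probability)}."""
+     decoded = {}
+     for category, class_probs in zip(CATEGORIES, sample_probs):
+         best = max(range(len(class_probs)), key=lambda i: class_probs[i])
+         decoded[category] = (RISK_LEVELS[best], class_probs[best])
+     return decoded
+
+ print(decode_prediction(predictions[0]))
+ ```
+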
+ ### Batch Processing
+
+ For processing multiple texts at once:
+
+ ```python
+ texts = ["Text 1", "Text 2", "Text 3"]
+ inputs = tokenizer(
+     texts,
+     add_special_tokens=True,
+     max_length=1024,
+     truncation=True,
+     padding='max_length',
+     return_attention_mask=True,
+     return_tensors='pt',
+ )
+
+ with torch.no_grad():
+     outputs = model(**inputs)
+
+ logits = torch.stack(outputs, dim=0).permute(1, 0, 2)
+ probabilities = torch.softmax(logits, dim=-1)
+ predictions = [[[round(prob, 3) for prob in class_probs] for class_probs in sample] for sample in probabilities.cpu().tolist()]
+ ```
+
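+ For larger corpora, `modeling.py` also defines a `predict_batch` helper that chunks the input to avoid out-of-memory errors. A minimal usage sketch, assuming `predict_batch` is importable as a module-level function and that a GPU may or may not be available:
+
+ ```python
+ import torch
+ from modeling import predict_batch
+
+ # Move the model to GPU if one is available (device choice is an assumption);
+ # predict_batch tokenizes and runs inference in chunks of batch_size texts
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.to(device)
+
+ texts = ["Text 1", "Text 2", "Text 3"]
+ predictions = predict_batch(model, tokenizer, texts, batch_size=32)
+ print(predictions[0])  # 5 categories x 3 risk-level probabilities
+ ```
+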
+ ## Citation
+
+ If you use this model in your research, please cite:
+ ```
+ @misc{mendu2025saferpretraininganalyzingfiltering,
+   title={Towards Safer Pretraining: Analyzing and Filtering Harmful Content in Webscale datasets for Responsible LLMs},
+   author={Sai Krishna Mendu and Harish Yenala and Aditi Gulati and Shanu Kumar and Parag Agrawal},
+   year={2025},
+   eprint={2505.02009},
+   archivePrefix={arXiv},
+   primaryClass={cs.CL},
+   url={https://arxiv.org/abs/2505.02009},
+ }
+ ```
config.json ADDED
@@ -0,0 +1,13 @@
+ {
+     "model_name": "allenai/longformer-base-4096",
+     "num_classes": 5,
+     "num_risk_levels": 3,
+     "max_length": 1024,
+     "class_map_dict": {
+         "0": "H",
+         "1": "IH",
+         "2": "SE",
+         "3": "IL",
+         "4": "SI"
+     }
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
modeling.py ADDED
@@ -0,0 +1,104 @@
+ import torch
+ import torch.nn as nn
+ import os
+ import json
+ from transformers import AutoConfig, AutoModel, PreTrainedModel
+ from huggingface_hub import hf_hub_download
+
+ class HarmFormer(PreTrainedModel):
+     def __init__(self, config):
+         super().__init__(config)
+         self.num_classes = config.num_classes
+         self.num_risk_levels = config.num_risk_levels
+
+         # Base model
+         self.base_model = AutoModel.from_config(config)
+
+         # Classification heads: one small MLP per harm category
+         hidden_size = self.base_model.config.hidden_size
+
+         self.classifiers = nn.ModuleList([
+             nn.Sequential(
+                 nn.Linear(hidden_size, 128),
+                 nn.ReLU(),
+                 nn.Linear(128, self.num_risk_levels)
+             )
+             for _ in range(self.num_classes)
+         ])
+
+     def forward(self, input_ids=None, attention_mask=None, **kwargs):
+         outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
+         pooled_output = outputs[1]  # Pooled [CLS] token output
+
+         # Apply the classifier head for each harm category
+         logits = []
+         for classifier in self.classifiers:
+             logits.append(classifier(pooled_output))
+
+         return logits
+
+     @classmethod
+     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
+         # Load config, falling back to the HF Hub if the path is not local
+         config_path = os.path.join(pretrained_model_name_or_path, "config.json")
+         if not os.path.exists(config_path):
+             config_path = hf_hub_download(repo_id=pretrained_model_name_or_path, filename="config.json")
+         with open(config_path, 'r') as f:
+             model_config = json.load(f)
+
+         # Create base model config
+         base_model_name = model_config.get("model_name", "allenai/longformer-base-4096")
+         base_config = AutoConfig.from_pretrained(base_model_name)
+
+         # Add our custom attributes
+         base_config.num_classes = model_config.get("num_classes", 5)
+         base_config.num_risk_levels = model_config.get("num_risk_levels", 3)
+         base_config.architecture = model_config.get("architecture", "SingleFC")
+
+         # Create model
+         model = cls(base_config)
+
+         # Load weights, again falling back to the HF Hub
+         checkpoint_path = os.path.join(pretrained_model_name_or_path, "pytorch_model.bin")
+         if not os.path.exists(checkpoint_path):
+             checkpoint_path = hf_hub_download(repo_id=pretrained_model_name_or_path, filename="pytorch_model.bin")
+         state_dict = torch.load(checkpoint_path, map_location="cpu")
+
+         model.load_state_dict(state_dict)
+         model.eval()
+
+         return model
+
+ def predict_batch(model, tokenizer, texts, batch_size=32):
+     device = next(model.parameters()).device
+     predictions = []
+
+     # Process in batches to avoid OOM
+     for i in range(0, len(texts), batch_size):
+         batch_texts = texts[i:i+batch_size]
+         inputs = tokenizer(
+             batch_texts,
+             add_special_tokens=True,
+             max_length=1024,
+             truncation=True,
+             padding='max_length',
+             return_attention_mask=True,
+             return_tensors='pt',
+         ).to(device)
+
+         with torch.no_grad():
+             outputs = model(**inputs)
+         logits = torch.stack(outputs, dim=0).permute(1, 0, 2)  # (batch_size, num_classes, num_risk_levels)
+         probs = torch.softmax(logits, dim=-1)
+         batch_preds = [[[round(prob, 3) for prob in class_probs] for class_probs in sample] for sample in probs.cpu().tolist()]
+         predictions.extend(batch_preds)
+
+     return predictions
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ee60722788b387b958e635b628de34ebe5ebf9eb941dda2796ec045b2f84db24
+ size 596729622
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+     "bos_token": "<s>",
+     "cls_token": "<s>",
+     "eos_token": "</s>",
+     "mask_token": {
+         "content": "<mask>",
+         "lstrip": true,
+         "normalized": false,
+         "rstrip": false,
+         "single_word": false
+     },
+     "pad_token": "<pad>",
+     "sep_token": "</s>",
+     "unk_token": "<unk>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
+ {
+     "add_prefix_space": false,
+     "bos_token": "<s>",
+     "clean_up_tokenization_spaces": true,
+     "cls_token": "<s>",
+     "eos_token": "</s>",
+     "errors": "replace",
+     "mask_token": "<mask>",
+     "model_max_length": 4096,
+     "pad_token": "<pad>",
+     "sep_token": "</s>",
+     "tokenizer_class": "LongformerTokenizer",
+     "trim_offsets": true,
+     "unk_token": "<unk>"
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff