tdunlap607 committed on
Commit
99d9876
·
1 Parent(s): 5675cf0

Fold 0 Epoch 8 Initial Push

Browse files
Files changed (3) hide show
  1. config.json +34 -0
  2. custom_models.py +124 -0
  3. pytorch_model.bin +3 -0
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/codebert-base",
3
+ "architectures": [
4
+ "CustomModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "auto_map": {
8
+ "AutoModel": "custom_models.CustomModel"
9
+ },
10
+ "bos_token_id": 0,
11
+ "classifier_dropout": null,
12
+ "eos_token_id": 2,
13
+ "hidden_act": "gelu",
14
+ "hidden_dropout_prob": 0.1,
15
+ "hidden_size": 768,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "layer_norm_eps": 1e-05,
19
+ "max_position_embeddings": 514,
20
+ "model_type": "roberta",
21
+ "num_attention_heads": 12,
22
+ "num_hidden_layers": 12,
23
+ "output_hidden_states": true,
24
+ "output_past": true,
25
+ "pad_token_id": 1,
26
+ "position_embedding_type": "absolute",
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.22.2",
29
+ "type_vocab_size": 1,
30
+ "use_cache": true,
31
+ "vocab_size": 50265,
32
+ "model_name": "microsoft/codebert-base",
33
+ "gradient_checkpointing": false
34
+ }
custom_models.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from transformers import AutoModel, AutoConfig
4
+ from transformers.modeling_utils import PreTrainedModel
5
+ from transformers import PretrainedConfig
6
+
7
+
8
class CustomConfig(PretrainedConfig):
    """Configuration for ``CustomModel``.

    Adds a ``num_classes`` field on top of the standard RoBERTa-style
    transformers configuration.
    """

    model_type = "roberta"

    def __init__(self, num_classes: int = 10, **kwargs):
        # Record the classifier head size, then delegate every remaining
        # keyword argument to the transformers base config.
        self.num_classes = num_classes
        super().__init__(**kwargs)
18
+
19
+
20
# ====================================================
# Model
# ====================================================
class MeanPooling(PreTrainedModel):
    """Masked mean pooling over token embeddings.

    Averages the last hidden states along the sequence dimension,
    counting only positions where ``attention_mask`` is 1.
    """

    def __init__(self, config):
        super().__init__(config)

    def forward(self, last_hidden_state, attention_mask):
        # Broadcast the mask up to the hidden dimension so that padded
        # positions contribute nothing to the sum.
        mask = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        summed = (last_hidden_state * mask).sum(dim=1)
        counts = mask.sum(dim=1).clamp(min=1e-9)  # guard against division by zero
        return summed / counts
41
+
42
+
43
class CustomModel(PreTrainedModel):
    """Transformer encoder with mean pooling and a linear classification head.

    Wraps a Hugging Face backbone (resolved from ``cfg.model_name``),
    mean-pools the last hidden states over non-padding tokens, and projects
    to ``num_labels`` logits — or to a single sigmoid-activated output when
    ``binary_classification`` is True.
    """

    config_class = CustomConfig

    def __init__(
        self,
        cfg,
        num_labels=10,
        config_path=None,
        pretrained=True,
        binary_classification=False,
        **kwargs,
    ):
        # NOTE(review): ``cfg`` is assumed to expose ``model_name`` and
        # ``gradient_checkpointing`` attributes — confirm with callers.
        self.cfg = cfg
        self.num_labels = num_labels

        # Resolve the backbone config either from the hub or from a
        # torch-serialized config file on disk.
        if config_path is None:
            self.config = AutoConfig.from_pretrained(
                self.cfg.model_name, output_hidden_states=True
            )
        else:
            self.config = torch.load(config_path)

        # PreTrainedModel/nn.Module machinery must be initialized before any
        # submodules (self.model, self.pool, self.fc) are assigned below.
        super().__init__(self.config)

        if pretrained:
            self.model = AutoModel.from_pretrained(
                self.cfg.model_name, config=self.config
            )
        else:
            # Bug fix: ``AutoModel(self.config)`` raises — AutoModel may only
            # be instantiated via ``from_pretrained``/``from_config``.
            self.model = AutoModel.from_config(self.config)

        if self.cfg.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

        self.pool = MeanPooling(config=self.config)

        self.binary_classification = binary_classification

        if self.binary_classification:
            # for binary classification we only want to output a single value
            self.fc = nn.Linear(self.config.hidden_size, self.num_labels - 1)
        else:
            self.fc = nn.Linear(self.config.hidden_size, self.num_labels)

        self._init_weights(self.fc)

        self.sigmoid_fn = nn.Sigmoid()

    def _init_weights(self, module):
        """Initialize a freshly created module with BERT-style weights
        (normal(0, initializer_range) for linear/embedding, identity for
        LayerNorm)."""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                # Keep the padding embedding at exactly zero.
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature(self, input_ids, attention_mask, token_type_ids):
        """Encode a batch and return its mean-pooled sentence embeddings."""
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        last_hidden_states = outputs[0]
        feature = self.pool(last_hidden_states, attention_mask)
        return feature

    def forward(self, input_ids, attention_mask, token_type_ids):
        """Return classification logits (sigmoid probabilities in binary mode)."""
        feature = self.feature(input_ids, attention_mask, token_type_ids)
        output = self.fc(feature)
        if self.binary_classification:
            # for binary classification we have to use Sigmoid Function
            # https://towardsdatascience.com/sigmoid-and-softmax-functions-in-5-minutes-f516c80ea1f9
            # https://towardsdatascience.com/bert-to-the-rescue-17671379687f
            output = self.sigmoid_fn(output)

        return output
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:483735d4723221ec96c788dee0489f2fed25c889691befd65864344379728a89
3
+ size 498686261