yangwang825 committed
Commit a675ded · 1 Parent(s): f84b875

Upload BertForSequenceClassification

Files changed (3):
1. config.json +6 -1
2. modeling_bert.py +4 -145
3. pytorch_model.bin +1 -1
config.json CHANGED
@@ -1,8 +1,12 @@
 {
   "affine": true,
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
   "attention_probs_dropout_prob": 0.1,
   "auto_map": {
-    "AutoConfig": "configuration_bert.BertConfig"
+    "AutoConfig": "configuration_bert.BertConfig",
+    "AutoModelForSequenceClassification": "modeling_bert.BertForSequenceClassification"
   },
   "classifier_dropout": null,
   "hidden_act": "gelu",
@@ -17,6 +21,7 @@
   "num_hidden_layers": 12,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
   "transformers_version": "4.33.3",
   "type_vocab_size": 2,
   "use_cache": true,
modeling_bert.py CHANGED
@@ -1,7 +1,5 @@
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
-from collections import OrderedDict
 from typing import Optional, List, Union, Tuple
 from transformers import (
     PretrainedConfig,
@@ -48,163 +46,24 @@ class BertPreTrainedModel(PreTrainedModel):
             module.weight.data.fill_(1.0)
 
 
-class PFSA(nn.Module):
-    """
-    https://openreview.net/pdf?id=isodM5jTA7h
-    """
-    def __init__(self, input_dim, alpha=1):
-        super(PFSA, self).__init__()
-        self.input_dim = input_dim
-        self.alpha = alpha
-
-    def forward(self, x, mask=None):
-        """
-        x: [B, T, F]
-        """
-        x = x.transpose(1, 2)[..., None]
-        k = torch.mean(x, dim=[-1, -2], keepdim=True)
-        kd = torch.sqrt((k - k.mean(dim=1, keepdim=True)).pow(2).sum(dim=1, keepdim=True))  # [B, 1, 1, 1]
-        qd = torch.sqrt((x - x.mean(dim=1, keepdim=True)).pow(2).sum(dim=1, keepdim=True))  # [B, 1, T, 1]
-        C_qk = (((x - x.mean(dim=1, keepdim=True)) * (k - k.mean(dim=1, keepdim=True))).sum(dim=1, keepdim=True)) / (qd * kd)
-        A = (1 - torch.sigmoid(C_qk)) ** self.alpha
-        out = x * A
-        out = out.squeeze(dim=-1).transpose(1, 2)
-        return out
-
-
-class PURE(nn.Module):
-
-    def __init__(
-        self,
-        in_dim,
-        q=5,
-        r=1,
-        center=False,
-        num_iters=1,
-        return_mean=True,
-        return_std=True,
-        normalize=False,
-        do_pcr=True,
-        do_pfsa=True,
-        alpha=1,
-        *args, **kwargs
-    ):
-        super().__init__()
-        self.in_dim = in_dim
-        self.target_rank = q
-        self.num_pc_to_remove = r
-        self.center = center
-        self.num_iters = num_iters
-        self.return_mean = return_mean
-        self.return_std = return_std
-        self.normalize = normalize
-        self.do_pcr = do_pcr
-        self.do_pfsa = do_pfsa
-        # self.attention = SelfAttention(in_dim)
-        self.attention = PFSA(in_dim, alpha=alpha)
-        self.eps = 1e-5
-
-        if self.normalize:
-            self.norm = nn.Sequential(OrderedDict([
-                ('relu', nn.LeakyReLU(inplace=True)),
-                ('bn', nn.BatchNorm1d(in_dim)),
-            ]))
-
-    def get_out_dim(self):
-        if self.return_mean and self.return_std:
-            self.out_dim = self.in_dim * 2
-        else:
-            self.out_dim = self.in_dim
-        return self.out_dim
-
-    def _compute_pc(self, x):
-        """
-        x: (B, T, F)
-        """
-        _, _, V = torch.pca_lowrank(x, q=self.target_rank, center=self.center, niter=self.num_iters)
-        pc = V.transpose(1, 2)[:, :self.num_pc_to_remove, :]  # pc: [B, K, F]
-        return pc
-
-    def forward(self, x, attention_mask=None, *args, **kwargs):
-        """
-        PCR -> Attention
-        x: (B, F, T)
-        """
-        if self.normalize:
-            x = self.norm(x)
-        xt = x.transpose(1, 2)
-        if self.do_pcr:
-            pc = self._compute_pc(xt)  # pc: [B, K, F]
-            xx = xt - xt @ pc.transpose(1, 2) @ pc  # [B, T, F] * [B, F, K] * [B, K, F] = [B, T, F]
-        else:
-            xx = xt
-        if self.do_pfsa:
-            xx = self.attention(xx, attention_mask)
-        if self.normalize:
-            xx = F.normalize(xx, p=2, dim=2)
-        return xx
-
-
 class BertPooler(nn.Module):
 
     def __init__(self, config):
         super().__init__()
-        self.pure = PURE(
-            config.hidden_size,
-            q=config.q,
-            r=config.r,
-            center=config.center,
-            num_iters=config.num_iters,
-            return_mean=config.return_mean,
-            return_std=config.return_std,
-            normalize=config.normalize,
-            do_pcr=config.do_pcr,
-            do_pfsa=config.do_pfsa,
-            alpha=config.alpha
-        )
         if config.affine:
             self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         else:
             self.dense = nn.Identity()
         self.activation = nn.Tanh()
-        self.eps = 1e-5
 
-    def forward(self, hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         # We "pool" the model by simply taking the hidden state corresponding
         # to the first token.
-        hidden_states = self.pure(hidden_states.transpose(1, 2), attention_mask)
-        mean_tensor = self.mean_pooling(hidden_states, attention_mask)
-        pooled_output = self.dense(mean_tensor)
+        first_token_tensor = hidden_states[:, 0]
+        pooled_output = self.dense(first_token_tensor)
         pooled_output = self.activation(pooled_output)
         return pooled_output
 
-    def _get_gauss_noise(self, shape_of_tensor, device="cpu"):
-        """Returns a tensor of epsilon Gaussian noise.
-
-        Arguments
-        ---------
-        shape_of_tensor : tensor
-            It represents the size of tensor for generating Gaussian noise.
-        """
-        gnoise = torch.randn(shape_of_tensor, device=device)
-        gnoise -= torch.min(gnoise)
-        gnoise /= torch.max(gnoise)
-        gnoise = self.eps * ((1 - 9) * gnoise + 9)
-
-        return gnoise
-
-    def add_noise(self, tensor):
-        gnoise = self._get_gauss_noise(tensor.size(), device=tensor.device)
-        gnoise = gnoise
-        tensor += gnoise
-        return tensor
-
-    def mean_pooling(self, token_embeddings, attention_mask):
-        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
-        mean = torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
-        # mean = self.add_noise(mean)
-        return mean
-
 
 class BertModel(BertPreTrainedModel):
 
@@ -324,7 +183,7 @@ class BertModel(BertPreTrainedModel):
             return_dict=return_dict,
         )
         sequence_output = encoder_outputs[0]
-        pooled_output = self.pooler(sequence_output, attention_mask) if self.pooler is not None else None
+        pooled_output = self.pooler(sequence_output) if self.pooler is not None else None
 
         if not return_dict:
             return (sequence_output, pooled_output) + encoder_outputs[1:]
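Net effect of this diff: the custom PURE pooler (principal-component removal via torch.pca_lowrank, PFSA attention, and masked mean pooling) is dropped, and BertPooler reverts to stock first-token ([CLS]) pooling, so the pooler no longer needs the attention mask. A minimal sketch of the before/after pooling behavior, with assumed toy shapes:

import torch

# Toy shapes for illustration: [batch, seq_len, hidden]
hidden_states = torch.randn(2, 16, 768)
attention_mask = torch.ones(2, 16)

# New behavior: pool the hidden state of the first ([CLS]) token.
cls_pooled = hidden_states[:, 0]  # [batch, hidden]

# Old behavior (before the dense/tanh head): masked mean over tokens,
# as in the removed mean_pooling helper.
mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
mean_pooled = (hidden_states * mask).sum(1) / mask.sum(1).clamp(min=1e-9)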
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:64dd3354da4b868afe78cc83d9e51ed4ca20cab88015a22a38257b205c9eadd4
+oid sha256:ae9bcc7a4ae5b93f43cf78aa7dea754315ca54e073d4a6b4c780bc4be2dd2406
 size 438000689
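The oid in a Git LFS pointer is the SHA-256 digest of the stored file, so a downloaded weights file can be checked against the pointer recorded in this commit. A minimal sketch, assuming the file has been downloaded locally:

import hashlib

# Stream the file so the ~438 MB checkpoint is not read into memory at once.
h = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:  # assumed local download path
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
# For this revision the digest should equal
# ae9bcc7a4ae5b93f43cf78aa7dea754315ca54e073d4a6b4c780bc4be2dd2406.
print(h.hexdigest())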