Commit ·
a675ded
1
Parent(s): f84b875
Upload BertForSequenceClassification
Browse files
- config.json +6 -1
- modeling_bert.py +4 -145
- pytorch_model.bin +1 -1
config.json
CHANGED
|
@@ -1,8 +1,12 @@
|
|
| 1 |
{
|
| 2 |
"affine": true,
|
|
|
|
|
|
|
|
|
|
| 3 |
"attention_probs_dropout_prob": 0.1,
|
| 4 |
"auto_map": {
|
| 5 |
-
"AutoConfig": "configuration_bert.BertConfig"
|
|
|
|
| 6 |
},
|
| 7 |
"classifier_dropout": null,
|
| 8 |
"hidden_act": "gelu",
|
|
@@ -17,6 +21,7 @@
|
|
| 17 |
"num_hidden_layers": 12,
|
| 18 |
"pad_token_id": 0,
|
| 19 |
"position_embedding_type": "absolute",
|
|
|
|
| 20 |
"transformers_version": "4.33.3",
|
| 21 |
"type_vocab_size": 2,
|
| 22 |
"use_cache": true,
|
|
|
|
| 1 |
{
|
| 2 |
"affine": true,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
"attention_probs_dropout_prob": 0.1,
|
| 7 |
"auto_map": {
|
| 8 |
+
"AutoConfig": "configuration_bert.BertConfig",
|
| 9 |
+
"AutoModelForSequenceClassification": "modeling_bert.BertForSequenceClassification"
|
| 10 |
},
|
| 11 |
"classifier_dropout": null,
|
| 12 |
"hidden_act": "gelu",
|
|
|
|
| 21 |
"num_hidden_layers": 12,
|
| 22 |
"pad_token_id": 0,
|
| 23 |
"position_embedding_type": "absolute",
|
| 24 |
+
"torch_dtype": "float32",
|
| 25 |
"transformers_version": "4.33.3",
|
| 26 |
"type_vocab_size": 2,
|
| 27 |
"use_cache": true,
|
modeling_bert.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
| 1 |
import torch
|
| 2 |
import torch.nn as nn
|
| 3 |
-
import torch.nn.functional as F
|
| 4 |
-
from collections import OrderedDict
|
| 5 |
from typing import Optional, List, Union, Tuple
|
| 6 |
from transformers import (
|
| 7 |
PretrainedConfig,
|
|
@@ -48,163 +46,24 @@ class BertPreTrainedModel(PreTrainedModel):
|
|
| 48 |
module.weight.data.fill_(1.0)
|
| 49 |
|
| 50 |
|
| 51 |
-
class PFSA(nn.Module):
|
| 52 |
-
"""
|
| 53 |
-
https://openreview.net/pdf?id=isodM5jTA7h
|
| 54 |
-
"""
|
| 55 |
-
def __init__(self, input_dim, alpha=1):
|
| 56 |
-
super(PFSA, self).__init__()
|
| 57 |
-
self.input_dim = input_dim
|
| 58 |
-
self.alpha = alpha
|
| 59 |
-
|
| 60 |
-
def forward(self, x, mask=None):
|
| 61 |
-
"""
|
| 62 |
-
x: [B, T, F]
|
| 63 |
-
"""
|
| 64 |
-
x = x.transpose(1, 2)[..., None]
|
| 65 |
-
k = torch.mean(x, dim=[-1, -2], keepdim=True)
|
| 66 |
-
kd = torch.sqrt((k - k.mean(dim=1, keepdim=True)).pow(2).sum(dim=1, keepdim=True)) # [B, 1, 1, 1]
|
| 67 |
-
qd = torch.sqrt((x - x.mean(dim=1, keepdim=True)).pow(2).sum(dim=1, keepdim=True)) # [B, 1, T, 1]
|
| 68 |
-
C_qk = (((x - x.mean(dim=1, keepdim=True)) * (k - k.mean(dim=1, keepdim=True))).sum(dim=1, keepdim=True)) / (qd * kd)
|
| 69 |
-
A = (1 - torch.sigmoid(C_qk)) ** self.alpha
|
| 70 |
-
out = x * A
|
| 71 |
-
out = out.squeeze(dim=-1).transpose(1, 2)
|
| 72 |
-
return out
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
class PURE(nn.Module):
|
| 76 |
-
|
| 77 |
-
def __init__(
|
| 78 |
-
self,
|
| 79 |
-
in_dim,
|
| 80 |
-
q=5,
|
| 81 |
-
r=1,
|
| 82 |
-
center=False,
|
| 83 |
-
num_iters=1,
|
| 84 |
-
return_mean=True,
|
| 85 |
-
return_std=True,
|
| 86 |
-
normalize=False,
|
| 87 |
-
do_pcr=True,
|
| 88 |
-
do_pfsa=True,
|
| 89 |
-
alpha=1,
|
| 90 |
-
*args, **kwargs
|
| 91 |
-
):
|
| 92 |
-
super().__init__()
|
| 93 |
-
self.in_dim = in_dim
|
| 94 |
-
self.target_rank = q
|
| 95 |
-
self.num_pc_to_remove = r
|
| 96 |
-
self.center = center
|
| 97 |
-
self.num_iters = num_iters
|
| 98 |
-
self.return_mean = return_mean
|
| 99 |
-
self.return_std = return_std
|
| 100 |
-
self.normalize = normalize
|
| 101 |
-
self.do_pcr = do_pcr
|
| 102 |
-
self.do_pfsa = do_pfsa
|
| 103 |
-
# self.attention = SelfAttention(in_dim)
|
| 104 |
-
self.attention = PFSA(in_dim, alpha=alpha)
|
| 105 |
-
self.eps = 1e-5
|
| 106 |
-
|
| 107 |
-
if self.normalize:
|
| 108 |
-
self.norm = nn.Sequential(OrderedDict([
|
| 109 |
-
('relu', nn.LeakyReLU(inplace=True)),
|
| 110 |
-
('bn', nn.BatchNorm1d(in_dim)),
|
| 111 |
-
]))
|
| 112 |
-
|
| 113 |
-
def get_out_dim(self):
|
| 114 |
-
if self.return_mean and self.return_std:
|
| 115 |
-
self.out_dim = self.in_dim * 2
|
| 116 |
-
else:
|
| 117 |
-
self.out_dim = self.in_dim
|
| 118 |
-
return self.out_dim
|
| 119 |
-
|
| 120 |
-
def _compute_pc(self, x):
|
| 121 |
-
"""
|
| 122 |
-
x: (B, T, F)
|
| 123 |
-
"""
|
| 124 |
-
_, _, V = torch.pca_lowrank(x, q=self.target_rank, center=self.center, niter=self.num_iters)
|
| 125 |
-
pc = V.transpose(1, 2)[:, :self.num_pc_to_remove, :] # pc: [B, K, F]
|
| 126 |
-
return pc
|
| 127 |
-
|
| 128 |
-
def forward(self, x, attention_mask=None, *args, **kwargs):
|
| 129 |
-
"""
|
| 130 |
-
PCR -> Attention
|
| 131 |
-
x: (B, F, T)
|
| 132 |
-
"""
|
| 133 |
-
if self.normalize:
|
| 134 |
-
x = self.norm(x)
|
| 135 |
-
xt = x.transpose(1, 2)
|
| 136 |
-
if self.do_pcr:
|
| 137 |
-
pc = self._compute_pc(xt) # pc: [B, K, F]
|
| 138 |
-
xx = xt - xt @ pc.transpose(1, 2) @ pc # [B, T, F] * [B, F, K] * [B, K, F] = [B, T, F]
|
| 139 |
-
else:
|
| 140 |
-
xx = xt
|
| 141 |
-
if self.do_pfsa:
|
| 142 |
-
xx = self.attention(xx, attention_mask)
|
| 143 |
-
if self.normalize:
|
| 144 |
-
xx = F.normalize(xx, p=2, dim=2)
|
| 145 |
-
return xx
|
| 146 |
-
|
| 147 |
-
|
| 148 |
class BertPooler(nn.Module):
|
| 149 |
|
| 150 |
def __init__(self, config):
|
| 151 |
super().__init__()
|
| 152 |
-
self.pure = PURE(
|
| 153 |
-
config.hidden_size,
|
| 154 |
-
q=config.q,
|
| 155 |
-
r=config.r,
|
| 156 |
-
center=config.center,
|
| 157 |
-
num_iters=config.num_iters,
|
| 158 |
-
return_mean=config.return_mean,
|
| 159 |
-
return_std=config.return_std,
|
| 160 |
-
normalize=config.normalize,
|
| 161 |
-
do_pcr=config.do_pcr,
|
| 162 |
-
do_pfsa=config.do_pfsa,
|
| 163 |
-
alpha=config.alpha
|
| 164 |
-
)
|
| 165 |
if config.affine:
|
| 166 |
self.dense = nn.Linear(config.hidden_size, config.hidden_size)
|
| 167 |
else:
|
| 168 |
self.dense = nn.Identity()
|
| 169 |
self.activation = nn.Tanh()
|
| 170 |
-
self.eps = 1e-5
|
| 171 |
|
| 172 |
-
def forward(self, hidden_states: torch.Tensor
|
| 173 |
# We "pool" the model by simply taking the hidden state corresponding
|
| 174 |
# to the first token.
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
pooled_output = self.dense(mean_tensor)
|
| 178 |
pooled_output = self.activation(pooled_output)
|
| 179 |
return pooled_output
|
| 180 |
|
| 181 |
-
def _get_gauss_noise(self, shape_of_tensor, device="cpu"):
|
| 182 |
-
"""Returns a tensor of epsilon Gaussian noise.
|
| 183 |
-
|
| 184 |
-
Arguments
|
| 185 |
-
---------
|
| 186 |
-
shape_of_tensor : tensor
|
| 187 |
-
It represents the size of tensor for generating Gaussian noise.
|
| 188 |
-
"""
|
| 189 |
-
gnoise = torch.randn(shape_of_tensor, device=device)
|
| 190 |
-
gnoise -= torch.min(gnoise)
|
| 191 |
-
gnoise /= torch.max(gnoise)
|
| 192 |
-
gnoise = self.eps * ((1 - 9) * gnoise + 9)
|
| 193 |
-
|
| 194 |
-
return gnoise
|
| 195 |
-
|
| 196 |
-
def add_noise(self, tensor):
|
| 197 |
-
gnoise = self._get_gauss_noise(tensor.size(), device=tensor.device)
|
| 198 |
-
gnoise = gnoise
|
| 199 |
-
tensor += gnoise
|
| 200 |
-
return tensor
|
| 201 |
-
|
| 202 |
-
def mean_pooling(self, token_embeddings, attention_mask):
|
| 203 |
-
input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
|
| 204 |
-
mean = torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
|
| 205 |
-
# mean = self.add_noise(mean)
|
| 206 |
-
return mean
|
| 207 |
-
|
| 208 |
|
| 209 |
class BertModel(BertPreTrainedModel):
|
| 210 |
|
|
@@ -324,7 +183,7 @@ class BertModel(BertPreTrainedModel):
|
|
| 324 |
return_dict=return_dict,
|
| 325 |
)
|
| 326 |
sequence_output = encoder_outputs[0]
|
| 327 |
-
pooled_output = self.pooler(sequence_output
|
| 328 |
|
| 329 |
if not return_dict:
|
| 330 |
return (sequence_output, pooled_output) + encoder_outputs[1:]
|
|
|
|
| 1 |
import torch
|
| 2 |
import torch.nn as nn
|
|
|
|
|
|
|
| 3 |
from typing import Optional, List, Union, Tuple
|
| 4 |
from transformers import (
|
| 5 |
PretrainedConfig,
|
|
|
|
| 46 |
module.weight.data.fill_(1.0)
|
| 47 |
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
class BertPooler(nn.Module):
|
| 50 |
|
| 51 |
def __init__(self, config):
|
| 52 |
super().__init__()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
if config.affine:
|
| 54 |
self.dense = nn.Linear(config.hidden_size, config.hidden_size)
|
| 55 |
else:
|
| 56 |
self.dense = nn.Identity()
|
| 57 |
self.activation = nn.Tanh()
|
|
|
|
| 58 |
|
| 59 |
+
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
|
| 60 |
# We "pool" the model by simply taking the hidden state corresponding
|
| 61 |
# to the first token.
|
| 62 |
+
first_token_tensor = hidden_states[:, 0]
|
| 63 |
+
pooled_output = self.dense(first_token_tensor)
|
|
|
|
| 64 |
pooled_output = self.activation(pooled_output)
|
| 65 |
return pooled_output
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
class BertModel(BertPreTrainedModel):
|
| 69 |
|
|
|
|
| 183 |
return_dict=return_dict,
|
| 184 |
)
|
| 185 |
sequence_output = encoder_outputs[0]
|
| 186 |
+
pooled_output = self.pooler(sequence_output) if self.pooler is not None else None
|
| 187 |
|
| 188 |
if not return_dict:
|
| 189 |
return (sequence_output, pooled_output) + encoder_outputs[1:]
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 438000689
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae9bcc7a4ae5b93f43cf78aa7dea754315ca54e073d4a6b4c780bc4be2dd2406
|
| 3 |
size 438000689
|