mramazan committed on
Commit
16f3f17
·
verified ·
1 Parent(s): 16fc1de

Delete models

Browse files
Files changed (41) hide show
  1. models/__init__.py +0 -14
  2. models/__pycache__/__init__.cpython-312.pyc +0 -0
  3. models/__pycache__/base.cpython-312.pyc +0 -0
  4. models/__pycache__/bert.cpython-312.pyc +0 -0
  5. models/__pycache__/dae.cpython-312.pyc +0 -0
  6. models/__pycache__/vae.cpython-312.pyc +0 -0
  7. models/base.py +0 -15
  8. models/bert.py +0 -19
  9. models/bert_modules/__init__.py +0 -1
  10. models/bert_modules/__pycache__/__init__.cpython-312.pyc +0 -0
  11. models/bert_modules/__pycache__/bert.cpython-312.pyc +0 -0
  12. models/bert_modules/__pycache__/transformer.cpython-312.pyc +0 -0
  13. models/bert_modules/attention/__init__.py +0 -2
  14. models/bert_modules/attention/__pycache__/__init__.cpython-312.pyc +0 -0
  15. models/bert_modules/attention/__pycache__/multi_head.cpython-312.pyc +0 -0
  16. models/bert_modules/attention/__pycache__/single.cpython-312.pyc +0 -0
  17. models/bert_modules/attention/multi_head.py +0 -37
  18. models/bert_modules/attention/single.py +0 -25
  19. models/bert_modules/bert.py +0 -44
  20. models/bert_modules/embedding/__init__.py +0 -1
  21. models/bert_modules/embedding/__pycache__/__init__.cpython-312.pyc +0 -0
  22. models/bert_modules/embedding/__pycache__/bert.cpython-312.pyc +0 -0
  23. models/bert_modules/embedding/__pycache__/position.cpython-312.pyc +0 -0
  24. models/bert_modules/embedding/__pycache__/token.cpython-312.pyc +0 -0
  25. models/bert_modules/embedding/bert.py +0 -31
  26. models/bert_modules/embedding/position.py +0 -16
  27. models/bert_modules/embedding/segment.py +0 -6
  28. models/bert_modules/embedding/token.py +0 -6
  29. models/bert_modules/transformer.py +0 -31
  30. models/bert_modules/utils/__init__.py +0 -4
  31. models/bert_modules/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  32. models/bert_modules/utils/__pycache__/feed_forward.cpython-312.pyc +0 -0
  33. models/bert_modules/utils/__pycache__/gelu.cpython-312.pyc +0 -0
  34. models/bert_modules/utils/__pycache__/layer_norm.cpython-312.pyc +0 -0
  35. models/bert_modules/utils/__pycache__/sublayer.cpython-312.pyc +0 -0
  36. models/bert_modules/utils/feed_forward.py +0 -16
  37. models/bert_modules/utils/gelu.py +0 -12
  38. models/bert_modules/utils/layer_norm.py +0 -17
  39. models/bert_modules/utils/sublayer.py +0 -18
  40. models/dae.py +0 -54
  41. models/vae.py +0 -69
models/__init__.py DELETED
@@ -1,14 +0,0 @@
1
- from .bert import BERTModel
2
- from .dae import DAEModel
3
- from .vae import VAEModel
4
-
5
# Registry mapping each model's short code (the value returned by ``code()``)
# to the class that implements it.
MODELS = {
    BERTModel.code(): BERTModel,
    DAEModel.code(): DAEModel,
    VAEModel.code(): VAEModel,
}


def model_factory(args):
    """
    Instantiate the model selected by ``args.model_code``.

    :param args: configuration object; ``args.model_code`` selects the class from
        ``MODELS`` and the whole object is forwarded to that class's constructor
    :return: the constructed model instance
    :raises KeyError: if ``args.model_code`` is not a registered code; the message
        lists the codes that are available
    """
    try:
        model_cls = MODELS[args.model_code]
    except KeyError:
        valid_codes = ', '.join(sorted(MODELS))
        # Still a KeyError (as before), but one that tells the user what is valid.
        raise KeyError(
            f'Unknown model_code {args.model_code!r}; expected one of: {valid_codes}'
        ) from None
    return model_cls(args)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (656 Bytes)
 
models/__pycache__/base.cpython-312.pyc DELETED
Binary file (884 Bytes)
 
models/__pycache__/bert.cpython-312.pyc DELETED
Binary file (1.31 kB)
 
models/__pycache__/dae.cpython-312.pyc DELETED
Binary file (3.34 kB)
 
models/__pycache__/vae.cpython-312.pyc DELETED
Binary file (4.03 kB)
 
models/base.py DELETED
@@ -1,15 +0,0 @@
1
- import torch.nn as nn
2
-
3
- from abc import *
4
-
5
-
6
class BaseModel(nn.Module, metaclass=ABCMeta):
    """Abstract parent of all models: keeps ``args`` and requires a ``code()`` classmethod."""

    def __init__(self, args):
        """
        :param args: configuration object, stored unchanged as ``self.args``
        """
        super(BaseModel, self).__init__()
        self.args = args

    @classmethod
    @abstractmethod
    def code(cls):
        """Return the short code identifying the model; subclasses must override this."""
        return None
15
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/bert.py DELETED
@@ -1,19 +0,0 @@
1
- from .base import BaseModel
2
- from .bert_modules.bert import BERT
3
-
4
- import torch.nn as nn
5
-
6
-
7
class BERTModel(BaseModel):
    """BERT encoder followed by a linear layer with ``args.num_items + 1`` outputs."""

    def __init__(self, args):
        """
        :param args: configuration object; passed to ``BaseModel`` and ``BERT``,
            and ``args.num_items`` sizes the output layer
        """
        super().__init__(args)
        self.bert = BERT(args)
        # The output layer maps the encoder's hidden width to num_items + 1 values.
        out_features = args.num_items + 1
        self.out = nn.Linear(self.bert.hidden, out_features)

    @classmethod
    def code(cls):
        """Return the short code identifying this model."""
        return 'bert'

    def forward(self, x):
        """Run ``x`` through the encoder, then through the output layer."""
        encoded = self.bert(x)
        projected = self.out(encoded)
        return projected
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/bert_modules/__init__.py DELETED
@@ -1 +0,0 @@
1
-
 
 
models/bert_modules/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (178 Bytes)
 
models/bert_modules/__pycache__/bert.cpython-312.pyc DELETED
Binary file (2.37 kB)
 
models/bert_modules/__pycache__/transformer.cpython-312.pyc DELETED
Binary file (2.26 kB)
 
models/bert_modules/attention/__init__.py DELETED
@@ -1,2 +0,0 @@
1
- from .multi_head import MultiHeadedAttention
2
- from .single import Attention
 
 
 
models/bert_modules/attention/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (287 Bytes)
 
models/bert_modules/attention/__pycache__/multi_head.cpython-312.pyc DELETED
Binary file (2.44 kB)
 
models/bert_modules/attention/__pycache__/single.cpython-312.pyc DELETED
Binary file (1.31 kB)
 
models/bert_modules/attention/multi_head.py DELETED
@@ -1,37 +0,0 @@
1
- import torch.nn as nn
2
- from .single import Attention
3
-
4
-
5
class MultiHeadedAttention(nn.Module):
    """
    Multi-head attention: project query/key/value, attend per head, merge the heads.

    Take in model size and number of heads.
    """

    def __init__(self, h, d_model, dropout=0.1):
        """
        :param h: number of attention heads; must be positive and divide ``d_model``
        :param d_model: model width shared by the input and output projections
        :param dropout: dropout probability passed on to the attention computation
        :raises ValueError: if ``h`` is not positive or does not divide ``d_model``
        """
        super().__init__()
        # Explicit check rather than ``assert`` so it still runs under ``python -O``.
        if h <= 0 or d_model % h != 0:
            raise ValueError(
                f'd_model ({d_model}) must be divisible by a positive number of heads (got h={h})'
            )

        # We assume d_v always equals d_k
        self.d_k = d_model // h
        self.h = h

        # Three input projections, used in order for query, key and value.
        self.linear_layers = nn.ModuleList([nn.Linear(d_model, d_model) for _ in range(3)])
        self.output_linear = nn.Linear(d_model, d_model)
        self.attention = Attention()

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, query, key, value, mask=None):
        """
        :param query: tensor whose first dimension is the batch size and whose last
            dimension is ``d_model``
        :param key: tensor laid out like ``query``
        :param value: tensor laid out like ``query``
        :param mask: optional mask forwarded unchanged to the attention module
        :return: output of the final linear layer, last dimension ``d_model``
        """
        batch_size = query.size(0)

        # 1) Project each input, then view/transpose it to (batch, h, -1, d_k).
        query, key, value = [
            projection(tensor).view(batch_size, -1, self.h, self.d_k).transpose(1, 2)
            for projection, tensor in zip(self.linear_layers, (query, key, value))
        ]

        # 2) Apply attention to all heads at once; the attention weights are not needed here.
        x, _ = self.attention(query, key, value, mask=mask, dropout=self.dropout)

        # 3) "Concat" the heads back into one h * d_k dimension using a view.
        x = x.transpose(1, 2).contiguous().view(batch_size, -1, self.h * self.d_k)

        return self.output_linear(x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/bert_modules/attention/single.py DELETED
@@ -1,25 +0,0 @@
1
- import torch.nn as nn
2
- import torch.nn.functional as F
3
- import torch
4
-
5
- import math
6
-
7
-
8
class Attention(nn.Module):
    """
    Compute 'Scaled Dot Product Attention'.
    """

    def forward(self, query, key, value, mask=None, dropout=None):
        """
        :param query: tensor whose last dimension is the per-head feature size
        :param key: tensor that can be matrix-multiplied with ``query`` once its
            last two dimensions are swapped
        :param value: tensor combined with the attention weights via matmul
        :param mask: optional tensor broadcastable to the score matrix; positions
            where it equals 0 are excluded from attention
        :param dropout: optional callable applied to the attention weights
        :return: tuple ``(weighted values, attention weights)``
        """
        scale = math.sqrt(query.size(-1))
        scores = torch.matmul(query, key.transpose(-2, -1)) / scale

        if mask is not None:
            # Use the most negative finite value of the scores' dtype. A hard-coded
            # -1e9 is not representable in float16 and makes masked_fill raise.
            fill_value = torch.finfo(scores.dtype).min
            scores = scores.masked_fill(mask == 0, fill_value)

        p_attn = F.softmax(scores, dim=-1)

        if dropout is not None:
            p_attn = dropout(p_attn)

        return torch.matmul(p_attn, value), p_attn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/bert_modules/bert.py DELETED
@@ -1,44 +0,0 @@
1
- from torch import nn as nn
2
-
3
- from models.bert_modules.embedding import BERTEmbedding
4
- from models.bert_modules.transformer import TransformerBlock
5
- from utils import fix_random_seed_as
6
-
7
-
8
class BERT(nn.Module):
    """Embedding layer followed by a stack of transformer blocks."""

    def __init__(self, args):
        """
        :param args: configuration object; reads ``model_init_seed``, ``bert_max_len``,
            ``num_items``, ``bert_num_blocks``, ``bert_num_heads``,
            ``bert_hidden_units`` and ``bert_dropout``
        """
        super().__init__()

        # Called before any parameter is created (presumably seeds the RNGs so that
        # weight initialisation is reproducible -- confirm in ``utils``).
        fix_random_seed_as(args.model_init_seed)

        max_len = args.bert_max_len
        num_items = args.num_items
        n_layers = args.bert_num_blocks
        heads = args.bert_num_heads
        # Two ids beyond the items themselves; id 0 is what ``forward`` masks out.
        vocab_size = num_items + 2
        hidden = args.bert_hidden_units
        self.hidden = hidden
        dropout = args.bert_dropout

        # embedding for BERT
        self.embedding = BERTEmbedding(vocab_size=vocab_size, embed_size=self.hidden, max_len=max_len, dropout=dropout)

        # multi-layers transformer blocks, deep network
        self.transformer_blocks = nn.ModuleList(
            [TransformerBlock(hidden, heads, hidden * 4, dropout) for _ in range(n_layers)])

    def forward(self, x):
        """
        :param x: integer id tensor; the mask construction below expects two
            dimensions, with ids <= 0 treated as masked-out positions
        :return: output of the last transformer block
        """
        # (x > 0) is repeated along a new axis and given a singleton axis, i.e.
        # (d0, d1) -> (d0, 1, d1, d1).
        mask = (x > 0).unsqueeze(1).repeat(1, x.size(1), 1).unsqueeze(1)

        # embedding the indexed sequence to sequence of vectors
        x = self.embedding(x)

        # running over multiple transformer blocks
        for transformer in self.transformer_blocks:
            # Call the module itself, not ``.forward``, so registered hooks run.
            x = transformer(x, mask)

        return x

    def init_weights(self):
        """Placeholder; intentionally does nothing."""
        pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/bert_modules/embedding/__init__.py DELETED
@@ -1 +0,0 @@
1
- from .bert import BERTEmbedding
 
 
models/bert_modules/embedding/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (233 Bytes)
 
models/bert_modules/embedding/__pycache__/bert.cpython-312.pyc DELETED
Binary file (1.82 kB)
 
models/bert_modules/embedding/__pycache__/position.cpython-312.pyc DELETED
Binary file (1.19 kB)
 
models/bert_modules/embedding/__pycache__/token.cpython-312.pyc DELETED
Binary file (729 Bytes)
 
models/bert_modules/embedding/bert.py DELETED
@@ -1,31 +0,0 @@
1
- import torch.nn as nn
2
- from .token import TokenEmbedding
3
- from .position import PositionalEmbedding
4
-
5
-
6
class BERTEmbedding(nn.Module):
    """
    Embedding front-end for BERT.

    Sub-modules created here:
        1. TokenEmbedding : normal embedding matrix
        2. PositionalEmbedding : positional table (constructed, but not added to
           the output in ``forward``)

    No segment embedding is created. The output is the token embedding passed
    through dropout.
    """

    def __init__(self, vocab_size, embed_size, max_len, dropout=0.1):
        """
        :param vocab_size: total vocab size
        :param embed_size: embedding size of the token and positional embeddings
        :param max_len: number of positions in the positional embedding
        :param dropout: dropout rate
        """
        super().__init__()
        self.embed_size = embed_size
        self.token = TokenEmbedding(vocab_size=vocab_size, embed_size=embed_size)
        # Registered as a sub-module even though ``forward`` does not use it.
        self.position = PositionalEmbedding(max_len=max_len, d_model=embed_size)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, sequence):
        """
        :param sequence: token ids to look up
        :return: token embeddings after dropout
        """
        # Only the token embedding contributes; positional and segment terms are left out.
        token_vectors = self.token(sequence)
        return self.dropout(token_vectors)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/bert_modules/embedding/position.py DELETED
@@ -1,16 +0,0 @@
1
- import torch.nn as nn
2
- import torch
3
- import math
4
-
5
-
6
class PositionalEmbedding(nn.Module):
    """Learned positional table holding one ``d_model``-sized vector per position."""

    def __init__(self, max_len, d_model):
        """
        :param max_len: number of positions in the table
        :param d_model: size of each positional vector
        """
        super().__init__()

        # Trainable lookup table of shape (max_len, d_model).
        self.pe = nn.Embedding(max_len, d_model)

    def forward(self, x):
        """
        :param x: tensor of which only the first dimension (batch size) is read
        :return: the whole table repeated once per batch entry,
            shape ``(x.size(0), max_len, d_model)``
        """
        table = self.pe.weight.unsqueeze(0)
        return table.repeat(x.size(0), 1, 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/bert_modules/embedding/segment.py DELETED
@@ -1,6 +0,0 @@
1
- import torch.nn as nn
2
-
3
-
4
class SegmentEmbedding(nn.Embedding):
    """Embedding table with three rows; index 0 is the padding index."""

    def __init__(self, embed_size=512):
        """
        :param embed_size: size of each embedding vector
        """
        super().__init__(num_embeddings=3, embedding_dim=embed_size, padding_idx=0)
 
 
 
 
 
 
 
models/bert_modules/embedding/token.py DELETED
@@ -1,6 +0,0 @@
1
- import torch.nn as nn
2
-
3
-
4
class TokenEmbedding(nn.Embedding):
    """Embedding table over the vocabulary; index 0 is the padding index."""

    def __init__(self, vocab_size, embed_size=512):
        """
        :param vocab_size: number of rows in the table
        :param embed_size: size of each embedding vector
        """
        super().__init__(num_embeddings=vocab_size, embedding_dim=embed_size, padding_idx=0)
 
 
 
 
 
 
 
models/bert_modules/transformer.py DELETED
@@ -1,31 +0,0 @@
1
- import torch.nn as nn
2
-
3
- from .attention import MultiHeadedAttention
4
- from .utils import SublayerConnection, PositionwiseFeedForward
5
-
6
-
7
class TransformerBlock(nn.Module):
    """
    Bidirectional Encoder = Transformer (self-attention)
    Transformer = MultiHead_Attention + Feed_Forward with sublayer connection
    """

    def __init__(self, hidden, attn_heads, feed_forward_hidden, dropout):
        """
        :param hidden: hidden size of transformer
        :param attn_heads: number of heads of the multi-head attention
        :param feed_forward_hidden: feed_forward_hidden, usually 4*hidden_size
        :param dropout: dropout rate
        """

        super().__init__()
        self.attention = MultiHeadedAttention(h=attn_heads, d_model=hidden, dropout=dropout)
        self.feed_forward = PositionwiseFeedForward(d_model=hidden, d_ff=feed_forward_hidden, dropout=dropout)
        self.input_sublayer = SublayerConnection(size=hidden, dropout=dropout)
        self.output_sublayer = SublayerConnection(size=hidden, dropout=dropout)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, mask):
        """
        :param x: input tensor of the block
        :param mask: attention mask forwarded to the multi-head attention
        :return: block output after the attention sublayer, the feed-forward
            sublayer and a final dropout
        """

        def self_attention(inp):
            # Query, key and value are all the same tensor. The module is called
            # directly (not via ``.forward``) so registered hooks run.
            return self.attention(inp, inp, inp, mask=mask)

        x = self.input_sublayer(x, self_attention)
        x = self.output_sublayer(x, self.feed_forward)
        return self.dropout(x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/bert_modules/utils/__init__.py DELETED
@@ -1,4 +0,0 @@
1
- from .feed_forward import PositionwiseFeedForward
2
- from .layer_norm import LayerNorm
3
- from .sublayer import SublayerConnection
4
- from .gelu import GELU
 
 
 
 
 
models/bert_modules/utils/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (378 Bytes)
 
models/bert_modules/utils/__pycache__/feed_forward.cpython-312.pyc DELETED
Binary file (1.43 kB)
 
models/bert_modules/utils/__pycache__/gelu.cpython-312.pyc DELETED
Binary file (1 kB)
 
models/bert_modules/utils/__pycache__/layer_norm.cpython-312.pyc DELETED
Binary file (1.49 kB)
 
models/bert_modules/utils/__pycache__/sublayer.cpython-312.pyc DELETED
Binary file (1.34 kB)
 
models/bert_modules/utils/feed_forward.py DELETED
@@ -1,16 +0,0 @@
1
- import torch.nn as nn
2
- from .gelu import GELU
3
-
4
-
5
class PositionwiseFeedForward(nn.Module):
    """Feed-forward network: Linear -> GELU -> Dropout -> Linear."""

    def __init__(self, d_model, d_ff, dropout=0.1):
        """
        :param d_model: input and output width
        :param d_ff: width of the inner layer
        :param dropout: dropout rate applied after the activation
        """
        super().__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = GELU()

    def forward(self, x):
        """Expand to ``d_ff``, activate, apply dropout, then project back to ``d_model``."""
        inner = self.w_1(x)
        inner = self.activation(inner)
        inner = self.dropout(inner)
        return self.w_2(inner)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/bert_modules/utils/gelu.py DELETED
@@ -1,12 +0,0 @@
1
- import torch.nn as nn
2
- import torch
3
- import math
4
-
5
-
6
class GELU(nn.Module):
    """
    GELU activation, computed with the tanh approximation.

    Paper Section 3.4, last paragraph: BERT uses GELU instead of RELU.
    """

    def forward(self, x):
        """Return ``0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))``."""
        coefficient = math.sqrt(2 / math.pi)
        cubic_term = x + 0.044715 * torch.pow(x, 3)
        gate = 1 + torch.tanh(coefficient * cubic_term)
        return 0.5 * x * gate
 
 
 
 
 
 
 
 
 
 
 
 
 
models/bert_modules/utils/layer_norm.py DELETED
@@ -1,17 +0,0 @@
1
- import torch.nn as nn
2
- import torch
3
-
4
-
5
class LayerNorm(nn.Module):
    """Layer normalisation over the last dimension with a learnable scale and shift."""

    def __init__(self, features, eps=1e-6):
        """
        :param features: size of the last dimension of the inputs
        :param eps: constant added to the standard deviation before dividing
        """
        super().__init__()
        self.a_2 = nn.Parameter(torch.ones(features))   # scale, starts at 1
        self.b_2 = nn.Parameter(torch.zeros(features))  # shift, starts at 0
        self.eps = eps

    def forward(self, x):
        """Return ``a_2 * (x - mean) / (std + eps) + b_2`` along the last dimension."""
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        centered = x - mean
        denominator = std + self.eps
        return self.a_2 * centered / denominator + self.b_2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/bert_modules/utils/sublayer.py DELETED
@@ -1,18 +0,0 @@
1
- import torch.nn as nn
2
- from .layer_norm import LayerNorm
3
-
4
-
5
class SublayerConnection(nn.Module):
    """
    Residual wrapper around a sublayer, with layer norm applied to the sublayer's input.

    Computes ``x + dropout(sublayer(norm(x)))``; the norm comes first rather than last.
    """

    def __init__(self, size, dropout):
        """
        :param size: feature size handed to ``LayerNorm``
        :param dropout: dropout rate applied to the sublayer output
        """
        super().__init__()
        self.norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        """Apply residual connection to any sublayer with the same size."""
        normalised = self.norm(x)
        transformed = sublayer(normalised)
        return x + self.dropout(transformed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/dae.py DELETED
@@ -1,54 +0,0 @@
1
- from .base import BaseModel
2
-
3
- import torch
4
- import torch.nn as nn
5
- import torch.nn.functional as F
6
-
7
-
8
class DAEModel(BaseModel):
    """Autoencoder with input dropout, built from stacks of linear layers with tanh."""

    def __init__(self, args):
        """
        :param args: configuration object; reads ``dae_dropout``, ``dae_hidden_dim``,
            ``dae_num_hidden``, ``num_items`` and ``dae_latent_dim``
        """
        super().__init__(args)

        # Dropout applied to the (normalised) input
        self.input_dropout = nn.Dropout(p=args.dae_dropout)

        # Flat list of sizes; consecutive pairs are the (in, out) sizes of the
        # encoder layers, and the same pairs read backwards give the decoder layers.
        hidden_dims = [args.dae_hidden_dim] * (2 * args.dae_num_hidden)
        dims = [args.num_items, *hidden_dims, args.dae_latent_dim]

        # Create encoder and decoder layers pairwise, in the same order as before.
        encoder_layers = []
        decoder_layers = []
        for pair in range(len(dims) // 2):
            enc_in, enc_out = dims[2 * pair], dims[2 * pair + 1]
            dec_in, dec_out = dims[-2 * pair - 1], dims[-2 * pair - 2]
            encoder_layers.append(nn.Linear(enc_in, enc_out))
            decoder_layers.append(nn.Linear(dec_in, dec_out))
        self.encoder = nn.ModuleList(encoder_layers)
        self.decoder = nn.ModuleList(decoder_layers)

        # Re-initialise weights: encoder first, then decoder
        self.encoder.apply(self.weight_init)
        self.decoder.apply(self.weight_init)

    def weight_init(self, m):
        """Initialise ``nn.Linear`` modules; every other module is left untouched."""
        if not isinstance(m, nn.Linear):
            return
        nn.init.kaiming_normal_(m.weight)
        m.bias.data.normal_(0.0, 0.001)

    @classmethod
    def code(cls):
        """Return the short code identifying this model."""
        return 'dae'

    def forward(self, x):
        """
        :param x: input batch; normalised with ``F.normalize`` and passed through
            dropout before encoding
        :return: decoder output; the last decoder layer has no tanh
        """
        x = self.input_dropout(F.normalize(x))

        # Every encoder layer is followed by tanh.
        for layer in self.encoder:
            x = torch.tanh(layer(x))

        # Every decoder layer except the last is followed by tanh.
        last_index = len(self.decoder) - 1
        for index, layer in enumerate(self.decoder):
            x = layer(x)
            if index < last_index:
                x = torch.tanh(x)

        return x
54
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/vae.py DELETED
@@ -1,69 +0,0 @@
1
- from .base import BaseModel
2
-
3
- import torch
4
- import torch.nn as nn
5
- import torch.nn.functional as F
6
-
7
-
8
class VAEModel(BaseModel):
    """Variational autoencoder built from stacks of linear layers with tanh."""

    def __init__(self, args):
        """
        :param args: configuration object; reads ``vae_latent_dim``, ``vae_dropout``,
            ``vae_hidden_dim``, ``vae_num_hidden`` and ``num_items``
        """
        super().__init__(args)
        self.latent_dim = args.vae_latent_dim

        # Dropout applied to the (normalised) input
        self.input_dropout = nn.Dropout(p=args.vae_dropout)

        # Flat list of sizes; the encoder ends in 2 * latent_dim outputs, which
        # ``forward`` splits into mu and logvar.
        hidden_dims = [args.vae_hidden_dim] * (2 * args.vae_num_hidden)
        dims = [args.num_items, *hidden_dims, args.vae_latent_dim * 2]

        # Create encoder and decoder layers pairwise, in the same order as before.
        encoder_layers = []
        decoder_layers = []
        for pair in range(len(dims) // 2):
            encoder_layers.append(nn.Linear(dims[2 * pair], dims[2 * pair + 1]))
            # The first decoder layer takes only the sampled latent vector,
            # i.e. half of the encoder's output width.
            dec_in = dims[-1] // 2 if pair == 0 else dims[-2 * pair - 1]
            dec_out = dims[-2 * pair - 2]
            decoder_layers.append(nn.Linear(dec_in, dec_out))
        self.encoder = nn.ModuleList(encoder_layers)
        self.decoder = nn.ModuleList(decoder_layers)

        # Re-initialise weights: encoder first, then decoder
        self.encoder.apply(self.weight_init)
        self.decoder.apply(self.weight_init)

    def weight_init(self, m):
        """Initialise ``nn.Linear`` modules; every other module is left untouched."""
        if not isinstance(m, nn.Linear):
            return
        nn.init.kaiming_normal_(m.weight)
        m.bias.data.zero_()

    @classmethod
    def code(cls):
        """Return the short code identifying this model."""
        return 'vae'

    def forward(self, x):
        """
        :param x: input batch; normalised with ``F.normalize`` and passed through
            dropout before encoding
        :return: tuple ``(decoder output, mu, logvar)``
        """
        x = self.input_dropout(F.normalize(x))

        # Every encoder layer except the last is followed by tanh.
        last_encoder = len(self.encoder) - 1
        for index, layer in enumerate(self.encoder):
            x = layer(x)
            if index < last_encoder:
                x = torch.tanh(x)

        # First latent_dim columns are mu, the remaining columns are logvar.
        mu = x[:, :self.latent_dim]
        logvar = x[:, self.latent_dim:]

        if self.training:
            # since log(var) = log(sigma^2) = 2*log(sigma)
            sigma = torch.exp(0.5 * logvar)
            eps = torch.randn_like(sigma)
            x = mu + eps * sigma
        else:
            # Outside training the mean is used directly, without sampling.
            x = mu

        # Every decoder layer except the last is followed by tanh.
        last_decoder = len(self.decoder) - 1
        for index, layer in enumerate(self.decoder):
            x = layer(x)
            if index < last_decoder:
                x = torch.tanh(x)

        return x, mu, logvar
69
-