Airin-chan committed on
Commit
2d203c6
·
verified ·
1 Parent(s): 110b3b8

Upload 5 files

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. Decoder_Model.keras +3 -0
  3. Decoders.py +119 -0
  4. Tokenizer.pkl +3 -0
  5. config.json +12 -0
  6. model_index.json +6 -0
.gitattributes CHANGED
@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  MicroGenerativeTeks/Decoder_Model.keras filter=lfs diff=lfs merge=lfs -text
37
  MicroGenerativeTeks/Encoder_Model.keras filter=lfs diff=lfs merge=lfs -text
38
  MicroGenerativeTeks/Projection_Model.keras filter=lfs diff=lfs merge=lfs -text
 
 
36
  MicroGenerativeTeks/Decoder_Model.keras filter=lfs diff=lfs merge=lfs -text
37
  MicroGenerativeTeks/Encoder_Model.keras filter=lfs diff=lfs merge=lfs -text
38
  MicroGenerativeTeks/Projection_Model.keras filter=lfs diff=lfs merge=lfs -text
39
+ Decoder_Model.keras filter=lfs diff=lfs merge=lfs -text
Decoder_Model.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37eef0fa6c6dad97bfcd400d6a277827971793cbc16c7039049e9279c880d32b
3
+ size 177537785
Decoders.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ from tensorflow import keras
3
@keras.utils.register_keras_serializable()
class BlockDecoder(keras.layers.Layer):
    """One decoder block: causal self-attention, optional cross-attention, FFN.

    Each sub-step is followed by a residual connection and layer
    normalization (normalization is applied after the residual add).

    Args:
        d_model: Width of the feed-forward output; also used to derive the
            per-head ``key_dim`` as ``d_model // num_head``.
        num_head: Number of attention heads for both attention layers.
        dff: Width of the hidden feed-forward layer.
        drop_out: Dropout rate used inside both attention layers and after
            the feed-forward network.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, d_model, num_head, dff, drop_out=0.2, **kwargs):
        super(BlockDecoder, self).__init__(**kwargs)
        self.self_attention = keras.layers.MultiHeadAttention(
            num_heads=num_head,
            key_dim=(d_model // num_head),
            dropout=drop_out,
        )
        self.cross_attention = keras.layers.MultiHeadAttention(
            num_heads=num_head,
            key_dim=(d_model // num_head),
            dropout=drop_out,
        )
        self.ffn = keras.Sequential([
            keras.layers.Dense(dff, activation=keras.activations.gelu),
            keras.layers.Dense(d_model),
        ])
        self.layernorm1 = keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout = keras.layers.Dropout(drop_out)
        self.d_model = d_model
        self.num_head = num_head
        self.dff = dff
        # Overrides any ``name`` passed through ``kwargs``.
        self.name = "BlockGeneretiveDecoders"
        self.drop_rate = drop_out

    def call(self, x, training=False, with_encoder=False):
        """Run the block.

        Args:
            x: A pair ``(dec_attn, enc_attn)``: the decoder stream and the
                encoder features used as key/value for cross-attention.
            training: Forwarded to the attention, dropout and normalization
                layers.
            with_encoder: Cross-attention is applied only when this is
                exactly ``True`` (identity check, so other truthy values
                take the decoder-only path).

        Returns:
            The block output after the feed-forward residual and
            normalization.
        """
        dec_attn, enc_attn = x
        attn1 = self.self_attention(
            dec_attn, dec_attn, dec_attn,
            training=training, use_causal_mask=True,
        )
        attn1 = self.layernorm1(attn1 + dec_attn, training=training)
        if with_encoder is True:
            cross_attn = self.cross_attention(
                attn1, enc_attn, enc_attn, training=training
            )
            cross_attn = self.layernorm2(cross_attn + attn1, training=training)
        else:
            # The cross-attention layer is still invoked on zeros and its
            # result discarded; presumably this is so its weights get
            # created on the decoder-only path -- confirm before removing.
            # Note that layernorm2 is not called on this path.
            dummy_attn = tf.zeros_like(attn1)
            dummy_attn = self.cross_attention(dummy_attn, dummy_attn, dummy_attn)
            _ = tf.stop_gradient(dummy_attn)
            cross_attn = attn1

        ffn = self.ffn(cross_attn)
        ffn = self.dropout(ffn, training=training)
        ffn = self.layernorm3(ffn + cross_attn, training=training)
        return ffn

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer.

        The dropout rate is stored under ``"drop_out"`` so that the key
        matches the ``__init__`` parameter name and ``cls(**config)`` works.
        """
        config = super(BlockDecoder, self).get_config()
        config.update({
            "d_model": self.d_model,
            "num_head": self.num_head,
            "dff": self.dff,
            "drop_out": self.drop_rate,
        })
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the layer from a config dict.

        Configs written by earlier versions stored the dropout rate under
        ``"drop_rate"``; that key is mapped to ``drop_out`` here so such
        configs remain loadable instead of failing on an unknown keyword.
        """
        config = dict(config)  # do not mutate the caller's dict
        if "drop_rate" in config:
            legacy_rate = config.pop("drop_rate")
            config.setdefault("drop_out", legacy_rate)
        return cls(**config)
53
+
54
@keras.utils.register_keras_serializable()
class Decoder(keras.Model):
    """Stack of eight ``BlockDecoder`` layers with token and position embeddings.

    Args:
        d_model: Embedding width and block width.
        vocab_size: Size of the token embedding table and of the output
            projection.
        dff: Hidden width of each block's feed-forward network.
        num_head: Number of attention heads per block.
        max_pos: Number of rows in the learned position embedding table.
        drop_out: Dropout rate passed to every block.
        **kwargs: Forwarded to ``keras.Model``.
    """

    def __init__(self, d_model=512, vocab_size=18191, dff=1024, num_head=16,
                 max_pos=551, drop_out=0.05, **kwargs):
        super().__init__(**kwargs)
        self.d_model = d_model
        self.dff = dff
        self.num_head = num_head
        self.max_pos = max_pos
        self.drop_out = drop_out
        # Overrides any ``name`` passed through ``kwargs``.
        self.name = "DecodersModels"
        self.vocab_size = vocab_size
        self.Embedding = keras.layers.Embedding(self.vocab_size, self.d_model)
        self.PositionalEncoding = keras.layers.Embedding(self.max_pos, self.d_model)
        # The blocks stay individual attributes named block1..block8; only
        # the way they are assigned differs from spelling out eight lines.
        for block_number in range(1, 9):
            setattr(
                self,
                "block%d" % block_number,
                BlockDecoder(d_model, num_head, dff, drop_out),
            )
        self.linear = keras.layers.Dense(vocab_size)
        self.projection_enc = keras.layers.Dense(d_model)

    def call(self, x, training=True, with_encoder=False):
        """Compute per-position vocabulary logits.

        Args:
            x: A pair ``(decod_token, enc_log)``: token ids fed to the
                embedding, and encoder features.
            training: Forwarded to every block. Note the default here is
                ``True``.
            with_encoder: When exactly ``True`` the encoder features are
                projected and used by the blocks; otherwise they are
                replaced by a projection of zeros.

        Returns:
            The output of the final dense layer applied to the last block's
            output.
        """
        token_ids, encoder_features = x
        if with_encoder is True:
            encoder_features = self.projection_enc(encoder_features)
        else:
            # Encoder input is ignored: a zero tensor shaped
            # (dim0, dim1, d_model) of the token ids is projected instead.
            token_shape = tf.shape(token_ids)
            encoder_features = tf.zeros((token_shape[0], token_shape[1], self.d_model))
            encoder_features = self.projection_enc(encoder_features)
            _ = tf.stop_gradient(encoder_features)

        sequence_length = tf.shape(token_ids)[1]

        # Token embeddings scaled by sqrt(d_model).
        hidden = self.Embedding(token_ids)
        hidden = hidden * tf.math.sqrt(tf.cast(self.d_model, tf.float32))

        # Position indices beyond the table are clamped to the last row.
        positions = tf.range(start=0, limit=sequence_length, delta=1)
        positions = tf.where(positions < self.max_pos, positions, self.max_pos - 1)
        position_embedding = tf.expand_dims(self.PositionalEncoding(positions), axis=0)
        hidden = hidden + position_embedding

        decoder_blocks = (
            self.block1, self.block2, self.block3, self.block4,
            self.block5, self.block6, self.block7, self.block8,
        )
        for block in decoder_blocks:
            hidden = block(
                [hidden, encoder_features],
                training=training,
                with_encoder=with_encoder,
            )

        return self.linear(hidden)

    def get_config(self):
        """Return the base config extended with this model's constructor arguments."""
        config = super().get_config()
        hyperparameters = {
            "d_model": self.d_model,
            "dff": self.dff,
            "num_head": self.num_head,
            "max_pos": self.max_pos,
            "drop_out": self.drop_out,
            "vocab_size": self.vocab_size,
        }
        config.update(hyperparameters)
        return config

    @classmethod
    def from_config(cls, config):
        """Instantiate the model by passing ``config`` as keyword arguments."""
        return cls(**config)
Tokenizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eeaf5a1a0d6859528b933a45d7e02550d77df9d82105bf4bd8fb660ca10adb84
3
+ size 724960
config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type" :
3
+ "Micro-Generative-Transformers",
4
+ "architectures" :
5
+ ["MicroGenerativeTeks"],
6
+ "d_model" : 512,
7
+ "dff" : 1024,
8
+ "num_head" : 16,
9
+ "max_pos" : 250,
10
+ "drop_out" : 0.05,
11
+ "vocab_size" : 18191
12
+ }
model_index.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map" : {
3
+ "AutoModel":
4
+ "Decoders.py::Decoder"
5
+ }
6
+ }