Spaces:
Sleeping
Sleeping
| import tensorflow as tf | |
| from tensorflow import keras | |
| from tensorflow.keras import layers | |
| from tensorflow.keras.layers import TextVectorization, Embedding, Dense | |
class TransformerEncoder(layers.Layer):
    """Transformer encoder block: self-attention, then a two-layer dense projection.

    Both sub-layers are followed by a residual addition and a
    ``LayerNormalization``.

    Args:
        embed_dim: Embedding size. Used as ``key_dim`` of the attention layer
            and as the width of the final dense layer, so the block's output
            width equals ``embed_dim`` and blocks can be stacked.
        dense_dim: Number of units in the hidden (ReLU) dense layer.
        num_heads: Number of heads of the multi-head attention layer.
        **kwargs: Forwarded unchanged to ``layers.Layer``.
    """

    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)

        # Constructor arguments are stored so get_config() can report them.
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads

        self.attention = layers.MultiHeadAttention(
            key_dim=embed_dim,
            num_heads=num_heads,
        )

        # Expand to dense_dim, then project back to embed_dim so the result
        # can be added to the block input (residual connection).
        feed_forward_layers = [
            layers.Dense(dense_dim, activation="relu"),
            layers.Dense(embed_dim),
        ]
        self.dense_proj = keras.Sequential(feed_forward_layers)

        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()

    def call(self, inputs, mask=None):
        """Run the encoder block on ``inputs``.

        Args:
            inputs: Tensor used as query, key and value of the attention layer
                (self-attention).
            mask: Optional mask. When given, a new axis is inserted at
                position 1 before it is passed as ``attention_mask``.
                Presumably a (batch, seq_len) padding mask -- confirm against
                the caller.

        Returns:
            The layer-normalized output of the second residual connection.
        """
        attention_mask = None if mask is None else mask[:, tf.newaxis, :]

        # Query, key and value are all the same tensor: self-attention.
        attended = self.attention(
            query=inputs,
            key=inputs,
            value=inputs,
            attention_mask=attention_mask,
        )

        # Residual connection + normalization around the attention sub-layer.
        normalized_attention = self.layernorm_1(inputs + attended)

        # Residual connection + normalization around the dense sub-layer.
        projected = self.dense_proj(normalized_attention)
        return self.layernorm_2(normalized_attention + projected)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments.

        This lets the layer be re-instantiated from its config dict.
        """
        return {
            **super().get_config(),
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        }
| # Using positional encoding to re-inject order information | |
class PositionalEmbedding(layers.Layer):
    """Sum of a token embedding and a learned position embedding.

    Args:
        sequence_length: Number of rows of the position-embedding table, i.e.
            the number of distinct positions that can be embedded.
        input_dim: Number of rows of the token-embedding table (token
            vocabulary size).
        output_dim: Embedding size; shared by both tables so their outputs can
            be added.
        **kwargs: Forwarded unchanged to ``layers.Layer``.
    """

    def __init__(self, sequence_length, input_dim, output_dim, **kwargs):
        super().__init__(**kwargs)
        # One vector per token id.
        self.token_embeddings = layers.Embedding(input_dim=input_dim, output_dim=output_dim)
        # One vector per position index; the "vocabulary" here is the set of
        # possible positions, hence input_dim=sequence_length.
        self.position_embeddings = layers.Embedding(input_dim=sequence_length, output_dim=output_dim)
        # Stored so get_config() can report them.
        self.sequence_length = sequence_length
        self.input_dim = input_dim
        self.output_dim = output_dim

    def call(self, inputs):
        """Embed ``inputs`` and add the embedding of each position index.

        Args:
            inputs: Integer token ids; the last axis is taken as the sequence
                axis. Presumably (batch, seq_len) -- confirm against the caller.

        Returns:
            Tensor reshaped to (-1, length, output_dim) holding token plus
            position embeddings.
        """
        length = tf.shape(inputs)[-1]  # size of the last (sequence) axis
        positions = tf.range(start=0, limit=length, delta=1)  # 0 .. length-1

        embedded_tokens = tf.reshape(
            self.token_embeddings(inputs), (-1, length, self.output_dim)
        )
        # (length, output_dim); broadcast over the leading axis by the addition.
        embedded_positions = self.position_embeddings(positions)

        # Plain broadcasting addition: no need to instantiate a merge layer on
        # every forward pass just to sum two tensors.
        return embedded_tokens + embedded_positions

    def compute_mask(self, inputs, mask=None):
        """Return a mask that is True wherever ``inputs`` is not 0.

        NOTE(review): when no incoming ``mask`` is supplied this returns None,
        so no mask is produced in that case -- confirm this is intended before
        changing it, since it affects what downstream layers receive.
        """
        if mask is None:
            return None
        return tf.math.not_equal(inputs, 0)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments.

        This lets the layer be re-instantiated from its config dict, which is
        useful when saving and loading a model that contains it.
        """
        config = super().get_config()
        config.update({
            "output_dim": self.output_dim,
            "sequence_length": self.sequence_length,
            "input_dim": self.input_dim,
        })
        return config