Upload TFBilma

Browse files

Files changed (4) hide show

config.json +6 -4
configuration_bilma.py +13 -5
modeling_bilma.py +52 -19
tf_model.h5 +1 -1

config.json CHANGED Viewed

@@ -1,17 +1,19 @@
 {
-  "_name_or_path": "bilma_MX",
   "auto_map": {
     "AutoConfig": "configuration_bilma.BilmaConfig",
-    "TFAutoModel": "modeling_bilma.TFBilma",
-    "TFAutoModelForMaskedLM": "modeling_bilma.TFBilma"
   },
   "hidden_dropout_prob": 0.1,
   "hidden_size": 512,
-  "include_head": null,
   "include_top": true,
   "model_type": "bilma",
   "num_attention_heads": 4,
   "num_hidden_layers": 2,
   "seq_max_length": 280,
   "transformers_version": "4.30.2",
   "vocab_size": 29025,

 {
+  "add_head": null,
+  "architectures": [
+    "Bilma"
+  ],
   "auto_map": {
     "AutoConfig": "configuration_bilma.BilmaConfig",
+    "TFAutoModel": "modeling_bilma.TFBilma"
   },
   "hidden_dropout_prob": 0.1,
   "hidden_size": 512,
   "include_top": true,
   "model_type": "bilma",
   "num_attention_heads": 4,
   "num_hidden_layers": 2,
+  "pooling": null,
   "seq_max_length": 280,
   "transformers_version": "4.30.2",
   "vocab_size": 29025,

configuration_bilma.py CHANGED Viewed

@@ -7,7 +7,8 @@ class BilmaConfig(PretrainedConfig):
         self,
         weights="MX",
         include_top = True,
-        include_head = None,
         num_attention_heads: int = 4,
         num_hidden_layers: int = 2,
         seq_max_length: int = 280,
@@ -17,14 +18,20 @@ class BilmaConfig(PretrainedConfig):
         **kwargs,
     ):
         countries = ["MX"]
         if weights not in countries:
             raise ValueError(f"`weights` must be one of {countries}, got {weights}.")
-        if include_head is not None and include_top == True:
-            raise ValueError(f"To include a head, 'include_top' must be False")
         if weights is not None:
             self.weights = weights
             self.include_top = include_top
-            self.include_head = include_head
             self.num_attention_heads = 4
             self.num_hidden_layers = 2
             self.seq_max_length = 280
@@ -36,7 +43,8 @@ class BilmaConfig(PretrainedConfig):
         self.weights = weights
         self.include_top = include_top
-        self.include_head = include_head
         self.num_attention_heads = num_attention_heads
         self.num_hidden_layers = num_hidden_layers
         self.seq_max_length = seq_max_length

         self,
         weights="MX",
         include_top = True,
+        add_head = None,
+        pooling = None,
         num_attention_heads: int = 4,
         num_hidden_layers: int = 2,
         seq_max_length: int = 280,
         **kwargs,
     ):
         countries = ["MX"]
+        poolings = ["mean", "cls", "max"]
         if weights not in countries:
             raise ValueError(f"`weights` must be one of {countries}, got {weights}.")
+        if add_head is not None and include_top == True:
+            raise ValueError(f"To add a head, 'include_top' must be False")
+        if pooling is not None and include_top == True:
+            raise ValueError(f"To specify a pooling, 'include_top' must be False")
+        if pooling is not None and pooling not in poolings:
+            raise ValueError(f"`pooling` must be one of {poolings}, got {pooling}.")
         if weights is not None:
             self.weights = weights
             self.include_top = include_top
+            self.add_head = add_head
+            self.pooling = pooling
             self.num_attention_heads = 4
             self.num_hidden_layers = 2
             self.seq_max_length = 280
         self.weights = weights
         self.include_top = include_top
+        self.add_head = add_head
+        self.pooling = pooling
         self.num_attention_heads = num_attention_heads
         self.num_hidden_layers = num_hidden_layers
         self.seq_max_length = seq_max_length

modeling_bilma.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from transformers import TFPreTrainedModel, PreTrainedTokenizer
 from tensorflow.keras.models import Model, load_model, Sequential
 from tensorflow.keras.layers import Layer, Dense, concatenate, Input, add, Dropout, LayerNormalization, MultiHeadAttention, Embedding
 import tensorflow as tf
@@ -9,7 +10,7 @@ from typing import Dict
 import re
 import unicodedata
-from .configuration_bilma import BilmaConfig
 # copied from preprocessing.py
 BLANK = ' '
@@ -38,7 +39,7 @@ class TFBilma(TFPreTrainedModel):
     def __init__(self, config):
         self.seq_max_length = config.seq_max_length
         self.include_top = config.include_top
-        self.include_head = config.include_head
         super().__init__(config)
         self.model = bilma(num_enc=config.num_hidden_layers,
@@ -49,7 +50,8 @@ class TFBilma(TFPreTrainedModel):
                            vocab_size=config.vocab_size,
                            rate=config.hidden_dropout_prob,
                            include_top = config.include_top,
-                           include_head = config.include_head)
     @property
     def dummy_inputs(self) -> Dict[str, tf.Tensor]:
@@ -72,19 +74,26 @@ class TFBilma(TFPreTrainedModel):
     def call(self, inputs):
-        if isinstance(inputs, Dict):
             ins = tf.cast(inputs["input_ids"], tf.float32)
         else:
             ins = inputs
         if self.include_top:
             output = {"logits":self.model(ins)}
         else:
-            if self.include_head is None:
                 output = {"last_hidden_state":self.model(ins)}
             else:
-                output = {"logits":self.model(ins)}
         return output
 # copied from bilma_model.py
 # --------------------------
@@ -115,18 +124,38 @@ def accuracy_function(ignore_id=0):
 def mean_vectors(inputs, enc_vectors, max_length):
     p = tf.where(inputs == 3)
-    count, _ = inputs.shape
     pos = tf.transpose(p)[1]
     C = tf.sequence_mask(pos, maxlen=max_length, dtype=tf.float32)
-    #C = tf.ragged.constant([[1]*i for i in pos.numpy()], dtype=tf.float32)
-    #C = C.to_tensor(0, shape=(count, max_length))
     C = tf.reshape(C, (-1, max_length, 1))
     S = tf.reduce_sum(enc_vectors * C, 1)
     x = S / tf.expand_dims(tf.cast(pos, tf.float32), (1))
     return x
-def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1, include_top=True, include_head=None):
     capt_inputs_ids = Input(shape=(max_length, ), name='input_ids')
     capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False, name="bilma/embedding")
     capt_inputs = capt_embedding(capt_inputs_ids)
@@ -136,14 +165,20 @@ def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, voca
     if include_top:
         fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
     else:
-        if include_head is None:
-            fin_output = enc_output
-        else:
-            x = enc_output
             x = mean_vectors(capt_inputs_ids, x, max_length)
-            for i, m in enumerate(include_head[:-1]):
                 x = Dense(m, use_bias=True, activation="relu", name=f"bilma/dense_ex_{i}")(x)
-            fin_output = [Dense(include_head[-1], use_bias=True, activation="softmax", name=f"bilma/dense_ex_final")(x), enc_output]
     caption_model = Model(inputs=capt_inputs_ids, outputs=fin_output, name="bilma_model")
     return caption_model
@@ -160,7 +195,6 @@ def load(model_file):
 #
 # Copied from transformer_text.py
 # -------------------------------
 class EncoderBlock(Layer):
     def __init__(self, layer_num, patch_dim, num_heads, ff_dim, rate=0.1, **kwargs):
         super(EncoderBlock, self).__init__(**kwargs)
@@ -242,7 +276,6 @@ class DecoderBlock(Layer):
         return final_output, attn_output1, attn_encoder
 class Encoder(Layer):
     def __init__(self, n, embed_dim, max_length, num_heads, ff_dim, rate=0.1, **kwargs):
         super(Encoder, self).__init__(**kwargs)

+from transformers import TFPreTrainedModel, PreTrainedTokenizer, BatchEncoding
 from tensorflow.keras.models import Model, load_model, Sequential
 from tensorflow.keras.layers import Layer, Dense, concatenate, Input, add, Dropout, LayerNormalization, MultiHeadAttention, Embedding
 import tensorflow as tf
 import re
 import unicodedata
+from configuration_bilma import BilmaConfig
 # copied from preprocessing.py
 BLANK = ' '
     def __init__(self, config):
         self.seq_max_length = config.seq_max_length
         self.include_top = config.include_top
+        self.add_head = config.add_head
         super().__init__(config)
         self.model = bilma(num_enc=config.num_hidden_layers,
                            vocab_size=config.vocab_size,
                            rate=config.hidden_dropout_prob,
                            include_top = config.include_top,
+                           add_head = config.add_head,
+                           pooling = config.pooling)
     @property
     def dummy_inputs(self) -> Dict[str, tf.Tensor]:
     def call(self, inputs):
+        # Forward pass through the wrapped Keras model (self.model).
+        # `inputs` may be a mapping (Dict or BatchEncoding), in which case its
+        # "input_ids" entry is cast to float32, or anything else, which is
+        # handed to the model unchanged.
+        # Returns a one-entry dict whose key depends on the configuration:
+        # "logits" when include_top is set, "last_hidden_state" when no head
+        # was added, and "label" when add_head is set.
+        if isinstance(inputs, Dict) or isinstance(inputs, BatchEncoding):
             ins = tf.cast(inputs["input_ids"], tf.float32)
         else:
             ins = inputs
         if self.include_top:
             output = {"logits":self.model(ins)}
         else:
+            if self.add_head is None:
                 output = {"last_hidden_state":self.model(ins)}
             else:
+                output = {"label":self.model(ins)}
         return output
+    @staticmethod
+    def get_loss_function():
+        """Return the loss callable built by the module-level loss factory.
+
+        Declared static because it takes no ``self``; without the decorator a
+        call on an instance would pass the instance as an unexpected argument.
+        """
+        # NOTE(review): the original called `loss_funtion`, a name that does not
+        # appear anywhere else in this diff. `loss_function` is presumed from
+        # the sibling `accuracy_function` -- confirm against the module.
+        return loss_function()
+    @staticmethod
+    def get_acc_function():
+        """Return the metric callable built by ``accuracy_function()``.
+
+        Declared static because it takes no ``self``; without the decorator a
+        call on an instance would pass the instance as an unexpected argument.
+        """
+        return accuracy_function()
 # copied from bilma_model.py
 # --------------------------
 def mean_vectors(inputs, enc_vectors, max_length):
     """Average the encoder vectors located before token id 3 in each row.

     NOTE(review): id 3 is presumably the end-of-sequence marker and each row
     is assumed to contain it exactly once -- confirm against the tokenizer.
     """
     # Coordinates of every position whose token id equals 3; the second
     # coordinate is the column index inside the row.
     hits = tf.where(inputs == 3)
     lengths = tf.transpose(hits)[1]
     # 1.0 for the first `lengths` positions of a row, 0.0 for the rest.
     keep = tf.sequence_mask(lengths, maxlen=max_length, dtype=tf.float32)
     keep = tf.reshape(keep, (-1, max_length, 1))
     summed = tf.reduce_sum(enc_vectors * keep, 1)
     denominators = tf.expand_dims(tf.cast(lengths, tf.float32), (1))
     return summed / denominators
+def mean_diff_vectors(inputs, enc_vectors, max_length):
+    # Pools enc_vectors over the positions that precede token id 3: first the
+    # masked mean `mu`, then the sum of (mu - masked vectors) over axis 1,
+    # divided by the same position count.
+    # NOTE(review): `mu` is the result of a reduction over axis 1 while `vecs`
+    # still has that axis, so `mu - vecs` relies on broadcasting between
+    # tensors of different rank -- confirm this is intended; an expand_dims on
+    # `mu` may be missing.
+    # NOTE(review): positions past `pos` are zeroed in `vecs`, so each of them
+    # still contributes `mu` to the second sum -- verify that is wanted.
+    p = tf.where(inputs == 3)
+    pos = tf.transpose(p)[1]
+    C = tf.sequence_mask(pos, maxlen=max_length, dtype=tf.float32)
+    C = tf.reshape(C, (-1, max_length, 1))
+    vecs = enc_vectors * C
+    S = tf.reduce_sum(vecs, 1)
+    mu = S / tf.expand_dims(tf.cast(pos, tf.float32), (1))
+    x = tf.reduce_sum(mu - vecs, 1) / tf.expand_dims(tf.cast(pos, tf.float32), (1))
+    return x
+def max_vectors(inputs, enc_vectors, max_length):
+    """Max-pool the encoder vectors located before token id 3 in each row.
+
+    Positions at or after the marker are excluded from the maximum. A row
+    whose marker sits at index 0 has no kept position and yields the most
+    negative float32 in every feature.
+
+    NOTE(review): id 3 is presumably the end-of-sequence marker and each row
+    is assumed to contain it exactly once -- confirm against the tokenizer.
+    """
+    p = tf.where(inputs == 3)
+    pos = tf.transpose(p)[1]
+    C = tf.sequence_mask(pos, maxlen=max_length, dtype=tf.float32)
+    C = tf.reshape(C, (-1, max_length, 1))
+    # Multiplying by the mask alone leaves 0.0 in the excluded positions, and
+    # that 0.0 would win the max whenever every kept value of a feature is
+    # negative. Push excluded positions to the most negative float32 instead.
+    masked = enc_vectors * C + (1.0 - C) * tf.float32.min
+    x = tf.reduce_max(masked, 1)
+    return x
+def cls_vectors(inputs, enc_vectors, max_length):
+    """Return the encoder vector at sequence position 0 of every row.
+
+    `inputs` and `max_length` are unused; they are kept so the signature
+    matches the other pooling helpers.
+    """
+    first_position = enc_vectors[:, 0:1, :]
+    return tf.squeeze(first_position, axis=1)
+def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1, include_top=True, add_head=None, pooling=None):
     capt_inputs_ids = Input(shape=(max_length, ), name='input_ids')
     capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False, name="bilma/embedding")
     capt_inputs = capt_embedding(capt_inputs_ids)
     if include_top:
         fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
     else:
+        x = enc_output
+        if pooling == "mean":
             x = mean_vectors(capt_inputs_ids, x, max_length)
+        elif pooling == "cls":
+            x = cls_vectors(capt_inputs_ids, x, max_length)
+        elif pooling == "max":
+            x = max_vectors(capt_inputs_ids, x, max_length)
+        if add_head is None:
+            fin_output = x
+        else:
+            for i, m in enumerate(add_head[:-1]):
                 x = Dense(m, use_bias=True, activation="relu", name=f"bilma/dense_ex_{i}")(x)
+            fin_output = Dense(add_head[-1], use_bias=True, activation="softmax", name=f"bilma/dense_ex_final")(x)
     caption_model = Model(inputs=capt_inputs_ids, outputs=fin_output, name="bilma_model")
     return caption_model
 #
 # Copied from transformer_text.py
 # -------------------------------
 class EncoderBlock(Layer):
     def __init__(self, layer_num, patch_dim, num_heads, ff_dim, rate=0.1, **kwargs):
         super(EncoderBlock, self).__init__(**kwargs)
         return final_output, attn_output1, attn_encoder
 class Encoder(Layer):
     def __init__(self, n, embed_dim, max_length, num_heads, ff_dim, rate=0.1, **kwargs):
         super(Encoder, self).__init__(**kwargs)

tf_model.h5 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:53217c655c71bcc7ab238879925eb57c52f8a2d170554a8ca059c0cadd490c2a
 size 156875820

 version https://git-lfs.github.com/spec/v1
+oid sha256:2cc8b04b7a93e6fa9eb46a7a30d89f2e97e4b8ac52da1c0e35239ded8a29482c
 size 156875820