Upload TFBilma
Browse files
- config.json +1 -0
- configuration_bilma.py +4 -1
- modeling_bilma.py +8 -4
- tf_model.h5 +1 -1
config.json
CHANGED
|
@@ -9,6 +9,7 @@
|
|
| 9 |
},
|
| 10 |
"hidden_dropout_prob": 0.1,
|
| 11 |
"hidden_size": 512,
|
|
|
|
| 12 |
"model_type": "bilma",
|
| 13 |
"num_attention_heads": 4,
|
| 14 |
"num_hidden_layers": 2,
|
|
|
|
| 9 |
},
|
| 10 |
"hidden_dropout_prob": 0.1,
|
| 11 |
"hidden_size": 512,
|
| 12 |
+
"include_top": true,
|
| 13 |
"model_type": "bilma",
|
| 14 |
"num_attention_heads": 4,
|
| 15 |
"num_hidden_layers": 2,
|
configuration_bilma.py
CHANGED
|
@@ -6,6 +6,7 @@ class BilmaConfig(PretrainedConfig):
|
|
| 6 |
def __init__(
|
| 7 |
self,
|
| 8 |
weights="spanish",
|
|
|
|
| 9 |
num_attention_heads: int = 4,
|
| 10 |
num_hidden_layers: int = 2,
|
| 11 |
seq_max_length: int = 280,
|
|
@@ -15,9 +16,10 @@ class BilmaConfig(PretrainedConfig):
|
|
| 15 |
**kwargs,
|
| 16 |
):
|
| 17 |
if weights not in ["spanish", ""]:
|
| 18 |
-
raise ValueError(f"`weights` must be 'spanish'
|
| 19 |
if weights == "spanish":
|
| 20 |
self.weights = weights
|
|
|
|
| 21 |
self.num_attention_heads = 4
|
| 22 |
self.num_hidden_layers = 2
|
| 23 |
self.seq_max_length = 280
|
|
@@ -28,6 +30,7 @@ class BilmaConfig(PretrainedConfig):
|
|
| 28 |
return
|
| 29 |
|
| 30 |
self.weights = weights
|
|
|
|
| 31 |
self.num_attention_heads = num_attention_heads
|
| 32 |
self.num_hidden_layers = num_hidden_layers
|
| 33 |
self.seq_max_length = seq_max_length
|
|
|
|
| 6 |
def __init__(
|
| 7 |
self,
|
| 8 |
weights="spanish",
|
| 9 |
+
include_top=True,
|
| 10 |
num_attention_heads: int = 4,
|
| 11 |
num_hidden_layers: int = 2,
|
| 12 |
seq_max_length: int = 280,
|
|
|
|
| 16 |
**kwargs,
|
| 17 |
):
|
| 18 |
if weights not in ["spanish", ""]:
|
| 19 |
+
raise ValueError(f"`weights` must be 'spanish', got {weights}.")
|
| 20 |
if weights == "spanish":
|
| 21 |
self.weights = weights
|
| 22 |
+
self.include_top = include_top
|
| 23 |
self.num_attention_heads = 4
|
| 24 |
self.num_hidden_layers = 2
|
| 25 |
self.seq_max_length = 280
|
|
|
|
| 30 |
return
|
| 31 |
|
| 32 |
self.weights = weights
|
| 33 |
+
self.include_top = include_top
|
| 34 |
self.num_attention_heads = num_attention_heads
|
| 35 |
self.num_hidden_layers = num_hidden_layers
|
| 36 |
self.seq_max_length = seq_max_length
|
modeling_bilma.py
CHANGED
|
@@ -9,7 +9,7 @@ from typing import Dict
|
|
| 9 |
import re
|
| 10 |
import unicodedata
|
| 11 |
|
| 12 |
-
from
|
| 13 |
|
| 14 |
# copied from preprocessing.py
|
| 15 |
BLANK = ' '
|
|
@@ -49,7 +49,8 @@ class TFBilma(TFPreTrainedModel):
|
|
| 49 |
num_heads=config.num_attention_heads,
|
| 50 |
ff_dim=config.hidden_size,
|
| 51 |
vocab_size=config.vocab_size,
|
| 52 |
-
rate=config.hidden_dropout_prob)
|
|
|
|
| 53 |
|
| 54 |
@property
|
| 55 |
def dummy_inputs(self) -> Dict[str, tf.Tensor]:
|
|
@@ -486,14 +487,17 @@ def accuracy_function(ignore_id=0):
|
|
| 486 |
return tf.math.divide_no_nan(tf.reduce_sum(accuracies), tf.reduce_sum(mask))
|
| 487 |
return acc_mlm
|
| 488 |
|
| 489 |
-
def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1):
|
| 490 |
capt_inputs_ids = Input(shape=(max_length, ), name='input_ids')
|
| 491 |
capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False, name="bilma/embedding")
|
| 492 |
capt_inputs = capt_embedding(capt_inputs_ids)
|
| 493 |
|
| 494 |
enc = Encoder(num_enc, embed_dim, max_length, num_heads, ff_dim, rate=rate, name="bilma/encoder")
|
| 495 |
enc_output = enc(capt_inputs)
|
| 496 |
-
fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
|
|
|
|
|
|
|
|
|
|
| 497 |
|
| 498 |
caption_model = Model(inputs=capt_inputs_ids, outputs=[fin_output], name="bilma_model")
|
| 499 |
return caption_model
|
|
|
|
| 9 |
import re
|
| 10 |
import unicodedata
|
| 11 |
|
| 12 |
+
from configuration_bilma import BilmaConfig
|
| 13 |
|
| 14 |
# copied from preprocessing.py
|
| 15 |
BLANK = ' '
|
|
|
|
| 49 |
num_heads=config.num_attention_heads,
|
| 50 |
ff_dim=config.hidden_size,
|
| 51 |
vocab_size=config.vocab_size,
|
| 52 |
+
rate=config.hidden_dropout_prob,
|
| 53 |
+
include_top = config.include_top)
|
| 54 |
|
| 55 |
@property
|
| 56 |
def dummy_inputs(self) -> Dict[str, tf.Tensor]:
|
|
|
|
| 487 |
return tf.math.divide_no_nan(tf.reduce_sum(accuracies), tf.reduce_sum(mask))
|
| 488 |
return acc_mlm
|
| 489 |
|
| 490 |
+
def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1, include_top=True):
|
| 491 |
capt_inputs_ids = Input(shape=(max_length, ), name='input_ids')
|
| 492 |
capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False, name="bilma/embedding")
|
| 493 |
capt_inputs = capt_embedding(capt_inputs_ids)
|
| 494 |
|
| 495 |
enc = Encoder(num_enc, embed_dim, max_length, num_heads, ff_dim, rate=rate, name="bilma/encoder")
|
| 496 |
enc_output = enc(capt_inputs)
|
| 497 |
+
if include_top:
|
| 498 |
+
fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
|
| 499 |
+
else:
|
| 500 |
+
fin_output = enc_output
|
| 501 |
|
| 502 |
caption_model = Model(inputs=capt_inputs_ids, outputs=[fin_output], name="bilma_model")
|
| 503 |
return caption_model
|
tf_model.h5
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 156564220
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d31e357973be9bf86a3676237280b3ffe852ac994efd62d6eb67e06e36cd039
|
| 3 |
size 156564220
|