Update app.py
app.py CHANGED
@@ -81,7 +81,7 @@ class PositionalEmbedding(layers.Layer):
         return output


-class AttentionalPooling(layers.Layer):
+class AttentionalPooling(tf.keras.layers.Layer):
     def __init__(self, embed_dim, num_heads=6):
         super().__init__()
         self.embed_dim = embed_dim
@@ -100,7 +100,7 @@ class AttentionalPooling(layers.Layer):
         return self.norm(attn_output)


-class TransformerBlock(layers.Layer):
+class TransformerBlock(tf.keras.layers.Layer):
     def __init__(self, embed_dim, dense_dim, num_heads, dropout_rate=0.1, ln_epsilon=1e-6, is_multimodal=False, **kwargs):
         super().__init__(**kwargs)
         self.embed_dim = embed_dim
@@ -128,7 +128,7 @@ class TransformerBlock(layers.Layer):


         # Feed-Forward Network
-        self.dense_proj = keras.Sequential([
+        self.dense_proj = tf.keras.Sequential([
             layers.Dense(self.dense_dim, activation="gelu"),
             layers.Dropout(self.dropout_rate),
             layers.Dense(self.embed_dim)
@@ -279,7 +279,7 @@ for layer in vit_tiny_model.layers:
     layer.trainable = True


-class CoCaEncoder(keras.Model):
+class CoCaEncoder(tf.keras.Model):
     def __init__(self,
                  vit, **kwargs):

@@ -317,7 +317,7 @@ class CoCaEncoder(keras.Model):



-class CoCaDecoder(keras.Model):
+class CoCaDecoder(tf.keras.Model):
     def __init__(self,
                  cls_token_id,
                  num_heads,
@@ -368,7 +368,7 @@ class CoCaDecoder(keras.Model):


 # day 6
-class CoCaModel(keras.Model):
+class CoCaModel(tf.keras.Model):
     def __init__(self,
                  vit,
                  cls_token_id,
@@ -491,7 +491,7 @@ dummy_features = tf.zeros((1, 3, img_size, img_size), dtype=tf.float32)
 dummy_captions = tf.zeros((1, sentence_length-1), dtype=tf.int64)
 _ = coca_model((dummy_features, dummy_captions))

-optimizer = keras.optimizers.Adam(learning_rate=1e-4)
+optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
 coca_model.compile(optimizer)

 save_dir = "models/"
@@ -540,7 +540,7 @@ class BahdanauAttention(layers.Layer):



-class ImageCaptioningModel(keras.Model):
+class ImageCaptioningModel(tf.keras.Model):
     def __init__(self, vocab_size, max_caption_len, embedding_dim=512, lstm_units=512, dropout_rate=0.5, **kwargs):
         super().__init__(**kwargs)

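Every hunk applies the same one-line fix: a bare `keras.*` reference becomes a fully qualified `tf.keras.*` reference. Below is a minimal sketch of the failure mode this addresses, assuming `app.py` imports only `tensorflow as tf` and `from tensorflow.keras import layers` (the unqualified `layers.Dense(...)` context lines suggest the latter; the actual import block is not visible in this diff):

import tensorflow as tf
from tensorflow.keras import layers

# With only the imports above in scope, a bare name fails at class-definition time:
#   class CoCaEncoder(keras.Model): ...
#   NameError: name 'keras' is not defined
# Qualifying through the tf namespace resolves it, mirroring each hunk:

class ProjSketch(tf.keras.Model):  # hypothetical stand-in, not the file's real class
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.dense_proj = tf.keras.Sequential([  # same fix as the dense_proj hunk
            layers.Dense(64, activation="gelu"),
            layers.Dense(32),
        ])

    def call(self, inputs):
        return self.dense_proj(inputs)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)  # same fix as the optimizer hunk

An alternative fix would have been a single `from tensorflow import keras` at the top of the file; prefixing every reference, as done here, avoids touching the imports.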
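For context on the first hunk's target: the diff exposes only the `AttentionalPooling` class line, its `embed_dim` and `num_heads=6` arguments, and the `return self.norm(attn_output)` at line 100. A hypothetical sketch of a CoCa-style attentional pooler consistent with those fragments follows; the learned `query` weight, the `MultiHeadAttention` sublayer, and all shapes are assumptions, not the file's actual code:

import tensorflow as tf

class AttentionalPooling(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads=6):
        super().__init__()
        self.embed_dim = embed_dim
        # Learned query token pooled against the encoder sequence (assumed).
        self.query = self.add_weight(
            name="query", shape=(1, 1, embed_dim), initializer="random_normal"
        )
        self.attention = tf.keras.layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim // num_heads
        )
        self.norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)

    def call(self, x):
        # x: (batch, num_tokens, embed_dim); broadcast the query over the batch.
        batch_size = tf.shape(x)[0]
        queries = tf.tile(self.query, [batch_size, 1, 1])
        attn_output = self.attention(query=queries, value=x, key=x)
        return self.norm(attn_output)  # matches the visible line 100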