Commit from model create scripts
Browse files
config.gin
CHANGED
|
@@ -31,7 +31,7 @@ MODEL_DIR = 'gs://nb-t5x-us-central2/finetuned/scandi3_3stammer_v2_large'
|
|
| 31 |
OPTIMIZER = @adafactor.Adafactor()
|
| 32 |
RANDOM_SEED = 0
|
| 33 |
TASK_FEATURE_LENGTHS = {'inputs': 512, 'targets': 512}
|
| 34 |
-
TRAIN_STEPS =
|
| 35 |
USE_CACHED_TASKS = False
|
| 36 |
USE_HARDWARE_RNG = False
|
| 37 |
VOCABULARY = @seqio.SentencePieceVocabulary()
|
|
|
|
| 31 |
OPTIMIZER = @adafactor.Adafactor()
|
| 32 |
RANDOM_SEED = 0
|
| 33 |
TASK_FEATURE_LENGTHS = {'inputs': 512, 'targets': 512}
|
| 34 |
+
TRAIN_STEPS = 3300000
|
| 35 |
USE_CACHED_TASKS = False
|
| 36 |
USE_HARDWARE_RNG = False
|
| 37 |
VOCABULARY = @seqio.SentencePieceVocabulary()
|
config.json
CHANGED
|
@@ -13,6 +13,7 @@
|
|
| 13 |
"initializer_factor": 1.0,
|
| 14 |
"is_encoder_decoder": true,
|
| 15 |
"layer_norm_epsilon": 1e-06,
|
|
|
|
| 16 |
"model_type": "t5",
|
| 17 |
"num_decoder_layers": 24,
|
| 18 |
"num_heads": 16,
|
|
@@ -21,12 +22,6 @@
|
|
| 21 |
"pad_token_id": 0,
|
| 22 |
"relative_attention_max_distance": 128,
|
| 23 |
"relative_attention_num_buckets": 32,
|
| 24 |
-
"tie_word_embeddings": false,
|
| 25 |
-
"tokenizer_class": "T5Tokenizer",
|
| 26 |
-
"torch_dtype": "float32",
|
| 27 |
-
"transformers_version": "4.19.2",
|
| 28 |
-
"use_cache": true,
|
| 29 |
-
"max_length": 512,
|
| 30 |
"task_specific_params": {
|
| 31 |
"text-generation": {
|
| 32 |
"max_length": 512
|
|
@@ -34,6 +29,11 @@
|
|
| 34 |
"translation": {
|
| 35 |
"max_length": 512
|
| 36 |
}
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
"vocab_size": 250112
|
| 39 |
}
|
|
|
|
| 13 |
"initializer_factor": 1.0,
|
| 14 |
"is_encoder_decoder": true,
|
| 15 |
"layer_norm_epsilon": 1e-06,
|
| 16 |
+
"max_length": 512,
|
| 17 |
"model_type": "t5",
|
| 18 |
"num_decoder_layers": 24,
|
| 19 |
"num_heads": 16,
|
|
|
|
| 22 |
"pad_token_id": 0,
|
| 23 |
"relative_attention_max_distance": 128,
|
| 24 |
"relative_attention_num_buckets": 32,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"task_specific_params": {
|
| 26 |
"text-generation": {
|
| 27 |
"max_length": 512
|
|
|
|
| 29 |
"translation": {
|
| 30 |
"max_length": 512
|
| 31 |
}
|
| 32 |
+
},
|
| 33 |
+
"tie_word_embeddings": false,
|
| 34 |
+
"tokenizer_class": "T5Tokenizer",
|
| 35 |
+
"torch_dtype": "float32",
|
| 36 |
+
"transformers_version": "4.19.2",
|
| 37 |
+
"use_cache": true,
|
| 38 |
"vocab_size": 250112
|
| 39 |
}
|
flax_model.msgpack
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4918349339
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:952384b0efdcd4b18b6882eaf7c4b15be2e902d3421059b6bec0f143751837c1
|
| 3 |
size 4918349339
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4918507641
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f42c4d1b64ee95336f0dc5fb01f822e65666c394ab291c24a7e8c497f47234f2
|
| 3 |
size 4918507641
|
train/events.out.tfevents.1667421611.t1v-n-b052f6bf-w-1.820156.0.v2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:544b309881a8f653e93657c4eb6aa41d856f748d95c8460793389d7b17ed731e
|
| 3 |
+
size 171445
|
training_eval/translate/events.out.tfevents.1667421612.t1v-n-b052f6bf-w-1.820156.1.v2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dccf5d96f93ab56d85473d58a9f8e29c8e1f82f7f1a04ad3263aa61ae4c4ee02
|
| 3 |
+
size 138967
|