another try bfloat16
Browse files
config.json
CHANGED
|
@@ -20,7 +20,7 @@
|
|
| 20 |
"num_hidden_layers": 12,
|
| 21 |
"pad_token_id": 1,
|
| 22 |
"position_embedding_type": "absolute",
|
| 23 |
-
"torch_dtype": "
|
| 24 |
"transformers_version": "4.14.0.dev0",
|
| 25 |
"type_vocab_size": 1,
|
| 26 |
"use_cache": true,
|
|
|
|
| 20 |
"num_hidden_layers": 12,
|
| 21 |
"pad_token_id": 1,
|
| 22 |
"position_embedding_type": "absolute",
|
| 23 |
+
"torch_dtype": "bfloat16",
|
| 24 |
"transformers_version": "4.14.0.dev0",
|
| 25 |
"type_vocab_size": 1,
|
| 26 |
"use_cache": true,
|
events.out.tfevents.1640253419.t1v-n-6f5efcd5-w-0.1851551.0.v2
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:ee54b659c96fbadcf61cfe022890c77db861bba66c0ff84b873c329297a24f14
|
| 3 |
-
size 40
|
|
|
|
|
|
|
|
|
|
|
|
run_step1.sh
CHANGED
|
@@ -9,7 +9,7 @@
|
|
| 9 |
--weight_decay="0.01" \
|
| 10 |
--per_device_train_batch_size="256" \
|
| 11 |
--per_device_eval_batch_size="256" \
|
| 12 |
-
--learning_rate=
|
| 13 |
--warmup_steps="10000" \
|
| 14 |
--overwrite_output_dir \
|
| 15 |
--num_train_epochs="1000" \
|
|
@@ -22,4 +22,5 @@
|
|
| 22 |
--preprocessing_num_workers="64" \
|
| 23 |
--auth_token="True" \
|
| 24 |
--static_learning_rate="True" \
|
|
|
|
| 25 |
--push_to_hub
|
|
|
|
| 9 |
--weight_decay="0.01" \
|
| 10 |
--per_device_train_batch_size="256" \
|
| 11 |
--per_device_eval_batch_size="256" \
|
| 12 |
+
--learning_rate="4e-4" \
|
| 13 |
--warmup_steps="10000" \
|
| 14 |
--overwrite_output_dir \
|
| 15 |
--num_train_epochs="1000" \
|
|
|
|
| 22 |
--preprocessing_num_workers="64" \
|
| 23 |
--auth_token="True" \
|
| 24 |
--static_learning_rate="True" \
|
| 25 |
+
--dtype="bfloat16" \
|
| 26 |
--push_to_hub
|