another try bfloat16
Browse files
config.json
CHANGED
|
@@ -20,7 +20,7 @@
|
|
| 20 |
"num_hidden_layers": 12,
|
| 21 |
"pad_token_id": 1,
|
| 22 |
"position_embedding_type": "absolute",
|
| 23 |
-
"torch_dtype": "
|
| 24 |
"transformers_version": "4.14.0.dev0",
|
| 25 |
"type_vocab_size": 1,
|
| 26 |
"use_cache": true,
|
|
|
|
| 20 |
"num_hidden_layers": 12,
|
| 21 |
"pad_token_id": 1,
|
| 22 |
"position_embedding_type": "absolute",
|
| 23 |
+
"torch_dtype": "bfloat16",
|
| 24 |
"transformers_version": "4.14.0.dev0",
|
| 25 |
"type_vocab_size": 1,
|
| 26 |
"use_cache": true,
|
events.out.tfevents.1640253419.t1v-n-6f5efcd5-w-0.1851551.0.v2
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:ee54b659c96fbadcf61cfe022890c77db861bba66c0ff84b873c329297a24f14
|
| 3 |
-
size 40
|
|
|
|
|
|
|
|
|
|
|
|
run_step1.sh
CHANGED
|
@@ -9,7 +9,7 @@
|
|
| 9 |
--weight_decay="0.01" \
|
| 10 |
--per_device_train_batch_size="256" \
|
| 11 |
--per_device_eval_batch_size="256" \
|
| 12 |
-
--learning_rate=
|
| 13 |
--warmup_steps="10000" \
|
| 14 |
--overwrite_output_dir \
|
| 15 |
--num_train_epochs="1000" \
|
|
@@ -22,4 +22,5 @@
|
|
| 22 |
--preprocessing_num_workers="64" \
|
| 23 |
--auth_token="True" \
|
| 24 |
--static_learning_rate="True" \
|
|
|
|
| 25 |
--push_to_hub
|
|
|
|
| 9 |
--weight_decay="0.01" \
|
| 10 |
--per_device_train_batch_size="256" \
|
| 11 |
--per_device_eval_batch_size="256" \
|
| 12 |
+
--learning_rate="4e-4" \
|
| 13 |
--warmup_steps="10000" \
|
| 14 |
--overwrite_output_dir \
|
| 15 |
--num_train_epochs="1000" \
|
|
|
|
| 22 |
--preprocessing_num_workers="64" \
|
| 23 |
--auth_token="True" \
|
| 24 |
--static_learning_rate="True" \
|
| 25 |
+
--dtype="bfloat16" \
|
| 26 |
--push_to_hub
|