pere commited on
Commit
7e46c0b
·
1 Parent(s): 0a1e516

another try bfloat16

Browse files
config.json CHANGED
@@ -20,7 +20,7 @@
20
  "num_hidden_layers": 12,
21
  "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
- "torch_dtype": "float32",
24
  "transformers_version": "4.14.0.dev0",
25
  "type_vocab_size": 1,
26
  "use_cache": true,
 
20
  "num_hidden_layers": 12,
21
  "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
+ "torch_dtype": "bfloat16",
24
  "transformers_version": "4.14.0.dev0",
25
  "type_vocab_size": 1,
26
  "use_cache": true,
events.out.tfevents.1640253419.t1v-n-6f5efcd5-w-0.1851551.0.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee54b659c96fbadcf61cfe022890c77db861bba66c0ff84b873c329297a24f14
3
- size 40
 
 
 
 
run_step1.sh CHANGED
@@ -9,7 +9,7 @@
9
  --weight_decay="0.01" \
10
  --per_device_train_batch_size="256" \
11
  --per_device_eval_batch_size="256" \
12
- --learning_rate="4e-4" \
13
  --warmup_steps="10000" \
14
  --overwrite_output_dir \
15
  --num_train_epochs="1000" \
@@ -22,4 +22,5 @@
22
  --preprocessing_num_workers="64" \
23
  --auth_token="True" \
24
  --static_learning_rate="True" \
 
25
  --push_to_hub
 
9
  --weight_decay="0.01" \
10
  --per_device_train_batch_size="256" \
11
  --per_device_eval_batch_size="256" \
12
+ --learning_rate="4e-4" \
13
  --warmup_steps="10000" \
14
  --overwrite_output_dir \
15
  --num_train_epochs="1000" \
 
22
  --preprocessing_num_workers="64" \
23
  --auth_token="True" \
24
  --static_learning_rate="True" \
25
+ --dtype="bfloat16" \
26
  --push_to_hub