pere committed on
Commit
e4b4f73
·
1 Parent(s): f2d1865

Saving weights and logs of step 10000

Browse files
events.out.tfevents.1641896043.t1v-n-e1a08808-w-0.1346783.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bee8663b0eb9cbb10e319fd1ca5b113af27cf8ae7cdcb5208afa84ac274bbf60
3
+ size 1470136
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e74c89bc95480c41642de0c02ce549eeb7d4d96ee7d88aad54c0625a29a551df
3
  size 498796983
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11958977b1f537c3ffe3d18da06f5d2db044284ebe998c2cb7a15698a9f6ff52
3
  size 498796983
run_step1.sh CHANGED
@@ -8,9 +8,9 @@
8
  --cache_dir="/mnt/disks/flaxdisk/cache/" \
9
  --max_seq_length="128" \
10
  --weight_decay="0.01" \
11
- --per_device_train_batch_size="192" \
12
- --per_device_eval_batch_size="192" \
13
- --learning_rate="4e-4" \
14
  --warmup_steps="10000" \
15
  --overwrite_output_dir \
16
  --num_train_epochs="1000" \
 
8
  --cache_dir="/mnt/disks/flaxdisk/cache/" \
9
  --max_seq_length="128" \
10
  --weight_decay="0.01" \
11
+ --per_device_train_batch_size="200" \
12
+ --per_device_eval_batch_size="200" \
13
+ --learning_rate="3e-4" \
14
  --warmup_steps="10000" \
15
  --overwrite_output_dir \
16
  --num_train_epochs="1000" \