pere committed on
Commit
682b4fd
·
1 Parent(s): 7f2a9ca
Files changed (1) hide show
  1. run_step1.sh +7 -6
run_step1.sh CHANGED
@@ -3,16 +3,17 @@
3
  --model_type="roberta" \
4
  --config_name="./" \
5
  --tokenizer_name="./" \
6
- --dataset_name="NbAiLab/nbailab_extended" \
 
7
  --cache_dir="/mnt/disks/flaxdisk/cache/" \
8
  --max_seq_length="128" \
9
  --weight_decay="0.01" \
10
- --per_device_train_batch_size="256" \
11
- --per_device_eval_batch_size="256" \
12
- --learning_rate="6e-4" \
13
  --warmup_steps="10000" \
14
  --overwrite_output_dir \
15
- --num_train_epochs="1000" \
16
  --adam_beta1="0.9" \
17
  --adam_beta2="0.98" \
18
  --logging_steps="10000" \
@@ -21,6 +22,6 @@
21
  --preprocessing_num_workers="64" \
22
  --auth_token="True" \
23
  --static_learning_rate="True" \
24
- --dtype="bloat16" \
25
  --adafactor \
26
  --push_to_hub
 
3
  --model_type="roberta" \
4
  --config_name="./" \
5
  --tokenizer_name="./" \
6
+ --train_file /mnt/disks/flaxdisk/corpus/train_1_4.json \
7
+ --validation_file /mnt/disks/flaxdisk/corpus/validation.json \
8
  --cache_dir="/mnt/disks/flaxdisk/cache/" \
9
  --max_seq_length="128" \
10
  --weight_decay="0.01" \
11
+ --per_device_train_batch_size="200" \
12
+ --per_device_eval_batch_size="200" \
13
+ --learning_rate="4e-4" \
14
  --warmup_steps="10000" \
15
  --overwrite_output_dir \
16
+ --num_train_epochs="2" \
17
  --adam_beta1="0.9" \
18
  --adam_beta2="0.98" \
19
  --logging_steps="10000" \
 
22
  --preprocessing_num_workers="64" \
23
  --auth_token="True" \
24
  --static_learning_rate="True" \
25
+ --dtype="bfloat16" \
26
  --adafactor \
27
  --push_to_hub