# Launch Flax denoising-LM pretraining for a RotoBART-style encoder-decoder.
#
# Data:      streams the NbAiLab/NCC2 dataset with a 100k-example shuffle buffer
#            (100_000 is valid: Python's int() accepts underscore separators).
# Model:     12 encoder + 12 decoder layers, 1024-token sequences, bf16 compute.
# Schedule:  1000 train steps, 30 warmup steps, lr 1e-4; eval/checkpoint every
#            100 steps over 200 eval samples; batch size 1 per device.
#
# NOTE(review): `--auth_token True` — presumably enables the Hugging Face auth
# token for the private/gated dataset; confirm the flag name against
# run_dnlm_flax.py's argument parser (many HF scripts use --use_auth_token).
python3 run_dnlm_flax.py \
  --output_dir rotobart_output \
  --overwrite_output_dir \
  --dataset_path NbAiLab/NCC2 \
  --model_name_or_path rotobart \
  --tokenizer_name vocab-2/the_pile.model \
  --shuffle_buffer_size 100_000 \
  --do_train --do_eval \
  --max_seq_length 1024 \
  --encoder_layers 12 \
  --decoder_layers 12 \
  --per_device_train_batch_size 1 \
  --per_device_eval_batch_size 1 \
  --logging_steps 8 \
  --num_train_steps 1000 \
  --eval_steps 100 \
  --save_steps 100 \
  --num_eval_samples 200 \
  --warmup_steps 30 \
  --learning_rate 1e-4 \
  --auth_token True \
  --use_bf16