# Launches masked-language-model training via run_mlm.py.
#
# Steps:
#   1. Export HF_DATASETS_CACHE so child processes see the cache location
#      under /workspace.
#   2. Invoke run_mlm.py with training and evaluation enabled, writing to
#      ./mobilebert_ko and pushing to the Hub.
#
# run_mlm.py, train.txt and val.txt are given as relative paths, so this
# must be run from the directory that contains them.
#
# NOTE(review): run_mlm.py is presumably the Hugging Face Transformers
# language-modeling example script, with these flags mapping onto
# TrainingArguments -- confirm against the copy actually in use.
# NOTE(review): under that assumption, batch size 16 with 2 accumulation
# steps gives 32 samples per device per optimizer step, and
# --warmup_ratio 0.05 of --max_steps 1000000 is 50000 warmup steps -- verify.
# NOTE(review): --push_to_hub presumably needs Hub credentials to be
# configured beforehand -- confirm.
# --overwrite_output_dir is passed, so existing contents of mobilebert_ko
# are not protected; drop it if a previous run must be preserved.
export HF_DATASETS_CACHE="/workspace/.cache/huggingface/datasets"

python run_mlm.py \
  --model_name_or_path Bingsu/my_mobilebert_untrained \
  --train_file train.txt \
  --validation_file val.txt \
  --per_device_train_batch_size 16 \
  --per_device_eval_batch_size 16 \
  --gradient_accumulation_steps 2 \
  --max_steps 1000000 \
  --lr_scheduler_type cosine_with_restarts \
  --warmup_ratio 0.05 \
  --save_steps 5000 \
  --save_total_limit 5 \
  --logging_steps 5000 \
  --fp16 \
  --optim adamw_torch \
  --do_train \
  --do_eval \
  --push_to_hub \
  --hub_strategy checkpoint \
  --output_dir mobilebert_ko \
  --overwrite_output_dir