#!/usr/bin/env bash
# Launch nanomind pretraining detached from the terminal.
# Fixes vs. original one-liner:
#   - stray markdown-table '|' delimiters removed (trailing '| |' was a shell
#     syntax error: a pipe into an empty command)
#   - trailing '&' added: nohup alone does not background the process, so the
#     original would have blocked the terminal and died with it on SIGHUP-less
#     shells that close the session
#   - 'mkdir -p' added: the shell opens train.log for redirection BEFORE python
#     starts, so the run directory must already exist or the launch fails
set -euo pipefail

readonly run_dir=/workspace/nanomind_runs/run1

mkdir -p -- "$run_dir"

nohup python /workspace/nanomind/train.py \
  --data_path /workspace/nanomind_data/pretrain_1m.jsonl.gz \
  --out_dir "$run_dir" \
  --tokenizer_name hf-internal-testing/llama-tokenizer \
  --seq_len 2048 \
  --hidden_size 512 \
  --n_layers 16 \
  --n_heads 8 \
  --n_kv_heads 1 \
  --global_batch_size 64 \
  --micro_batch_size 1 \
  --lr 1e-3 \
  --warmup_steps 2000 \
  --max_steps 50000 \
  --save_every 1000 \
  --bf16 \
  > "$run_dir/train.log" 2>&1 &

# Record the PID so the run can be monitored or killed later.
printf 'started train.py (pid %d); logs: %s/train.log\n' "$!" "$run_dir"