# Run eval_dflash_lora_inject.py under torch.distributed.run with 8 worker
# processes (--standalone, --nproc_per_node 8) for checkpoint
# epoch_3_step_4644 on the gsm8k, humaneval and mt-bench datasets.

# Exported so the launcher and every worker process it spawns inherit them.
# NOTE(review): the units of NCCL_TIMEOUT and whether the installed
# PyTorch/NCCL build honors each of these names are not visible from this
# script -- confirm against the environment's versions.
export NCCL_TIMEOUT=1800000
export TORCH_NCCL_BLOCKING_WAIT=0
export NCCL_ASYNC_ERROR_HANDLING=1

# Uses the absolute interpreter path of the "dflash" conda env, so the env
# does not need to be activated first. Each backslash must be the last
# character on its line for the continuation to take effect.
/workspace/miniconda3/envs/dflash/bin/python3 -m torch.distributed.run \
  --standalone \
  --nproc_per_node 8 \
  /workspace/hanrui/syxin_old/eval_dflash_lora_inject.py \
  --ckpt epoch_3_step_4644 \
  --datasets gsm8k humaneval mt-bench