| CUDA_VISIBLE_DEVICES=0 python demo.py |
|
|
| PYTHONPATH=$(pwd)/VeOmni:$PYTHONPATH sh train.sh tasks/train_llada2_bd.py configs/sft/llada2_mini_bd_sft.yaml |
| |
| PYTHONPATH=$(pwd)/VeOmni:$PYTHONPATH sh train.sh tasks/train_llada2_bd_semi2.py configs/sft/llada2_mini_bd_sft_new.yaml |
|
|
| PYTHONPATH=$(pwd)/VeOmni:$PYTHONPATH sh train.sh tasks/train_llada2_bd_hybrid.py configs/sft/llada2_mini_bd_sft_new.yaml |
| |
| sft2: batch size = 8 |
| sft3: batch size = 32 |
| sft4: batch size = 8 |
| |
| python scripts/moe_convertor.py \ |
| --input-path /scratch/e0973935/model_weights/local_LLaDA2.1-mini \ |
| --output-path /scratch/e0973935/model_weights/local_LLaDA2.1-mini-merge \ |
| --mode merge |
|
|
| python scripts/moe_convertor.py \ |
| --input-path /scratch/e0973935/model_weights/llada2.0_mini_sft_27 \ |
| --output-path /scratch/e0973935/model_weights/local_LLaDA2.0-mini-merge-cust \ |
| --mode merge |
| |
| python scripts/moe_convertor.py \ |
| --input-path /scratch/e0973935/dFactory/llada2_mini_bd_sft_outputs_mathabla/checkpoints/global_step_179430/hf_ckpt \ |
| --output-path /scratch/e0973935/model_weights/llada2.0_mini_abla \ |
| --mode split |
| |
| qsub -I \ |
| -P CFP03-SF-102 \ |
| -l select=1:ngpus=2 \ |
| -l walltime=1:40:00 |
| |
| |
| |
| outputs3 online 0.6-1.0 lr=1e-6 bsz=8 |
| outputs4 online 0.6-1.0 lr=1e-5 bsz=64 |
| outputs5 online 0.4-0.8 lr=1e-6 bsz=8 allmath |
| outputs6 online 0.4-0.8 lr=1e-6 bsz=8 allmath onpolicyremask |
| outputs7 online 0.6-0.8 lr=1e-6 bsz=8 allmath |
| outputs8 online 0.6-0.8 lr=2e-6 bsz=8 allmath |
| outputs9 online 0.3-0.8 lr=1e-6 bsz=8 allmath ar-mask |
| outputs10 online 0.0-1.0 lr=1e-6 bsz=8 allmath ar-mask |
| outputs11 online 0.6-0.8 lr=1e-6 bsz=8 allmath+ |
| outputs12 online 0.6-0.8 lr=5e-7 bsz=8 allmath+ |
| outputs13 online 0.6-0.8 lr=1e-6 bsz=8 allmath+ block=64 |
| outputs14 online 0.6-0.8 lr=2e-6 bsz=8 allmath+ |
| outputs16 online 0.3-0.8 lr=1e-6 bsz=8 allmath+ ar-mask-8 label-mask |
| outputs17 online 0.3-0.5 lr=1e-6 bsz=8 allmath ar-mask |
| outputs18 online 0.6-0.8 lr=4e-6 bsz=8 allmath+ |
| outputs19 online 0.6-0.8 lr=1e-5 bsz=8 allmath+ |
| outputs20 online 0.6-0.8 lr=4e-6 bsz=8 allmath+ blockrand |
| outputs21 online 0.7-0.7 lr=4e-6 bsz=8 allmath+ |
| |
| outputs23 online 0.3-0.8 lr=2e-6 bsz=8 allmath+ ar-mask |
| outputs24 online 0.3-0.8 lr=2e-6 bsz=8 allmath+ gumblemask |
| outputs25 online 0.6-0.8 lr=2e-6 bsz=8 allmath+ gumblemask |
| |
| outputs26 online 0.6-0.8 lr=2e-6 bsz=8 allmath++ |
| outputs27 online 0.75 lr=2e-6 bsz=8 allmath++ |
| outputs28 online 0.6-0.8 lr=2e-6 bsz=8 allmath++ label-mask |
| outputs29 online 0.75 lr=2e-6 bsz=8 allmath++ gumblemask thresh=0.5 |
| outputs30 online 0.75 lr=2e-6 bsz=8 allmath++ gumblemask thresh=0.3 |
| outputs31 online 0.5-0.8 lr=2e-6 bsz=8 allmath++ gumblemask thresh=0.3 |
| outputs32 online 0.75 lr=2e-6 bsz=8 allmath+ rkd |
| outputs33 online 0.6-1.0 lr=2e-6 bsz=8 allmath+ rkd |
| outputs34 online 0.75 lr=2e-6 bsz=8 allmath+ rkd w0.25 |
| outputs36 online 0.75 lr=2e-6 bsz=8 allmath+ ar-attention |
| outputs37 online 0.75 lr=2e-6 bsz=8 allmath+ ar-attention-no-uni |
| |
| outputs38 online 0.75 lr=2e-6 bsz=8 allmath+ cont k=3 |
| outputs39 online 0.6-0.8 lr=2e-6 bsz=8 allmath+ cont k=3 |
| outputs40 online 0.75 lr=2e-6 bsz=8 allmath+ cont k=1 |
| outputs41 online 0.6-1.0 lr=2e-6 bsz=8 allmath+ cont k=1 |
| outputs42 online 0.75 lr=2e-6 bsz=8 allcode+ |
| |
| outputs43 online 0.75 lr=2e-6 bsz=8 allmath+ cont-norm k=1 |
| outputs44 online 0.6-1.0 lr=2e-6 bsz=8 allmath+ cont-norm k=1 |
| outputs45 online 0.75 lr=2e-6 bsz=8 allmath+ cont-norm k=3 |
| outputs47 online 0.75 lr=2e-6 bsz=8 allmath+ cont-norm nomask k=3 |
| |
| outputs48 online 0.6-0.9 lr=2e-6 bsz=8 allmath+ |
| outputs49 online 0.7-0.9 lr=2e-6 bsz=8 allcode+- |
| outputs50 online 0.75 lr=2e-6 bsz=8 allcode+- |
| outputs51 online 0.6-0.8 lr=2e-6 bsz=8 allcode+ |
| outputs52 online 0.75 lr=2e-6 bsz=8 allmath++ 27+epoch2 |
| |
| |
| outputs61 online 0.8 lr=2e-6 bsz=4 codefinal epoch=1 |
| |
| export PYTHONPATH="/scratch/e0973935/dInfer/python:${PYTHONPATH}" |
| python -c "import dinfer; print(dinfer.__file__)" |
| |
| |
| amgr login |
| |
| hpc project |
| |
| CUDA_VISIBLE_DEVICES=0,1,2,3 python load.py |
| |
| deepspeed --include localhost:0 train_compress_ed2.py |
| |
| deepspeed --num_nodes=1 --num_gpus=8 train_compress3.py |
|
|
|
|
| MAX_JOBS=4 pip install flash-attn --no-build-isolation |
| |
| MAX_JOBS=64 pip install flash_attn==2.8.3 --no-build-isolation |
| |
| pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.8cxx11abiTRUE-cp310-cp310-linux_x86_64.whl |
|
|
| scp -r /home/svu/e0973935/CompThinker /scratch/e0973935 |
|
|
| scp -r /scratch/e0973935/model_weights/custom_Qwen3-1.7B /scratch/e0950166 |
|
|
| scp -r /Users/yuruonan/Downloads/VITON_traindata/* yuruonan@deep40:/scratch/e0973935/model_weights/custom_Qwen3-1.7B |
| |
| scp -r e0973935@hopper.nus.edu.sg:/scratch/e0973935/model_weights/custom_Qwen3-1.7B /Users/zigeng/Downloads/nips26/models |
| |
| /Project_Storage/CFP-03/CFP03-SF-102 |
|
|
| scp -r /scratch/e0973935/model_weights/llada2.0_mini_sft_70 /Project_Storage/CFP-03/CFP03-SF-102 |
| |
| scp -r /Project_Storage/CFP-03/CFP03-SF-102/llada2.0_mini_sft_70_5 /scratch/e0973935/model_weights/ |