#!/bin/bash
# Slurm smoke-test job for the world-model dynamics trainer.
#
# Requests one H100 GPU for 15 minutes, activates the project virtualenv,
# points the W&B / Torch / Hugging Face caches at shared storage, and launches
# wm.trainer.train_dynamics with --resume.
#
# NOTE: per the sbatch documentation, the -o/-e paths below are resolved
# relative to the submission directory, and Slurm does not create missing
# directories. Make sure logs/ exists before running sbatch.
#SBATCH -J wm_test
#SBATCH -A coc
#SBATCH -p coe-gpu
#SBATCH --qos=coe-ice
#SBATCH --gres=gpu:H100:1
#SBATCH --mem-per-gpu=32G
#SBATCH --cpus-per-gpu=8
#SBATCH -t 00:15:00
#SBATCH -o logs/test_run_%j.log
#SBATCH -e logs/test_run_%j.err

# Fail fast: a broken setup step should not leave the job training from the
# wrong directory or interpreter while holding a GPU allocation.
set -eo pipefail

# Shared storage root holding the code checkout, the virtualenv and the caches.
readonly DATA_ROOT=/storage/ice-shared/ae8803che/hxue/data

cd "${DATA_ROOT}/world_model" || {
  printf 'error: cannot cd to %s\n' "${DATA_ROOT}/world_model" >&2
  exit 1
}

# shellcheck disable=SC1091  # activate script lives outside the repository
source "${DATA_ROOT}/wm/bin/activate"

# Enabled only after activation: some activate scripts read unset variables.
set -u

# Setup cache directories
export WANDB_CACHE_DIR="${DATA_ROOT}/.wandb_cache"
export TORCH_HOME="${DATA_ROOT}/.torch_cache"
export HF_HOME="${DATA_ROOT}/.hf_cache"
mkdir -p -- "${WANDB_CACHE_DIR}" "${TORCH_HOME}" "${HF_HOME}"

# Append the project root so the `wm` package is importable; the ${VAR:+...}
# form avoids a leading empty entry when PYTHONPATH was previously unset.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}"

echo "Starting test run..."

# Intended as a short run (20 steps, so that eval_freq=10 triggers an eval).
# NOTE(review): no step-count or eval_freq override is passed on this command
# line; presumably both come from the YAML config -- confirm before relying
# on this job to exercise evaluation.
python -m wm.trainer.train_dynamics \
  --config wm/config/fulltraj_dit/lang_table.yaml \
  --resume