FlowProt / model /scripts /train_classifier_mn5.slurm
alibtsd's picture
Deploy FlowProt Docker Space
f34af6f verified
Raw
History Blame Contribute Delete
705 Bytes
#!/bin/bash
# Slurm batch script: runs train_classifier.py through srun on a single node
# with 4 GPUs, with Weights & Biases logging kept offline under ./wandb.
#
# Usage: submit from the directory that contains train_classifier.py, e.g.
#   mkdir -p logs && sbatch <path-to-this-script>
#
# The logs/ directory has to exist at submit time: Slurm does not create
# missing directories for --output/--error, and those files are opened before
# this script starts, so the `mkdir -p logs` further down cannot help them.
#
# NOTE(review): --qos=gp_debug combined with --time=24:00:00 and a GPU request
# looks inconsistent with MN5 queue limits (debug QoS are presumably short and
# gp_* presumably targets the CPU partition) - confirm against the MN5 user
# guide and your allocation before submitting.
# NOTE(review): with --ntasks=1, srun starts a single Python process. If the
# trainer relies on Slurm to launch one task per GPU (as PyTorch Lightning
# presumably does for strategy=ddp), --ntasks-per-node should match the device
# count - confirm that all requested GPUs are actually used.
#SBATCH --job-name=se3clf-train
#SBATCH --output=logs/%x-%j.out
#SBATCH --error=logs/%x-%j.err
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --gres=gpu:4
#SBATCH --cpus-per-task=16
#SBATCH --time=24:00:00
#SBATCH --qos=gp_debug
set -euo pipefail

# --- Adjust these to your MN5 environment ---
# module purge
# module load <python-module>
# module load <cuda-module>
# source /path/to/venv/bin/activate

# Fail with an explicit message (instead of a bare "unbound variable") when the
# script is executed directly rather than submitted through sbatch.
: "${SLURM_SUBMIT_DIR:?this script must be submitted with sbatch (SLURM_SUBMIT_DIR is unset)}"

# Relative paths (train_classifier.py, logs/, wandb/) resolve against the
# directory the job was submitted from.
cd "${SLURM_SUBMIT_DIR}"
mkdir -p logs wandb

# Keep W&B offline and write its run files inside the submit directory.
export WANDB_MODE=offline
export WANDB_DIR="${SLURM_SUBMIT_DIR}/wandb"

# Follow the GPU count Slurm actually allocated so the trainer stays in sync
# with --gres (also when it is overridden on the sbatch command line); fall
# back to the script's default of 4 when Slurm does not export the variable.
num_devices="${SLURM_GPUS_ON_NODE:-4}"

# Config overrides passed to train_classifier.py, one per array element.
overrides=(
  "experiment.num_devices=${num_devices}"
  "experiment.trainer.strategy=ddp"
  "experiment.wandb.offline=true"
  "experiment.wandb.dir=${WANDB_DIR}"
)

srun python train_classifier.py "${overrides[@]}"