#!/bin/bash
#SBATCH --job-name=refit-seed-array
#SBATCH --partition=dgx-b200
#SBATCH --gpus=1
#SBATCH --cpus-per-task=10
#SBATCH --mem=100G
#SBATCH --time=12:00:00
#SBATCH --output=%x_%A_%a.out
#SBATCH --array=0-4        # 5 seeds → indices 0..4
#
# Slurm job array that runs refit_nn_seed.py once per random seed.
# Each array task picks SEEDS[SLURM_ARRAY_TASK_ID], runs the Python script with
# its stdout/stderr redirected to a per-seed log file, and on success appends
# one JSON line with the wall-clock time to a shared .jsonl file.
#
# Usage: edit the "Configure per submission" section, then `sbatch <this file>`.
# The --array range above must stay in sync with the number of entries in SEEDS.
# Exit status: the exit status of refit_nn_seed.py (non-zero marks the array
# task as failed in Slurm), or 1 on a setup error.

set -euo pipefail

# Print a message to stderr and abort the task.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# No trailing slash, so the derived paths do not contain "//".
HOME_LOC=~
SCRIPT_LOC="${HOME_LOC}/PeptiVerse/training_classifiers"
DATA_LOC="${HOME_LOC}/PeptiVerse/training_data_cleaned"

# ── Configure per submission ──────────────────────────────────────────
OBJECTIVE='permeability_pampa'  # nf / solubility / hemolysis / permeability_penetrance / permeability_pampa / permeability_caco2
WT='chemberta'                  # wt / smiles / chemberta / peptideclm
MODEL_TYPE='mlp'                # mlp / cnn / transformer
# NOTE(review): the 'hemo' prefix does not match OBJECTIVE='permeability_pampa'
# or the 'permeability_*' dataset directory below — confirm this is intended.
DATA_FILE="hemo_${WT}_with_embeddings_unpooled"  # nf / sol / hemo / perm / pampa / caco2

# Points to the directory where Optuna already saved best_model.pt
BASE_OUT_DIR="${SCRIPT_LOC}/${OBJECTIVE}/${MODEL_TYPE}_${WT}"
DATASET_PATH="${DATA_LOC}/permeability_${WT}/${DATA_FILE}"
# ────────────────────────────────────────────────────────────────────────────

SEEDS=(1986 42 0 123 12345)

# Validate the array index before using it: an unset or out-of-range index
# would otherwise yield an empty seed that is passed to Python as `--seed ""`.
TASK_ID="${SLURM_ARRAY_TASK_ID:?must be run as a Slurm array task (sbatch --array=0-$(( ${#SEEDS[@]} - 1 )))}"
[[ "${TASK_ID}" =~ ^[0-9]+$ ]] \
  || die "SLURM_ARRAY_TASK_ID is not a non-negative integer: '${TASK_ID}'"
TASK_ID=$(( 10#${TASK_ID} ))  # force base 10 so a leading zero is not read as octal
(( TASK_ID < ${#SEEDS[@]} )) \
  || die "SLURM_ARRAY_TASK_ID=${TASK_ID} is out of range for ${#SEEDS[@]} seeds"
SEED="${SEEDS[TASK_ID]}"

LOG_LOC="${SCRIPT_LOC}/src_bash/logs"
mkdir -p -- "${LOG_LOC}" || die "cannot create log directory ${LOG_LOC}"
DATE=$(date +%m_%d)
RUN_LOG="${LOG_LOC}/${DATE}_refit_${MODEL_TYPE}_${OBJECTIVE}_${WT}_seed${SEED}.log"
WALL_CLOCK_LOG="${LOG_LOC}/${DATE}_wall_clock_refit.jsonl"

# refit_nn_seed.py is invoked by relative path, so the cd must succeed.
cd -- "${SCRIPT_LOC}" || die "cannot cd to ${SCRIPT_LOC}"

echo "Running seed=$SEED model=$MODEL_TYPE objective=$OBJECTIVE wt=$WT"
START_TIME=$(date +%s%N)  # nanoseconds since the epoch (%N is a GNU date extension)

# Capture the exit status instead of letting `set -e` abort here, so the
# failure can be reported and propagated below.
rv=0
python -u refit_nn_seed.py \
  --dataset_path "${DATASET_PATH}" \
  --base_out_dir "${BASE_OUT_DIR}" \
  --model "${MODEL_TYPE}" \
  --seed "${SEED}" \
  > "${RUN_LOG}" 2>&1 || rv=$?

END_TIME=$(date +%s%N)
ELAPSED_S=$(( (END_TIME - START_TIME) / 1000000000 ))

if (( rv != 0 )); then
  # Do not record a timing for a failed run, and make the array task fail so
  # Slurm does not report it as COMPLETED.
  printf 'Seed %s FAILED (exit %d) at %s after %ss — see %s\n' \
    "${SEED}" "${rv}" "$(date)" "${ELAPSED_S}" "${RUN_LOG}" >&2
  exit "${rv}"
fi

echo "Seed $SEED done at $(date) — wall clock: ${ELAPSED_S}s"
printf '{"model": "%s", "objective": "%s", "wt": "%s", "seed": %s, "wall_s": %s}\n' \
  "${MODEL_TYPE}" "${OBJECTIVE}" "${WT}" "${SEED}" "${ELAPSED_S}" \
  >> "${WALL_CLOCK_LOG}"