supanthadey1's picture
Add BERTose and AFFINose training code release
1d6f391 verified
Raw
History Blame Contribute Delete
1.58 kB
#!/bin/bash
# SLURM batch script for the "v6_infonce" training job.
# Submit with `sbatch`. The #SBATCH lines below are scheduler directives and
# must stay ahead of the first executable command in the script.
#
# Job name shown in the queue.
#SBATCH --job-name=v6_infonce
# Target partition.
#SBATCH --partition=nova
# Resource request: one node, one task, 8 CPU cores for that task, one GPU.
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --gres=gpu:1
# Memory request for the job.
#SBATCH --mem=369G
# Wall-clock limit (72 hours).
#SBATCH --time=72:00:00
# stdout / stderr log files; %j is replaced by the job ID.
# NOTE(review): these are relative paths -- presumably resolved against the
# directory the job is submitted from, and the directory must already exist;
# confirm against the cluster's sbatch behaviour.
#SBATCH --output=bert_v6_contrastive/cluster_scripts/train_v6_%j.out
#SBATCH --error=bert_v6_contrastive/cluster_scripts/train_v6_%j.err
# ---------------------------------------------------------------------------
# Job body: print a banner, activate the conda environment, change into the
# project directory, run the V6 curriculum contrastive trainer, and exit with
# the trainer's own status so the scheduler does not record a crashed run as
# a successful one.
#
# Setup steps are checked explicitly with `||` rather than `set -eu`, because
# conda activation scripts may reference unset variables and would trip -u.
# ---------------------------------------------------------------------------

echo "=========================================="
echo "V6 Curriculum Contrastive Training - FIXED with InfoNCE"
echo "=========================================="
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $(hostname)"
echo "GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader)"
echo "Date: $(date)"
echo "=========================================="

# Activate conda. Abort early if either step fails so the trainer never runs
# under an unintended Python interpreter.
source /work/ratul1/supantha/miniconda3/etc/profile.d/conda.sh \
  || { echo "ERROR: failed to source conda.sh" >&2; exit 1; }
conda activate glycanml \
  || { echo "ERROR: failed to activate conda environment 'glycanml'" >&2; exit 1; }

# Every path passed to the trainer below is relative to this directory, so a
# failed cd must stop the job instead of running from the wrong location.
cd /work/ratul1/supantha/glycan-SD-VS/bert_training_v3/v3.1_cluster_training \
  || { echo "ERROR: failed to cd into the training directory" >&2; exit 1; }

# Run training with InfoNCE loss (NEVER goes to 0)
python bert_v6_contrastive/training/contrastive_trainer_v6_curriculum.py \
  --positives bert_v5.1_contrastive/data/fully_resolved_161k.pkl \
  --negatives bert_v6_contrastive/data/negatives_scored.pkl \
  --checkpoint checkpoints_v5_bpe_topo/best_v5_bpe_topo_model.pt \
  --output_dir checkpoints_v6 \
  --epochs 30 \
  --batch_size 128 \
  --lr 2e-5 \
  --mlm_weight 0.70 \
  --cont_weight 0.30 \
  --temperature 0.07 \
  --easy_epochs 5 \
  --medium_epochs 10 \
  --hard_epochs 15
# Capture the trainer's status immediately; any later command overwrites $?.
train_status=$?

echo "=========================================="
if [ "$train_status" -eq 0 ]; then
  echo "Training Complete!"
else
  # Report on stdout (next to the banner) and on stderr (the .err log).
  echo "Training FAILED (exit code ${train_status})"
  echo "Training FAILED (exit code ${train_status})" >&2
fi
echo "Date: $(date)"
echo "=========================================="

# Propagate the trainer's exit code as the job's exit code.
exit "$train_status"