#!/bin/bash
#SBATCH --job-name=v6_infonce
#SBATCH --partition=nova
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --gres=gpu:1
#SBATCH --mem=369G
#SBATCH --time=72:00:00
#SBATCH --output=bert_v6_contrastive/cluster_scripts/train_v6_%j.out
#SBATCH --error=bert_v6_contrastive/cluster_scripts/train_v6_%j.err
#
# Slurm batch job: V6 curriculum contrastive training with the InfoNCE loss.
#
# Steps: print a job banner, activate the "glycanml" conda environment,
# change into the project directory, run the curriculum trainer, and print a
# completion banner. The script exits non-zero (so Slurm does not record the
# job as successful) if any setup step or the training run itself fails.
#
# NOTE(review): the --output/--error paths above are relative; they are
# presumably resolved against the directory `sbatch` is invoked from, and the
# target directory presumably has to exist beforehand -- confirm on the cluster.

# Abort on unhandled errors. `-u` is enabled only after conda activation
# because conda's activation scripts may reference unset variables.
set -eo pipefail

readonly CONDA_SH="/work/ratul1/supantha/miniconda3/etc/profile.d/conda.sh"
readonly CONDA_ENV="glycanml"
readonly PROJECT_DIR="/work/ratul1/supantha/glycan-SD-VS/bert_training_v3/v3.1_cluster_training"
readonly TRAINER="bert_v6_contrastive/training/contrastive_trainer_v6_curriculum.py"

# Print an error message to stderr and terminate the job with status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

echo "=========================================="
echo "V6 Curriculum Contrastive Training - FIXED with InfoNCE"
echo "=========================================="
# SLURM_JOB_ID is empty when the script is run outside of Slurm.
echo "Job ID: ${SLURM_JOB_ID:-}"
echo "Node: $(hostname)"
echo "GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader)"
echo "Date: $(date)"
echo "=========================================="

# Activate conda
# shellcheck disable=SC1090  # path is only resolvable on the cluster
source "$CONDA_SH" || die "cannot source ${CONDA_SH}"
conda activate "$CONDA_ENV" || die "cannot activate conda env '${CONDA_ENV}'"
set -u

# All relative paths passed to the trainer below are resolved from here.
cd "$PROJECT_DIR" || die "cannot cd to ${PROJECT_DIR}"

# Trainer arguments. The three curriculum stage lengths (5 + 10 + 15) add up
# to the total of 30 epochs.
# NOTE(review): mlm_weight/cont_weight presumably weight the MLM and
# contrastive loss terms -- confirm against the trainer's argument parser.
train_args=(
  --positives bert_v5.1_contrastive/data/fully_resolved_161k.pkl
  --negatives bert_v6_contrastive/data/negatives_scored.pkl
  --checkpoint checkpoints_v5_bpe_topo/best_v5_bpe_topo_model.pt
  --output_dir checkpoints_v6
  --epochs 30
  --batch_size 128
  --lr 2e-5
  --mlm_weight 0.70
  --cont_weight 0.30
  --temperature 0.07
  --easy_epochs 5
  --medium_epochs 10
  --hard_epochs 15
)

# Run training with InfoNCE loss (NEVER goes to 0).
# Capture the exit status instead of letting `set -e` kill the script, so the
# failure is logged with a timestamp before the status is propagated.
train_status=0
python "$TRAINER" "${train_args[@]}" || train_status=$?

if (( train_status != 0 )); then
  echo "==========================================" >&2
  echo "Training FAILED (exit code ${train_status})" >&2
  echo "Date: $(date)" >&2
  echo "==========================================" >&2
  exit "$train_status"
fi

echo "=========================================="
echo "Training Complete!"
echo "Date: $(date)"
echo "=========================================="