#!/bin/bash
#
# SLURM batch script: V6 curriculum contrastive training with InfoNCE loss.
#
# Usage: sbatch <this script>
#
# NOTE: every "#SBATCH" directive must sit on its own line and appear before
# the first executable command, otherwise sbatch ignores it.
# NOTE(review): the --output/--error paths are relative, so they are presumably
# resolved against the submission directory, and the target directory likely
# has to exist before the job starts -- confirm on this cluster.
#SBATCH --job-name=v6_infonce
#SBATCH --partition=nova
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --gres=gpu:1
#SBATCH --mem=369G
#SBATCH --time=72:00:00
#SBATCH --output=bert_v6_contrastive/cluster_scripts/train_v6_%j.out
#SBATCH --error=bert_v6_contrastive/cluster_scripts/train_v6_%j.err

# "set -u" is intentionally not enabled: conda's activation hooks may reference
# unset variables. Critical steps are checked explicitly instead.

echo "=========================================="
echo "V6 Curriculum Contrastive Training - FIXED with InfoNCE"
echo "=========================================="
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $(hostname)"
echo "GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader)"
echo "Date: $(date)"
echo "=========================================="

# Activate conda
source /work/ratul1/supantha/miniconda3/etc/profile.d/conda.sh || {
  echo "ERROR: could not source conda.sh" >&2
  exit 1
}
conda activate glycanml || {
  echo "ERROR: could not activate conda environment 'glycanml'" >&2
  exit 1
}

# All relative paths passed to the trainer below are resolved from here, so
# abort rather than train from the wrong directory.
cd /work/ratul1/supantha/glycan-SD-VS/bert_training_v3/v3.1_cluster_training || {
  echo "ERROR: could not cd to the training directory" >&2
  exit 1
}

# Run training with InfoNCE loss (NEVER goes to 0)
python bert_v6_contrastive/training/contrastive_trainer_v6_curriculum.py \
  --positives bert_v5.1_contrastive/data/fully_resolved_161k.pkl \
  --negatives bert_v6_contrastive/data/negatives_scored.pkl \
  --checkpoint checkpoints_v5_bpe_topo/best_v5_bpe_topo_model.pt \
  --output_dir checkpoints_v6 \
  --epochs 30 \
  --batch_size 128 \
  --lr 2e-5 \
  --mlm_weight 0.70 \
  --cont_weight 0.30 \
  --temperature 0.07 \
  --easy_epochs 5 \
  --medium_epochs 10 \
  --hard_epochs 15
# Capture the trainer's status immediately, before any other command
# overwrites $?.
train_status=$?

if [[ "$train_status" -ne 0 ]]; then
  echo "==========================================" >&2
  echo "Training FAILED (exit code ${train_status})" >&2
  echo "Date: $(date)" >&2
  echo "==========================================" >&2
  # Propagate the failure so the scheduler does not record the job as
  # successful.
  exit "$train_status"
fi

echo "=========================================="
echo "Training Complete!"
echo "Date: $(date)"
echo "=========================================="