#!/bin/bash
# Optimized Single GPU AMP Flow Matching Training Launch Script with FULL DATA
#
# This script launches optimized training on GPU 3 using ALL available data.
# Features: Mixed precision (BF16), increased batch size, H100 optimizations
#
# Usage: run from the directory that holds the compressor/decompressor
# checkpoints and the training script (the model files are looked up
# relative to the current working directory). Takes no arguments.

# Fail fast: abort on unhandled errors, unset variables and broken pipelines.
set -euo pipefail

# Locations of the shared project data checked below.
data_root="/data2/edwardsun/flow_project"
embeddings_dir="${data_root}/peptide_embeddings/"
all_embeddings_file="${embeddings_dir}all_peptide_embeddings.pt"
uniprot_file="${data_root}/test_uniprot_processed/uniprot_processed_data.json"

#######################################
# Report a missing prerequisite on stderr and abort the script.
# Arguments:
#   $1 - description of what is missing (printed after "❌ Missing ")
#   $2 - hint line telling the user how to obtain it
# Returns: never; exits with status 1.
#######################################
die_missing() {
  printf '❌ Missing %s\n' "$1" >&2
  printf '%s\n' "$2" >&2
  exit 1
}

# NOTE(review): this banner announces 15000 iterations, while the
# configuration summary printed later in this script reports 6,000 total
# iterations. Confirm the real value against the training script.
echo "=== Launching Optimized Single GPU AMP Flow Matching Training with FULL DATA ==="
echo "Using GPU 3 for training (other GPUs are busy)"
echo "Using ALL available peptide embeddings and UniProt data"
echo "OVERNIGHT TRAINING: 15000 iterations with CFG support and H100 optimizations"
echo ""

# Check if required files exist
echo "Checking required files..."

# Both model checkpoints come from the same upstream step, so they share a hint.
for model_file in final_compressor_model.pth final_decompressor_model.pth; do
  if [[ ! -f "${model_file}" ]]; then
    die_missing "${model_file}" "Please run compressor_with_embeddings.py first"
  fi
done

if [[ ! -d "${embeddings_dir}" ]]; then
  die_missing "${embeddings_dir} directory" "Please run final_sequence_encoder.py first"
fi

# Check for full data files.
# The combined embeddings file is optional: its absence is only a warning.
if [[ ! -f "${all_embeddings_file}" ]]; then
  echo "⚠️ Warning: all_peptide_embeddings.pt not found" >&2
  echo "Will use individual embedding files instead" >&2
else
  echo "✓ Found all_peptide_embeddings.pt (4.3GB - ALL peptide data)"
fi

# The UniProt data file is mandatory.
if [[ ! -f "${uniprot_file}" ]]; then
  die_missing "${uniprot_file}" "This contains ALL UniProt data for CFG training"
fi
echo "✓ Found uniprot_processed_data.json (3.4GB - ALL UniProt data)"

echo "✓ All required files found!"
echo ""

# Set CUDA device to GPU 3
gpu_id=3
export CUDA_VISIBLE_DEVICES="${gpu_id}"

# Enable H100 optimizations
# (presumably these cuDNN v8 switches are read by PyTorch in the training
# process — confirm against the PyTorch version in use)
export TORCH_CUDNN_V8_API_ENABLED=1
export TORCH_CUDNN_V8_API_DISABLED=0

# NOTE(review): the figures below are informational only and are not passed
# to the training script. Several look mutually inconsistent and should be
# confirmed against the training script: validation "every 10,000 steps" and
# "5,000" warmup steps versus "6,000" total iterations, and checkpoints
# counted in "epochs" while the other cadences are in steps.
cat <<EOF
=== Optimized Training Configuration ===
 - GPU: ${gpu_id} (CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES})
 - Batch size: 96 (optimized based on profiling)
 - Total iterations: 6,000
 - Mixed precision: BF16 (H100 optimized)
 - Learning rate: 4e-4 -> 2e-4 (cosine annealing)
 - Warmup steps: 5,000
 - Gradient clipping: 1.0
 - Weight decay: 0.01
 - Data workers: 16
 - CFG dropout: 15%
 - Validation: Every 10,000 steps
 - Checkpoints: Every 1,000 epochs
 - Estimated time: ~8-10 hours (overnight training)

EOF

# Check GPU memory and capabilities.
# This is a best-effort report: a missing or failing nvidia-smi only produces
# a warning and never aborts the launch.
# nvidia-smi does not honour CUDA_VISIBLE_DEVICES, so the training GPU is
# selected explicitly with --id.
# NOTE(review): assumes nvidia-smi's index matches CUDA's device numbering
# (true when CUDA_DEVICE_ORDER=PCI_BUS_ID or all GPUs are identical) — confirm.
echo "Checking GPU capabilities..."
if ! command -v nvidia-smi >/dev/null 2>&1; then
  echo "⚠️ Warning: nvidia-smi not found; skipping GPU capability check" >&2
elif ! gpu_info=$(nvidia-smi --id="${gpu_id}" \
  --query-gpu=name,memory.total,memory.free \
  --format=csv,noheader,nounits); then
  echo "⚠️ Warning: nvidia-smi query failed for GPU ${gpu_id}; skipping GPU capability check" >&2
else
  while IFS=, read -r name total free; do
    # Skip blank lines (e.g. empty query output).
    [[ -n "${name}" ]] || continue
    # CSV fields are separated by ", " — strip the padding before using them.
    total=${total//[[:space:]]/}
    free=${free//[[:space:]]/}
    echo " GPU: ${name}"
    echo " Total memory: ${total}MB"
    echo " Free memory: ${free}MB"
    # Only do arithmetic on plain positive integers; nvidia-smi can emit
    # placeholders such as "[N/A]" which would otherwise be a syntax error.
    if [[ "${total}" =~ ^[0-9]+$ && "${free}" =~ ^[0-9]+$ ]] && ((total > 0)); then
      echo " Available: $((free * 100 / total))%"
    else
      echo " Available: unknown"
    fi
  done <<<"${gpu_info}"
fi
echo ""

# Launch optimized training
echo "Starting optimized single GPU training on GPU 3 with FULL DATA..."
echo ""

# Launch training with optional wandb logging
# Uncomment the following line if you want to use wandb logging:
# python amp_flow_training_single_gpu_full_data.py --use_wandb

# Standard training without wandb.
# Capture the exit status explicitly so that a failed run is reported as a
# failure instead of falling through to the "Complete" banner below. The
# `|| train_status=$?` form also keeps this working when errexit is active.
train_status=0
python amp_flow_training_single_gpu_full_data.py || train_status=$?
if ((train_status != 0)); then
  echo "" >&2
  echo "❌ Training failed (exit code ${train_status})" >&2
  exit "${train_status}"
fi

# NOTE(review): this summary is static text, not derived from the run. It
# cites a batch size of 128 whereas the configuration summary printed before
# launch reports 96, and it describes checkpoints "every 1000 epochs" although
# the checkpoint file names are keyed by step. Confirm against the training
# script before relying on these figures.
cat <<'EOF'

=== Optimized Overnight Training Complete with FULL DATA ===
Check for output files:
 - amp_flow_model_best_optimized.pth (best validation model)
 - amp_flow_model_final_optimized.pth (final model)
 - amp_flow_checkpoint_optimized_step_*.pth (checkpoints every 1000 epochs)

Training optimizations applied:
 ✓ Mixed precision (BF16) for ~30-50% speedup
 ✓ Increased batch size (128) for better H100 utilization
 ✓ Optimized learning rate schedule with proper warmup
 ✓ Gradient clipping for training stability
 ✓ CFG dropout for better guidance
 ✓ Validation monitoring and early stopping
 ✓ PyTorch 2.x compilation for speedup

Next steps:
1. Test the optimized model: python generate_amps.py
2. Compare performance with previous model
3. Implement reflow for 1-step generation
4. Add conditioning for toxicity
5. Fine-tune on specific AMP properties
EOF