#!/bin/bash
#SBATCH --job-name=emb_deep_dive
#SBATCH --partition=nova
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --gres=gpu:1
#SBATCH --mem=64G
#SBATCH --time=02:00:00
#SBATCH --output=bert_v6_contrastive/scripts/emb_analysis_%j.out
#SBATCH --error=bert_v6_contrastive/scripts/emb_analysis_%j.err
#SBATCH --mail-type=END,FAIL

# ---------------------------------------------------------------------------
# Embedding deep dive.
#
# Pipeline: extract embeddings from the V6 and V5 checkpoints, analyze each
# set, then run the analysis again on V6 with --compare.
#
# Failure policy:
#   * Pipeline steps (cd, mkdir, every python3 invocation) are fatal: the
#     script stops at the first failure and exits with that step's status,
#     so later steps never consume missing or stale .npz files and the
#     batch script itself ends non-zero.
#   * Environment setup (GPU query, conda activation, pip install) remains
#     best-effort, but a failure is now reported on stderr instead of being
#     silently discarded.
# ---------------------------------------------------------------------------

readonly WORK_DIR=/work/ratul1/supantha/glycan-SD-VS/bert_training_v3/v3.1_cluster_training
readonly SCRIPTS_DIR=bert_v6_contrastive/scripts
readonly ANALYSIS_DIR=bert_v6_contrastive/analysis
readonly CONDA_ENV=glycan_bert

# Print a warning to stderr and keep going.
warn() {
  printf 'WARNING: %s\n' "$*" >&2
}

# Print an error to stderr and abort the job with status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

#######################################
# Print a step banner, run the given command, and abort the job if it fails.
# Arguments: $1   - step title shown in the banner
#            $2.. - command and its arguments
# Outputs:   banner on stdout; error message on stderr when the command fails
# Returns:   0 on success; exits the script with the command's status on
#            failure
#######################################
run_step() {
  local title=$1
  shift
  printf '\n=== %s ===\n' "$title"
  local rc=0
  "$@" || rc=$?
  if (( rc != 0 )); then
    printf 'ERROR: step "%s" failed with exit status %d\n' "$title" "$rc" >&2
    exit "$rc"
  fi
}

echo "=== Embedding Deep Dive ==="
echo "Job: ${SLURM_JOB_ID:-} | Node: $(hostname) | $(date)"
nvidia-smi --query-gpu=name --format=csv,noheader \
  || warn "nvidia-smi failed; GPU may be unavailable on this node"

# Try the legacy 'source activate' first, then 'conda activate'. stderr of
# the individual attempts is dropped because the first one is expected to
# fail on some setups; only a failure of both is reported.
if ! source activate "$CONDA_ENV" 2>/dev/null \
  && ! conda activate "$CONDA_ENV" 2>/dev/null; then
  warn "could not activate conda env '$CONDA_ENV'; using the current Python environment"
fi

# All paths below are relative to WORK_DIR, so a failed cd must be fatal.
cd "$WORK_DIR" || die "cannot cd to $WORK_DIR"

# Best-effort: the packages may already be installed, or the node may have
# no network access.
pip install umap-learn scikit-learn 2>/dev/null \
  || warn "pip install umap-learn scikit-learn failed; assuming packages are already installed"

mkdir -p "$ANALYSIS_DIR" || die "cannot create $ANALYSIS_DIR"

run_step "Step 1: Extract V6" \
  python3 "$SCRIPTS_DIR/extract_embeddings.py" \
    --checkpoint checkpoints_v6/phase_3_hard_checkpoint.pt --name v6

run_step "Step 2: Extract V5" \
  python3 "$SCRIPTS_DIR/extract_embeddings.py" \
    --checkpoint checkpoints_v5_bpe_topo/best_v5_bpe_topo_model.pt --name v5

run_step "Step 3: Analyze V6" \
  python3 "$SCRIPTS_DIR/analyze_embeddings.py" \
    --input "$ANALYSIS_DIR/embeddings_v6.npz" --name v6

run_step "Step 4: Analyze V5" \
  python3 "$SCRIPTS_DIR/analyze_embeddings.py" \
    --input "$ANALYSIS_DIR/embeddings_v5.npz" --name v5

run_step "Step 5: V5 vs V6 Comparison" \
  python3 "$SCRIPTS_DIR/analyze_embeddings.py" \
    --input "$ANALYSIS_DIR/embeddings_v6.npz" --name v6 --compare

printf '\n=== Results ===\n'
ls -la "$ANALYSIS_DIR"/
echo "Done! $(date)"