File size: 4,243 Bytes
8cd1f2e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
#!/bin/bash
#
# Runs baseline (no-adaptation) posteriors and several affine calibration
# methods for every size / dataset / seed combination.
#
# Must be launched from the directory that contains ./scripts/env.sh: that
# file, and the outputs/ and lists/ paths used below, are resolved relative
# to the current working directory.
#
# NOTE(review): the functions below read CHECKPOINTS_DIR, model2checkpoint,
# dataset2testsize, dataset2nseeds, DATASETS, FACTORS, base_seed,
# max_seq_length and model, none of which are defined in this file;
# presumably scripts/env.sh (or the caller's environment) provides them.

# Options are set here rather than on the shebang line so they still apply
# when the script is started as `bash <script>`.
#   -e  abort on the first command that fails
#   -x  trace every command as it is executed
set -ex

source ./scripts/env.sh
#######################################
# Compute posteriors with the unadapted checkpoint for one model/dataset
# pair on the dataset's test list. The run is skipped when logits.csv
# already exists in the output directory.
# Globals (read; not defined in this file):
#   CHECKPOINTS_DIR, model2checkpoint, dataset2testsize, max_seq_length
# Arguments:
#   $1 - model key (index into model2checkpoint)
#   $2 - dataset key (index into dataset2testsize)
# Returns:
#   Exit status of the last command run (mkdir / python), 0 when skipped.
#######################################
run_no_adaptation() {
  local model=$1
  local dataset=$2
  local precision="bf16-true"

  # Predictions directories and lists
  local model_dir="$CHECKPOINTS_DIR/${model2checkpoint[$model]}"
  local data_path="outputs/prompts/$model/$dataset/all.jsonl"
  local test_list="test_${dataset2testsize[$dataset]}"
  local output_dir="outputs/no_adaptation/$model/$dataset/size=all/seed=all/test=$dataset/list=$test_list"

  # All expansions are quoted: with an unquoted path containing whitespace
  # the `[` test errors out (silently skipping the run) and the python
  # arguments get split into extra words.
  if [[ ! -f "$output_dir/logits.csv" ]]; then
    mkdir -p "$output_dir"
    # The same checkpoint is passed as both base and current checkpoint.
    python -m llmcal.scripts.run_posteriors \
      --base_checkpoint_dir "$model_dir" \
      --checkpoint_dir "$model_dir" \
      --data_path "$data_path" \
      --output_dir "$output_dir" \
      --prediction_lists "lists/$dataset/$test_list.txt" \
      --precision "$precision" \
      --devices 1 \
      --num_nodes 1 \
      --batch_size 1 \
      --max_seq_length "$max_seq_length"
  fi
}
#######################################
# Fit one affine calibration method on the unadapted model's logits for a
# given training subset, then apply it to the dataset's test list.
#
# Step 1: compute no-adaptation posteriors on the training list (skipped
#         when logits.csv already exists in the prediction directory).
# Step 2: run the calibration script (skipped when the calibrated test
#         logits.csv already exists).
#
# Globals (read; not defined in this file):
#   CHECKPOINTS_DIR, model2checkpoint, dataset2testsize, max_seq_length,
#   base_seed
# Arguments:
#   $1 - model key (index into model2checkpoint)
#   $2 - dataset key (index into dataset2testsize)
#   $3 - training-set size label (used in the size=... path component)
#   $4 - seed index; the seed passed to the calibrator is base_seed + $4
#   $5 - calibration method name, forwarded as --method
# Returns:
#   Exit status of the last command run, 0 when both steps are skipped.
#######################################
run_affine_calibration() {
  local model=$1
  local dataset=$2
  local size=$3
  local num_seed=$4
  local method=$5

  local precision="bf16-true"
  local seed=$((base_seed + num_seed))
  local learning_rate=1e-3
  local tolerance=1e-5
  local max_ls=40

  local model_dir="$CHECKPOINTS_DIR/${model2checkpoint[$model]}"
  local data_path="outputs/prompts/$model/$dataset/all.jsonl"
  local test_list="test_${dataset2testsize[$dataset]}"
  local train_list="0.0-1.0"

  # Predictions directories and lists
  local prediction_dir="outputs/no_adaptation/$model/$dataset/size=$size/seed=$num_seed/test=$dataset/list=$train_list"
  if [[ ! -f "$prediction_dir/logits.csv" ]]; then
    mkdir -p "$prediction_dir"
    python -m llmcal.scripts.run_posteriors \
      --base_checkpoint_dir "$model_dir" \
      --checkpoint_dir "$model_dir" \
      --data_path "$data_path" \
      --output_dir "$prediction_dir" \
      --prediction_lists "lists/$dataset/size=$size/seed=$num_seed/$train_list.txt" \
      --precision "$precision" \
      --devices 1 \
      --num_nodes 1 \
      --batch_size 1 \
      --max_seq_length "$max_seq_length"
  fi

  # Calibration directories. cal_dir is declared local so it no longer
  # leaks into (or clobbers a variable of) the calling scope.
  local cal_dir="outputs/calibration/$model/$dataset/size=$size/seed=$num_seed/$method/$train_list/$train_list"
  local cal_output_dir="$cal_dir/test=$dataset/list=$test_list"
  # Test-set logits/labels are read from the size=all/seed=all
  # no-adaptation directory for this model and dataset.
  local baseline_dir="outputs/no_adaptation/$model/$dataset/size=all/seed=all/test=$dataset/list=$test_list"

  if [[ ! -f "$cal_output_dir/logits.csv" ]]; then
    mkdir -p "$cal_output_dir" "$cal_dir/logs"
    # NOTE(review): labels.csv is presumably written next to logits.csv by
    # run_posteriors; this function only checks for logits.csv.
    python -m llmcal.scripts.affine_calibration \
      --output_dir "$cal_output_dir" \
      --log_dir "$cal_dir/logs" \
      --checkpoint_dir "$cal_dir" \
      --train_logits "$prediction_dir/logits.csv" \
      --train_labels "$prediction_dir/labels.csv" \
      --predict_logits "$baseline_dir/logits.csv" \
      --predict_labels "$baseline_dir/labels.csv" \
      --method "$method" \
      --learning_rate "$learning_rate" \
      --tolerance "$tolerance" \
      --max_ls "$max_ls" \
      --seed "$seed"
  fi
}
#######################################
# Run the baseline and every affine calibration method for each
# (size, dataset, seed) combination.
# Globals (read; not defined in this file):
#   FACTORS        - array of training-set size labels
#   DATASETS       - array of dataset keys
#   dataset2nseeds - map from dataset key to number of seeds (0..n-1 are run)
# Arguments:
#   $1 - model key
#   Any further arguments are accepted but ignored.
#######################################
run_cal_vs_samples() {
  local model=$1
  local -a methods=(dp_calibration temp_scaling vector_scaling bias_shift)
  # Loop variables are local so they do not leak into the calling scope.
  local size dataset num_seeds num_seed method

  for size in "${FACTORS[@]}"; do
    for dataset in "${DATASETS[@]}"; do
      num_seeds=${dataset2nseeds[$dataset]}
      # Arithmetic loop instead of `seq 0 $((n - 1))`: some seq
      # implementations count downwards (0, -1) when n is 0.
      for ((num_seed = 0; num_seed < num_seeds; num_seed++)); do
        # Run baseline
        run_no_adaptation "$model" "$dataset"
        # Run each calibration method, in the order listed in `methods`
        for method in "${methods[@]}"; do
          run_affine_calibration "$model" "$dataset" "$size" "$num_seed" "$method"
        done
      done
    done
  done
}
# Entry point. `model` is not assigned in this file, so fail fast when it is
# empty or unset: an unquoted empty $model would vanish from the argument
# list and "16" would silently be taken as the model key.
# The trailing 16 is kept as-is; run_cal_vs_samples only reads its first
# argument.
run_cal_vs_samples "${model:?model must be set before running this script}" 16
|