File size: 4,730 Bytes
8cd1f2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/bin/bash -ex

source ./scripts/env.sh
declare -a models=(llama3.2-1b-instruct qwen2.5-7b-instruct)
declare -a DATASETS=(sst2 agnews dbpedia 20newsgroups banking77)
metrics=(nce ner)
overwrite=true

for model in "${models[@]}"; do

    # for metric in "${metrics[@]}"; do

    #     # Compute results:
    #     results_path=outputs/results_paper/$model/$metric.jsonl
    #     if [ -f $results_path ] && [ $overwrite = false ]; then
    #         echo "Results already computed. Skipping."
    #     else
    #         mkdir -p $(dirname $results_path)
    #         python -m llmcal.scripts.compute_matched_results \
    #             --metric $metric \
    #             --finetuning_root_results_dirs outputs/finetune_lora/$model/ \
    #             --output_path $results_path \
    #             --reduced \
    #             --no_adaptation_root_results_dirs outputs/no_adaptation/$model \
    #             --lora_plus_cal_root_results_dirs "outputs/lora_plus_dpcal/$model,outputs/lora_plus_tempscaling/$model,outputs/lora_plus_biasshift/$model,outputs/lora_plus_vectorscaling/$model" \
    #             --lora_plus_cal_naive_root_results_dirs "outputs/lora_plus_dpcal_naive/$model,outputs/lora_plus_tempscaling_naive/$model" \
    #             --trainontest_root_results_dirs outputs/lora_plus_dpcal_trainontest/$model,outputs/lora_plus_tempscaling_trainontest/$model \
    #             --cal_root_results_dirs outputs/calibration/$model
    #     fi
    # done

    # samples_plots_path="outputs/results_paper/$model/metric_vs_samples/combined_performance_$model.png"
    # mkdir -p $(dirname $samples_plots_path)
    # python -m llmcal.scripts.results_vs_samples \
    #     --datasets "${DATASETS[*]}" \
    #     --metrics "${metrics[*]}" \
    #     --sizes "${FACTORS[*]}" \
    #     --methods_config "./configs/methods_final.yaml" \
    #     --results_dir outputs/results_paper/$model \
    #     --output_path $samples_plots_path \
    #     --intervals \
    #     --methods "no_adaptation lora_0.7 lora_1.0 lora_1.0_no_es lora_1.0_no_es_plus_tempscaling lora_1.0_no_es_plus_dpcal" \


    # # Training samples:
    # samples_plots_path="outputs/results_paper/$model/metric_vs_samples/adaptation_performance_$model.png"
    # mkdir -p $(dirname $samples_plots_path)
    # python -m llmcal.scripts.results_vs_samples \
    #     --datasets "${DATASETS[*]}" \
    #     --metrics "${metrics[*]}" \
    #     --sizes "${FACTORS[*]}" \
    #     --methods_config "./configs/methods_final.yaml" \
    #     --results_dir outputs/results_paper/$model \
    #     --output_path $samples_plots_path \
    #     --intervals \
    #     --methods "no_adaptation lora_1.0 lora_1.0_no_es dp_calibration lora_1.0_no_es_plus_tempscaling"


    samples_table_path="outputs/results_paper/$model/results_table/$model.tex"
    mkdir -p $(dirname $samples_table_path)
    python -m llmcal.scripts.results_table \
        --datasets "${DATASETS[*]}" \
        --metrics "${metrics[*]}" \
        --sizes "${FACTORS[*]}" \
        --methods_config "./configs/methods_final.yaml" \
        --results_dir outputs/results_paper/$model \
        --output_path $samples_table_path \
        --methods "no_adaptation temp_scaling vector_scaling bias_shift dp_calibration lora_0.7 lora_1.0 lora_1.0_no_es lora_1.0_no_es_plus_tempscaling lora_1.0_no_es_plus_dpcal lora_1.0_no_es_plus_biasshift lora_1.0_no_es_plus_vectorscaling"

    # samples_bars_path="outputs/results_paper/$model/results_bars/$model.pdf"
    # mkdir -p $(dirname $samples_bars_path)
    # python -m llmcal.scripts.results_bars \
    #     --datasets "${DATASETS[*]}" \
    #     --metrics "${metrics[*]}" \
    #     --sizes "${FACTORS[*]}" \
    #     --methods_config "./configs/methods_final.yaml" \
    #     --results_dir outputs/results_paper/$model \
    #     --output_path $samples_bars_path \
    #     --methods "dp_calibration bias_shift temp_scaling lora_0.7 lora_1.0 lora_1.0_no_es lora_1.0_no_es_plus_dpcal lora_1.0_no_es_plus_biasshift lora_1.0_no_es_plus_tempscaling no_adaptation"
done

samples_plots_path="outputs/results_paper/all_models/comparison.png"
results_dirs=$(for model in "${models[@]}"; do echo "outputs/results_paper/$model"; done | tr '\n' ' ')
results_dirs=${results_dirs::-1}

mkdir -p $(dirname $samples_plots_path)
python -m llmcal.scripts.compare_models \
    --datasets "${DATASETS[*]}" \
    --metrics "${metrics[*]}" \
    --sizes "${FACTORS[*]}" \
    --methods_config "./configs/methods_final.yaml" \
    --output_path $samples_plots_path \
    --models "${models[*]}" \
    --results_dirs "$results_dirs" \
    --intervals \
    --methods "no_adaptation dp_calibration lora_1.0_no_es lora_1.0_no_es_plus_tempscaling"