# Builds paper artifacts under outputs/results_paper/:
#   1. for each model, a results table written to
#      outputs/results_paper/<model>/results_table/<model>.tex
#   2. one cross-model comparison plot written to
#      outputs/results_paper/all_models/comparison.png
#
# Requires bash (arrays are used). All paths are relative to the current
# working directory.
#
# The script intentionally does not abort when a step fails: a failure for one
# model does not prevent the remaining steps from running.

source ./scripts/env.sh

declare -a models=(llama3.2-1b-instruct qwen2.5-7b-instruct)
declare -a DATASETS=(sst2 agnews dbpedia 20newsgroups banking77)
metrics=(nce ner)
# Only read by the commented-out "Compute results" step below.
overwrite=true

# NOTE(review): FACTORS is not defined in this file; presumably it is set by
# ./scripts/env.sh -- confirm. Warn (without aborting) when it is empty so a
# missing environment is visible instead of silently passing `--sizes ""`.
if [[ -z "${FACTORS[*]-}" ]]; then
    echo "warning: FACTORS is unset or empty; --sizes will be empty" >&2
fi

for model in "${models[@]}"; do

    # --- Disabled: compute matched results for every metric -----------------
    # for metric in "${metrics[@]}"; do
    #     # Compute results:
    #     results_path=outputs/results_paper/$model/$metric.jsonl
    #     if [ -f $results_path ] && [ $overwrite = false ]; then
    #         echo "Results already computed. Skipping."
    #     else
    #         mkdir -p $(dirname $results_path)
    #         python -m llmcal.scripts.compute_matched_results \
    #             --metric $metric \
    #             --finetuning_root_results_dirs outputs/finetune_lora/$model/ \
    #             --output_path $results_path \
    #             --reduced \
    #             --no_adaptation_root_results_dirs outputs/no_adaptation/$model \
    #             --lora_plus_cal_root_results_dirs "outputs/lora_plus_dpcal/$model,outputs/lora_plus_tempscaling/$model,outputs/lora_plus_biasshift/$model,outputs/lora_plus_vectorscaling/$model" \
    #             --lora_plus_cal_naive_root_results_dirs "outputs/lora_plus_dpcal_naive/$model,outputs/lora_plus_tempscaling_naive/$model" \
    #             --trainontest_root_results_dirs outputs/lora_plus_dpcal_trainontest/$model,outputs/lora_plus_tempscaling_trainontest/$model \
    #             --cal_root_results_dirs outputs/calibration/$model
    #     fi
    # done

    # --- Disabled: combined performance plot --------------------------------
    # samples_plots_path="outputs/results_paper/$model/metric_vs_samples/combined_performance_$model.png"
    # mkdir -p $(dirname $samples_plots_path)
    # python -m llmcal.scripts.results_vs_samples \
    #     --datasets "${DATASETS[*]}" \
    #     --metrics "${metrics[*]}" \
    #     --sizes "${FACTORS[*]}" \
    #     --methods_config "./configs/methods_final.yaml" \
    #     --results_dir outputs/results_paper/$model \
    #     --output_path $samples_plots_path \
    #     --intervals \
    #     --methods "no_adaptation lora_0.7 lora_1.0 lora_1.0_no_es lora_1.0_no_es_plus_tempscaling lora_1.0_no_es_plus_dpcal"

    # --- Disabled: adaptation performance plot ------------------------------
    # # Training samples:
    # samples_plots_path="outputs/results_paper/$model/metric_vs_samples/adaptation_performance_$model.png"
    # mkdir -p $(dirname $samples_plots_path)
    # python -m llmcal.scripts.results_vs_samples \
    #     --datasets "${DATASETS[*]}" \
    #     --metrics "${metrics[*]}" \
    #     --sizes "${FACTORS[*]}" \
    #     --methods_config "./configs/methods_final.yaml" \
    #     --results_dir outputs/results_paper/$model \
    #     --output_path $samples_plots_path \
    #     --intervals \
    #     --methods "no_adaptation lora_1.0 lora_1.0_no_es dp_calibration lora_1.0_no_es_plus_tempscaling"

    # --- Active: per-model results table ------------------------------------
    # List-valued options are passed as ONE space-separated argument each,
    # hence "${array[*]}" rather than "${array[@]}".
    samples_table_path="outputs/results_paper/$model/results_table/$model.tex"
    mkdir -p "$(dirname "$samples_table_path")"
    python -m llmcal.scripts.results_table \
        --datasets "${DATASETS[*]}" \
        --metrics "${metrics[*]}" \
        --sizes "${FACTORS[*]}" \
        --methods_config "./configs/methods_final.yaml" \
        --results_dir "outputs/results_paper/$model" \
        --output_path "$samples_table_path" \
        --methods "no_adaptation temp_scaling vector_scaling bias_shift dp_calibration lora_0.7 lora_1.0 lora_1.0_no_es lora_1.0_no_es_plus_tempscaling lora_1.0_no_es_plus_dpcal lora_1.0_no_es_plus_biasshift lora_1.0_no_es_plus_vectorscaling"

    # --- Disabled: per-model results bars -----------------------------------
    # samples_bars_path="outputs/results_paper/$model/results_bars/$model.pdf"
    # mkdir -p $(dirname $samples_bars_path)
    # python -m llmcal.scripts.results_bars \
    #     --datasets "${DATASETS[*]}" \
    #     --metrics "${metrics[*]}" \
    #     --sizes "${FACTORS[*]}" \
    #     --methods_config "./configs/methods_final.yaml" \
    #     --results_dir outputs/results_paper/$model \
    #     --output_path $samples_bars_path \
    #     --methods "dp_calibration bias_shift temp_scaling lora_0.7 lora_1.0 lora_1.0_no_es lora_1.0_no_es_plus_dpcal lora_1.0_no_es_plus_biasshift lora_1.0_no_es_plus_tempscaling no_adaptation"
done

# --- Cross-model comparison plot ---------------------------------------------
samples_plots_path="outputs/results_paper/all_models/comparison.png"

# Collect one results directory per model, then join them with single spaces.
# Joining an array avoids the trailing-separator trim (`${var::-1}`), which
# needs bash >= 4.2 and fails on an empty string.
declare -a results_dir_list=()
for model in "${models[@]}"; do
    results_dir_list+=("outputs/results_paper/$model")
done
results_dirs="${results_dir_list[*]}"

mkdir -p "$(dirname "$samples_plots_path")"
python -m llmcal.scripts.compare_models \
    --datasets "${DATASETS[*]}" \
    --metrics "${metrics[*]}" \
    --sizes "${FACTORS[*]}" \
    --methods_config "./configs/methods_final.yaml" \
    --output_path "$samples_plots_path" \
    --models "${models[*]}" \
    --results_dirs "$results_dirs" \
    --intervals \
    --methods "no_adaptation dp_calibration lora_1.0_no_es lora_1.0_no_es_plus_tempscaling"