llmcal / scripts /results_paper.sh
lestienne's picture
Add files using upload-large-folder tool
8cd1f2e verified
#!/bin/bash
# Generate the result artifacts (tables / plots) for the paper from previously
# computed experiment outputs under outputs/.
#
# Shell options are enabled with `set` instead of on the shebang line so that
# they still apply when the script is launched as `bash results_paper.sh`
# (options on the shebang are ignored in that case).
#   -e : abort on the first failing command
#   -x : trace every command as it is executed
set -ex

# Project environment. NOTE(review): FACTORS, which is passed as --sizes below,
# is not defined in this file; presumably it comes from env.sh -- confirm.
source ./scripts/env.sh

# Models, datasets and metrics to report on.
declare -a models=(llama3.2-1b-instruct qwen2.5-7b-instruct)
declare -a DATASETS=(sst2 agnews dbpedia 20newsgroups banking77)
declare -a metrics=(nce ner)

# Only consulted by the (currently commented-out) "Compute results" stage:
# when false, existing result files are kept instead of being recomputed.
overwrite=true
# Per-model reporting. For every entry of `models`, the active stage calls
# llmcal.scripts.results_table and asks it to write its output to
# outputs/results_paper/<model>/results_table/<model>.tex.
# The other stages (matched-results computation, metric-vs-samples plots,
# bar plots) are kept commented out so they can be re-enabled by hand.
# Reads: models, DATASETS, metrics, FACTORS (arrays, each joined into a single
# space-separated argument via "${arr[*]}").
for model in "${models[@]}"; do
  # for metric in "${metrics[@]}"; do
  # # Compute results:
  # results_path=outputs/results_paper/$model/$metric.jsonl
  # if [ -f $results_path ] && [ $overwrite = false ]; then
  # echo "Results already computed. Skipping."
  # else
  # mkdir -p $(dirname $results_path)
  # python -m llmcal.scripts.compute_matched_results \
  # --metric $metric \
  # --finetuning_root_results_dirs outputs/finetune_lora/$model/ \
  # --output_path $results_path \
  # --reduced \
  # --no_adaptation_root_results_dirs outputs/no_adaptation/$model \
  # --lora_plus_cal_root_results_dirs "outputs/lora_plus_dpcal/$model,outputs/lora_plus_tempscaling/$model,outputs/lora_plus_biasshift/$model,outputs/lora_plus_vectorscaling/$model" \
  # --lora_plus_cal_naive_root_results_dirs "outputs/lora_plus_dpcal_naive/$model,outputs/lora_plus_tempscaling_naive/$model" \
  # --trainontest_root_results_dirs outputs/lora_plus_dpcal_trainontest/$model,outputs/lora_plus_tempscaling_trainontest/$model \
  # --cal_root_results_dirs outputs/calibration/$model
  # fi
  # done
  # samples_plots_path="outputs/results_paper/$model/metric_vs_samples/combined_performance_$model.png"
  # mkdir -p $(dirname $samples_plots_path)
  # python -m llmcal.scripts.results_vs_samples \
  # --datasets "${DATASETS[*]}" \
  # --metrics "${metrics[*]}" \
  # --sizes "${FACTORS[*]}" \
  # --methods_config "./configs/methods_final.yaml" \
  # --results_dir outputs/results_paper/$model \
  # --output_path $samples_plots_path \
  # --intervals \
  # --methods "no_adaptation lora_0.7 lora_1.0 lora_1.0_no_es lora_1.0_no_es_plus_tempscaling lora_1.0_no_es_plus_dpcal" \
  # # Training samples:
  # samples_plots_path="outputs/results_paper/$model/metric_vs_samples/adaptation_performance_$model.png"
  # mkdir -p $(dirname $samples_plots_path)
  # python -m llmcal.scripts.results_vs_samples \
  # --datasets "${DATASETS[*]}" \
  # --metrics "${metrics[*]}" \
  # --sizes "${FACTORS[*]}" \
  # --methods_config "./configs/methods_final.yaml" \
  # --results_dir outputs/results_paper/$model \
  # --output_path $samples_plots_path \
  # --intervals \
  # --methods "no_adaptation lora_1.0 lora_1.0_no_es dp_calibration lora_1.0_no_es_plus_tempscaling"

  # Results table for this model. All expansions are quoted so a path can
  # never be word-split or glob-expanded before reaching mkdir / python.
  samples_table_path="outputs/results_paper/${model}/results_table/${model}.tex"
  mkdir -p "$(dirname "$samples_table_path")"
  python -m llmcal.scripts.results_table \
    --datasets "${DATASETS[*]}" \
    --metrics "${metrics[*]}" \
    --sizes "${FACTORS[*]}" \
    --methods_config "./configs/methods_final.yaml" \
    --results_dir "outputs/results_paper/${model}" \
    --output_path "$samples_table_path" \
    --methods "no_adaptation temp_scaling vector_scaling bias_shift dp_calibration lora_0.7 lora_1.0 lora_1.0_no_es lora_1.0_no_es_plus_tempscaling lora_1.0_no_es_plus_dpcal lora_1.0_no_es_plus_biasshift lora_1.0_no_es_plus_vectorscaling"

  # samples_bars_path="outputs/results_paper/$model/results_bars/$model.pdf"
  # mkdir -p $(dirname $samples_bars_path)
  # python -m llmcal.scripts.results_bars \
  # --datasets "${DATASETS[*]}" \
  # --metrics "${metrics[*]}" \
  # --sizes "${FACTORS[*]}" \
  # --methods_config "./configs/methods_final.yaml" \
  # --results_dir outputs/results_paper/$model \
  # --output_path $samples_bars_path \
  # --methods "dp_calibration bias_shift temp_scaling lora_0.7 lora_1.0 lora_1.0_no_es lora_1.0_no_es_plus_dpcal lora_1.0_no_es_plus_biasshift lora_1.0_no_es_plus_tempscaling no_adaptation"
done
# Cross-model comparison: calls llmcal.scripts.compare_models once, passing
# every model in `models` together with its per-model results directory, and
# asks for the output at outputs/results_paper/all_models/comparison.png.
samples_plots_path="outputs/results_paper/all_models/comparison.png"

# Space-separated list "outputs/results_paper/<model> ..." in the same order as
# `models`. The separator is only emitted between entries, so no trailing
# character has to be trimmed afterwards; this avoids `${var::-1}`, which needs
# bash >= 4.2 and fails with "substring expression < 0" on an empty list.
results_dirs=""
for model in "${models[@]}"; do
  results_dirs+="${results_dirs:+ }outputs/results_paper/${model}"
done

mkdir -p "$(dirname "$samples_plots_path")"
python -m llmcal.scripts.compare_models \
  --datasets "${DATASETS[*]}" \
  --metrics "${metrics[*]}" \
  --sizes "${FACTORS[*]}" \
  --methods_config "./configs/methods_final.yaml" \
  --output_path "$samples_plots_path" \
  --models "${models[*]}" \
  --results_dirs "$results_dirs" \
  --intervals \
  --methods "no_adaptation dp_calibration lora_1.0_no_es lora_1.0_no_es_plus_tempscaling"