| source ./scripts/env.sh | |
| # model="llama3.2-1b-instruct" | |
| metrics=(nce ner) | |
| overwrite=true | |
| for metric in "${metrics[@]}"; do | |
| # Compute results: | |
| results_path=outputs/results/$model/$metric.jsonl | |
| if [ -f $results_path ] && [ $overwrite = false ]; then | |
| echo "Results already computed. Skipping." | |
| else | |
| mkdir -p $(dirname $results_path) | |
| python -m llmcal.scripts.compute_matched_results \ | |
| --metric $metric \ | |
| --finetuning_root_results_dirs outputs/finetune_lora/$model/ \ | |
| --output_path $results_path \ | |
| --reduced \ | |
| # --lora_plus_cal_root_results_dirs "outputs/lora_plus_dpcal/$model,outputs/lora_plus_tempscaling/$model" \ | |
| # --cal_root_results_dirs outputs/calibration/$model \ | |
| # --trainontest_root_results_dirs outputs/lora_plus_dpcal_trainontest/$model,outputs/lora_plus_tempscaling_trainontest/$model \ | |
| fi | |
| done | |
| # Training samples: | |
| samples_plots_dir=outputs/results/$model/metric_vs_samples | |
| mkdir -p $samples_plots_dir | |
| python -m llmcal.scripts.results_vs_samples \ | |
| --datasets "${DATASETS[*]}" \ | |
| --metric "goodness" \ | |
| --sizes "${FACTORS[*]}" \ | |
| --methods_config "./configs/methods.yaml" \ | |
| --methods "lora_0.7 lora_0.7_no_es lora_1.0 lora_1.0_no_es lora_1.0_no_es_plus_tempscaling temp_scaling dp_calibration" \ | |
| --results_dir outputs/results/$model \ | |
| --output_dir $samples_plots_dir \ | |
| # --intervals | |
| # --methods "lora_0.7 lora_1.0 temp_scaling dp_calibration" \ | |
| # --methods "lora_1.0 lora_1.0_no_es lora_1.0_plus_dpcal lora_1.0_plus_tempscaling lora_1.0_plus_dpcal_trainontest lora_1.0_plus_tempscaling_trainontest" \ | |
| # --methods "no_adaptation dp_calibration temp_scaling lora_0.7 lora_0.7_no_es lora_0.7_plus_tempscaling lora_0.7_plus_dpcal lora_0.7_no_es_plus_tempscaling lora_0.7_no_es_plus_dpcal lora_1.0 lora_1.0_no_es lora_1.0_plus_tempscaling lora_1.0_plus_dpcal lora_1.0_no_es_plus_tempscaling lora_1.0_no_es_plus_dpcal" \ |