File size: 6,429 Bytes
766ea9e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
#!/bin/bash
#SBATCH --time=1:00:00 # walltime. hours:minutes:seconds
#SBATCH --ntasks=8 # number of processor cores (i.e. tasks)
#SBATCH --nodes=1 # number of nodes
#SBATCH --gpus=1
#SBATCH --mem=80G # real memory required per node (--mem is per node, not per CPU core)
#SBATCH --mail-user=aw742@byu.edu # email address
#SBATCH --mail-type=BEGIN
#SBATCH --mail-type=END
#SBATCH --mail-type=FAIL
#SBATCH --qos=cs
#SBATCH --partition=cs
#
# Slurm batch script: requests the resources above, activates the ./mse_env
# virtual environment, then runs the mse_*.py steps further down.
# All paths are relative, so the job's working directory must contain mse_env/
# and the Python scripts.

# Strict mode: stop on any failing command (-e), on any use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# LOAD MODULES, INSERT CODE, AND RUN YOUR PROGRAMS HERE
# module load python/3.11

# Locate the environment's activate script. The path originally used here
# (Scripts/activate, the Windows venv layout) is tried first so existing setups
# behave as before; bin/activate (the POSIX venv layout) is the fallback.
activate_script=""
for candidate in ./mse_env/Scripts/activate ./mse_env/bin/activate; do
  if [[ -f "$candidate" ]]; then
    activate_script=$candidate
    break
  fi
done

if [[ -z "$activate_script" ]]; then
  printf 'error: no activate script found under ./mse_env (tried Scripts/ and bin/)\n' >&2
  printf 'error: current directory is %s\n' "$PWD" >&2
  exit 1
fi

# Activate scripts from some virtualenv/conda versions read variables that may
# be unset, so nounset is switched off only while the script is sourced.
set +u
# shellcheck source=/dev/null
source "$activate_script"
set -u
# Earlier steps, kept for reference and currently disabled:
# json config = "max_samples": 500,
# python mse_text_img_process.py
# python convert_mse.py
# pip install jsonlines
# pip install deepeval

# Settings referenced (as $NUM_TEST_CASES / $NUM_SHOT) by the commented-out
# mse_ollama_run.py invocations in this script.
NUM_TEST_CASES='100'
NUM_SHOT='0'

# Disabled faithfulness run:
# python mse_ollama_run.py --num $NUM_TEST_CASES --test f --shot 0 --out_file metric_test_orig_100_f.txt
# echo "Test case faithfulness finished"

# Disabled results-folder setting (written in Windows cmd syntax):
# set DEEPEVAL_RESULTS_FOLDER=.\data

# Active step: run the timing script, then print a completion message.
python mse_ollama_timer.py
printf '%s\n' 'Test time calculated'
# deepeval set-local-model --model-name Hudson/llemma:7b
# ollama pull Hudson/llemma:7b
# deepeval set-ollama Hudson/llemma:7b
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_0_shot_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT #--out_file metric_test_0_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_0_shot_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT #--out_file metric_test_0_shot_100_crec.txt
# echo "Test case contextual recall finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_0_shot_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT #--out_file metric_test_0_shot_100_cp.txt
# echo "Test case contextual precision finished"
# --- 1-shot section: every command below is currently commented out ---
# NUM_SHOT supplies the `--shot $NUM_SHOT` argument of the commands in this section.
NUM_SHOT=1
# Answer relevancy (--test ar)
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_1_shot_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT #--out_file metric_test_1_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# Contextual recall (--test crec)
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_1_shot_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT #--out_file metric_test_1_shot_100_crec.txt
# echo "Test case contextual recall finished"
# Contextual precision (--test cp)
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_1_shot_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT #--out_file metric_test_1_shot_100_cp.txt
# echo "Test case contextual precision finished"
# --- 5-shot section: every command below is currently commented out ---
# NUM_SHOT supplies the `--shot $NUM_SHOT` argument of the commands in this section.
NUM_SHOT=5
# Answer relevancy (--test ar)
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_5_shot_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT #--out_file metric_test_5_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# Contextual recall (--test crec)
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_5_shot_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT #--out_file metric_test_5_shot_100_crec.txt
# echo "Test case contextual recall finished"
# Contextual precision (--test cp), full 100-case run
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_5_shot_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT #--out_file metric_test_5_shot_100_cp.txt
# echo "Test case contextual precision finished"
# Contextual precision in four batches of 25 cases (--begin 0, 25, 50, 75),
# each with its own --out_file.
# python mse_ollama_run.py --num 25 --begin 0 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_cp.txt
# echo "Test case contextual precision finished"
# python mse_ollama_run.py --num 25 --begin 25 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b25_cp.txt
# echo "Test case contextual precision finished (start 25)"
# python mse_ollama_run.py --num 25 --begin 50 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b50_cp.txt
# echo "Test case contextual precision finished (start 50)"
# python mse_ollama_run.py --num 25 --begin 75 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b75_cp.txt
# echo "Test case contextual precision finished (start 75)"
# --- 10-shot section: every command below is currently commented out ---
# NUM_SHOT supplies the `--shot $NUM_SHOT` argument of the commands in this section.
NUM_SHOT=10
# Answer relevancy (--test ar)
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_10_shot_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT --out_file metric_test_10_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# Contextual recall (--test crec)
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_10_shot_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT --out_file metric_test_10_shot_100_crec.txt
# echo "Test case contextual recall finished"
# Contextual precision (--test cp)
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_10_shot_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT --out_file metric_test_10_shot_100_cp.txt
# echo "Test case contextual precision finished"
# --- Fine-tuned section: every command below is currently commented out ---
# finetuned
# NUM_SHOT is reset to 0 here; it supplies `--shot $NUM_SHOT` for the three
# runs that follow and is the value left in effect for the rest of the script.
NUM_SHOT=0
# Answer relevancy (--test ar)
# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT #> metric_test_ft_100_ar.txt
# echo "Test case answer relevancy finished"
# Contextual recall (--test crec)
# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT #> metric_test_ft_100_crec.txt
# echo "Test case contextual recall finished"
# Contextual precision (--test cp)
# NOTE(review): unlike the two runs above, the stdout redirect on the next
# command is not commented out with an inner '#' - confirm this is intended.
# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT > metric_test_ft_100_cp.txt
# echo "Test case contextual precision finished"
# Contextual relevancy (--test crel) and faithfulness (--test f); no --shot is passed.
# python mse_ollama_run.py --num $NUM_TEST_CASES --test crel --out_file metric_test_orig_100_crel.txt
# echo "Test case contextual relevancy finished"
# python mse_ollama_run.py --num $NUM_TEST_CASES --test f --out_file metric_test_orig_100_f.txt
# echo "Test case faithfulness finished"
# python mse_jsonl_resize.py
# python finetune.py
# echo "Original Llemma Model"
# echo "Processing 0 shot 100 test cases"
# CUDA_VISIBLE_DEVICES=0 python mse_deepeval_dataset.py --num 100 --shot 0 --dataset mse_llemma_orig_100_case_0_shot
# echo "Processing 1 shot 100 test cases"
# CUDA_VISIBLE_DEVICES=0 python mse_deepeval_dataset.py --num 100 --shot 1 --dataset mse_llemma_orig_100_case_1_shot
# echo "Processing 5 shot 100 test cases"
# CUDA_VISIBLE_DEVICES=0 python mse_deepeval_dataset.py --num 100 --shot 5 --dataset mse_llemma_orig_100_case_5_shot
# echo "Processing 10 shot 100 test cases"
# CUDA_VISIBLE_DEVICES=0 python mse_deepeval_dataset.py --num 100 --shot 10 --dataset mse_llemma_orig_100_case_10_shot