#!/bin/bash
# SLURM batch script: run deepeval metric evaluations (answer relevancy,
# contextual recall, contextual precision) over MSE ollama/llemma datasets.
# NOTE: #SBATCH directives are only honored by sbatch when they appear as
# leading comment lines before the first non-comment command — keep them here.
#SBATCH --time=1:00:00               # walltime. hours:minutes:seconds
#SBATCH --ntasks=8                   # number of processor cores (i.e. tasks)
#SBATCH --nodes=1                    # number of nodes
#SBATCH --gpus=1
#SBATCH --mem=80G                    # 164G memory per CPU core
#SBATCH --mail-user=aw742@byu.edu    # email address
#SBATCH --mail-type=BEGIN
#SBATCH --mail-type=END
#SBATCH --mail-type=FAIL
#SBATCH --qos=cs
#SBATCH --partition=cs

# some helpful debugging options: abort on error, on unset variables,
# and on any failure inside a pipeline.
set -e
set -u
set -o pipefail

# LOAD MODULES, INSERT CODE, AND RUN YOUR PROGRAMS HERE
# module load python/3.11

# NOTE(review): "Scripts/activate" is the Windows venv layout; on a Linux
# cluster a venv normally activates via ./mse_env/bin/activate — confirm
# this path matches how mse_env was created.
source ./mse_env/Scripts/activate

# json config = "max_samples": 500,
# python mse_text_img_process.py
# python convert_mse.py

# pip install jsonlines
# pip install deepeval

# Number of test cases per metric run (consumed by the --num flag below).
NUM_TEST_CASES=100

# python mse_ollama_run.py --num $NUM_TEST_CASES --test f --shot 0 --out_file metric_test_orig_100_f.txt
# echo "Test case faithfulness finished"

# ---- original (non-finetuned) model, 0-shot ----
NUM_SHOT=0

# one-time deepeval/ollama model setup (kept for reference):
# set DEEPEVAL_RESULTS_FOLDER=.\data
# deepeval set-local-model --model-name Hudson/llemma:7b
# ollama pull Hudson/llemma:7b
# deepeval set-ollama Hudson/llemma:7b

# python mse_ollama_run.py --test ar --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_0_shot.json > metric_test_0_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# python mse_ollama_run.py --test crec --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_0_shot.json #> metric_test_0_shot_100_crec.txt
# echo "Test case contextual recall finished"
# python mse_ollama_run.py --test cp --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_0_shot.json > metric_test_0_shot_100_cp.txt
# echo "Test case contextual precision finished"

# ---- original model, 1-shot ----
NUM_SHOT=1

# python mse_ollama_run.py --test ar --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_1_shot.json > metric_test_1_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# python mse_ollama_run.py --test crec --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_1_shot.json #> metric_test_1_shot_100_crec.txt
# echo "Test case contextual recall finished"
# python mse_ollama_run.py --test cp --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_1_shot.json #> metric_test_1_shot_100_cp.txt
# echo "Test case contextual precision finished"

# ---- original model, 5-shot ----
NUM_SHOT=5

# python mse_ollama_run.py --test ar --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_5_shot.json > metric_test_5_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# python mse_ollama_run.py --test crec --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_5_shot.json #> metric_test_5_shot_100_crec.txt
# echo "Test case contextual recall finished"
# python mse_ollama_run.py --test cp --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_5_shot.json #> metric_test_5_shot_100_cp.txt
# echo "Test case contextual precision finished"

# earlier batched variant of the 5-shot contextual-precision run (25 cases
# at a time, offset via --begin):
# # python mse_ollama_run.py --num 25 --begin 0 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_cp.txt
# # echo "Test case contextual precision finished"
# # python mse_ollama_run.py --num 25 --begin 25 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b25_cp.txt
# # echo "Test case contextual precision finished (start 25)"
# # python mse_ollama_run.py --num 25 --begin 50 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b50_cp.txt
# # echo "Test case contextual precision finished (start 50)"
# # python mse_ollama_run.py --num 25 --begin 75 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b75_cp.txt
# # echo "Test case contextual precision finished (start 75)"

# ---- original model, 10-shot ----
NUM_SHOT=10

# python mse_ollama_run.py --test ar --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_10_shot.json > metric_test_10_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# python mse_ollama_run.py --test crec --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_10_shot.json #> metric_test_10_shot_100_crec.txt
# echo "Test case contextual recall finished"
# python mse_ollama_run.py --test cp --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_10_shot.json #> metric_test_10_shot_100_cp.txt
# echo "Test case contextual precision finished"

# ---- finetuned model, 0-shot (the active runs) ----
NUM_SHOT=0

# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT --out_file metric_test_ft_100_ar.docx
# echo "Test case answer relevancy finished"
# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT --out_file metric_test_ft_100_crec.docx
# echo "Test case contextual recall finished"
# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT --out_file metric_test_ft_100_cp.docx
# echo "Test case contextual precision finished"

# NB: the first run's redirect is deliberately commented out so its output
# streams to the SLURM log; the other two capture to files.
python mse_ollama_run_ft.py --test ar --dataset ./deepeval-test-dataset/mse_llemma_ft_100_case_0_shot.json #> metric_test_ft_100_ar.txt
echo "Test case answer relevancy finished"
python mse_ollama_run_ft.py --test crec --dataset ./deepeval-test-dataset/mse_llemma_ft_100_case_0_shot.json > metric_test_ft_100_crec.txt
echo "Test case contextual recall finished"
python mse_ollama_run_ft.py --test cp --dataset ./deepeval-test-dataset/mse_llemma_ft_100_case_0_shot.json > metric_test_ft_100_cp.txt
echo "Test case contextual precision finished"

# python mse_ollama_run.py --num $NUM_TEST_CASES --test crel --out_file metric_test_orig_100_crel.txt
# echo "Test case contextual relevancy finished"
# python mse_ollama_run.py --num $NUM_TEST_CASES --test f --out_file metric_test_orig_100_f.txt
# echo "Test case faithfulness finished"

# python mse_jsonl_resize.py
# python finetune.py