#!/usr/bin/env bash
# Evaluation launcher: runs math_eval.py over several groups of datasets
# using one fixed model checkpoint, prompt template and output directory.
#
# -e: abort on the first failing command; -x: trace each command to stderr.
set -ex

# Prompt template name forwarded to math_eval.py via --prompt_type.
PROMPT_TYPE="qwen25-math-cot"

# Model checkpoint forwarded via --model_name_or_path.
# NOTE(review): relative path, so it resolves against the directory the
# script is launched from - confirm the intended working directory.
MODEL_NAME_OR_PATH="baidu/Reproducibility/trained_models/trained_3epo_cleaned_torch"

# Directory forwarded via --output_dir (also a relative path).
OUTPUT_DIR="baidu/Qwen2.5-Math/evaluation/result/ernie_cleaned"

# Dataset split forwarded via --split.
SPLIT="test"

# Forwarded via --num_test_sample.
# NOTE(review): -1 presumably means "use every sample" - confirm in math_eval.py.
NUM_TEST_SAMPLE=-1

# Evaluation run 1: comma-separated dataset list passed as a single
# --data_name argument. No few-shot flags are passed for this group.
DATA_NAME="gsm8k,math,svamp,asdiv,mawps,carp_en,tabmwp,minerva_math,gaokao2023en,olympiadbench,college_math"
# TOKENIZERS_PARALLELISM is set only in the environment of this one command.
# Every continued line must end in a bare backslash (nothing after it), and
# the last argument must not be followed by one.
TOKENIZERS_PARALLELISM=false \
python3 -u math_eval.py \
  --model_name_or_path "${MODEL_NAME_OR_PATH}" \
  --data_name "${DATA_NAME}" \
  --output_dir "${OUTPUT_DIR}" \
  --split "${SPLIT}" \
  --prompt_type "${PROMPT_TYPE}" \
  --num_test_sample "${NUM_TEST_SAMPLE}" \
  --seed 0 \
  --temperature 0 \
  --n_sampling 1 \
  --top_p 1 \
  --start 0 \
  --end -1 \
  --use_vllm \
  --save_outputs \
  --overwrite

# Evaluation run 2: same settings as the other runs, plus --num_shots 5.
# NOTE(review): presumably these are few-shot (5 example) benchmarks -
# confirm how math_eval.py interprets --num_shots.
DATA_NAME="aqua,sat_math,mmlu_stem"
# TOKENIZERS_PARALLELISM is set only in the environment of this one command.
TOKENIZERS_PARALLELISM=false \
python3 -u math_eval.py \
  --model_name_or_path "${MODEL_NAME_OR_PATH}" \
  --data_name "${DATA_NAME}" \
  --output_dir "${OUTPUT_DIR}" \
  --split "${SPLIT}" \
  --prompt_type "${PROMPT_TYPE}" \
  --num_test_sample "${NUM_TEST_SAMPLE}" \
  --seed 0 \
  --temperature 0 \
  --n_sampling 1 \
  --top_p 1 \
  --start 0 \
  --end -1 \
  --use_vllm \
  --save_outputs \
  --overwrite \
  --num_shots 5

# Evaluation run 3: same settings as the other runs, plus --adapt_few_shot.
# NOTE(review): the exact effect of --adapt_few_shot is defined in
# math_eval.py and is not visible from this script.
DATA_NAME="gaokao2024_I,gaokao2024_II,gaokao2024_mix,gaokao_math_cloze,gaokao_math_qa"
# TOKENIZERS_PARALLELISM is set only in the environment of this one command.
TOKENIZERS_PARALLELISM=false \
python3 -u math_eval.py \
  --model_name_or_path "${MODEL_NAME_OR_PATH}" \
  --data_name "${DATA_NAME}" \
  --output_dir "${OUTPUT_DIR}" \
  --split "${SPLIT}" \
  --prompt_type "${PROMPT_TYPE}" \
  --num_test_sample "${NUM_TEST_SAMPLE}" \
  --seed 0 \
  --temperature 0 \
  --n_sampling 1 \
  --top_p 1 \
  --start 0 \
  --end -1 \
  --use_vllm \
  --save_outputs \
  --overwrite \
  --adapt_few_shot

# Evaluation run 4: same settings as the other runs, plus --adapt_few_shot.
# NOTE(review): the exact effect of --adapt_few_shot is defined in
# math_eval.py and is not visible from this script.
DATA_NAME="cmath,cn_middle_school"
# TOKENIZERS_PARALLELISM is set only in the environment of this one command.
TOKENIZERS_PARALLELISM=false \
python3 -u math_eval.py \
  --model_name_or_path "${MODEL_NAME_OR_PATH}" \
  --data_name "${DATA_NAME}" \
  --output_dir "${OUTPUT_DIR}" \
  --split "${SPLIT}" \
  --prompt_type "${PROMPT_TYPE}" \
  --num_test_sample "${NUM_TEST_SAMPLE}" \
  --seed 0 \
  --temperature 0 \
  --n_sampling 1 \
  --top_p 1 \
  --start 0 \
  --end -1 \
  --use_vllm \
  --save_outputs \
  --overwrite \
  --adapt_few_shot

# Evaluation run 5: same base settings as the other runs, with no few-shot
# flags. The final argument deliberately has no trailing backslash, so the
# command cannot absorb whatever line might be appended after it.
DATA_NAME="aime24,amc23"
# TOKENIZERS_PARALLELISM is set only in the environment of this one command.
TOKENIZERS_PARALLELISM=false \
python3 -u math_eval.py \
  --model_name_or_path "${MODEL_NAME_OR_PATH}" \
  --data_name "${DATA_NAME}" \
  --output_dir "${OUTPUT_DIR}" \
  --split "${SPLIT}" \
  --prompt_type "${PROMPT_TYPE}" \
  --num_test_sample "${NUM_TEST_SAMPLE}" \
  --seed 0 \
  --temperature 0 \
  --n_sampling 1 \
  --top_p 1 \
  --start 0 \
  --end -1 \
  --use_vllm \
  --save_outputs \
  --overwrite