#!/usr/bin/env bash
#
# Baseline chain-of-thought evaluation driver.
#
# Runs `python main.py` once per benchmark dataset and redirects each run's
# stdout to a file under ./logs (relative to the current working directory).
#
# Usage:
#   <this-script> [--model_name_or_path ID_OR_PATH] [--max_new_tokens N]
#                 [--output_dir DIR] [--ckpt_suffix SUFFIX] [--verbose N]
#                 [--device DEVICE] [--num_thought_tokens N]

# Fail on typos in variable names. `set -e` is deliberately NOT enabled: one
# failing dataset run must not prevent the remaining datasets from running.
set -u

# Log files are written here by the evaluation runs.
mkdir -p "logs"

# Default settings; each one can be overridden by the same-named --option.
model_name_or_path="meta-llama/Llama-3.1-8B-Instruct"
output_dir="./output"
device="cuda"
ckpt_suffix="cot-unk"
max_new_tokens=4096
num_thought_tokens=10
verbose=1

#######################################
# Parse `--name value` command-line overrides into the settings above.
# Globals:
#   model_name_or_path, max_new_tokens, output_dir, ckpt_suffix, verbose,
#   device, num_thought_tokens (written)
# Arguments:
#   The script's command-line arguments.
# Outputs:
#   Diagnostics for unknown or incomplete arguments on stderr.
# Returns:
#   0 on success; 2 when a known option is given without a value.
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --model_name_or_path | --max_new_tokens | --output_dir | --ckpt_suffix | \
      --verbose | --device | --num_thought_tokens)
        # `shift 2` fails without shifting when only one argument is left,
        # which would make this loop spin forever; reject that case up front.
        if (( $# < 2 )); then
          printf 'Missing value for argument: %s\n' "$1" >&2
          return 2
        fi
        # Every option is named after the variable it sets, so strip the
        # leading "--" and assign. The case pattern above whitelists the names.
        printf -v "${1#--}" '%s' "$2"
        shift 2
        ;;
      *)
        # Unknown arguments are reported and skipped rather than fatal.
        printf 'Unknown argument: %s\n' "$1" >&2
        shift
        ;;
    esac
  done
}

parse_args "$@" || exit 2

# Print the effective settings (defaults plus any command-line overrides) so
# the console output records exactly what this invocation ran with.
printf '%s\n' \
  "=================== [Default Args] =====================" \
  "Model ID: ${model_name_or_path}" \
  "Device: ${device}" \
  "Output dir: ${output_dir}" \
  "Checkpoint suffix: ${ckpt_suffix}" \
  "Max new tokens: ${max_new_tokens}" \
  "Verbose: ${verbose}" \
  "--------------------------------------------------------"

#######################################
# Reduce a model id or path to its last path component.
# "org/name" ids give "name"; local paths such as "/data/ckpt/name/" also give
# "name". Stripping only up to the first "/" would leave slashes in the result
# and, because the result is embedded in log file names, point the log at a
# directory that does not exist.
# Arguments:
#   $1 - model id or filesystem path
# Outputs:
#   The last path component on stdout (no trailing newline).
#######################################
derive_model_name() {
  local path="$1"
  # Drop any trailing slashes so "dir/" and "dir" give the same name.
  path="${path%"${path##*[!/]}"}"
  printf '%s' "${path##*/}"
}

# Short model name used when composing log file names.
model_name="$(derive_model_name "${model_name_or_path}")"

#######################################
# Build the main.py command line for one dataset as an argument array.
# An array (instead of a string passed to `eval`) keeps every value a single
# literal argument, even if it contains spaces, quotes, `$` or backticks.
# Globals:
#   model_name_or_path, output_dir, device, ckpt_suffix, max_new_tokens,
#   num_thought_tokens, verbose (read); eval_cmd (written)
# Arguments:
#   $1 - dataset identifier passed to --dataset
#######################################
build_eval_cmd() {
  eval_cmd=(
    python main.py
    --dataset "$1"
    --model_name_or_path "${model_name_or_path}"
    --output_dir "${output_dir}"
    --device "${device}"
    --ckpt_suffix "${ckpt_suffix}"
    --max_new_tokens "${max_new_tokens}"
    --num_thought_tokens "${num_thought_tokens}"
    --max_num_steps 0 # fixed at 0 by this script; semantics live in main.py
    --verbose "${verbose}"
  )
}

#######################################
# Print the log file path for one dataset run.
# Globals:
#   model_name, max_new_tokens (read)
# Arguments:
#   $1 - short dataset tag embedded in the file name
# Outputs:
#   The log path on stdout (no trailing newline).
#######################################
eval_log_file() {
  printf 'logs/Baseline-CoT-Unk-%s-%s-max_tokens%s.log' \
    "$1" "${model_name}" "${max_new_tokens}"
}

#######################################
# Evaluate one dataset: announce the run, execute main.py with stdout
# redirected to the log file, then report the finish time and elapsed seconds.
# Globals:
#   eval_cmd (written via build_eval_cmd); see build_eval_cmd / eval_log_file
# Arguments:
#   $1 - dataset identifier
#   $2 - short dataset tag used in the log file name
# Outputs:
#   Progress messages on stdout; a failure notice on stderr.
# Returns:
#   The exit status of the main.py invocation.
#######################################
run_eval() {
  local dataset="$1"
  local tag="$2"
  local log_file_name start_time end_time elapsed_time
  local status=0

  start_time=$(date +%s)
  echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
  echo "Eval Dataset: ${dataset} at $(date)"

  build_eval_cmd "${dataset}"
  log_file_name="$(eval_log_file "${tag}")"

  # Show the exact command in a form that can be pasted back into a shell.
  printf '%q ' "${eval_cmd[@]}"
  printf '> %q\n' "${log_file_name}"

  # Only stdout goes to the log; stderr stays on the terminal.
  "${eval_cmd[@]}" > "${log_file_name}" || status=$?
  if (( status != 0 )); then
    printf 'Evaluation for dataset %s failed with exit status %d (log: %s)\n' \
      "${dataset}" "${status}" "${log_file_name}" >&2
  fi

  end_time=$(date +%s)
  elapsed_time=$((end_time - start_time))
  echo "Evaluation for dataset ${dataset} finished at: $(date)"
  echo "Elapsed time: ${elapsed_time} seconds"

  return "${status}"
}

# Datasets to evaluate, in order, as "<dataset id>:<log tag>".
eval_specs=(
  "Maxwell-Jia/AIME_2024:AIME2024"
  "opencompass/AIME2025:AIME2025"
  "openai/gsm8k:GSM8K"
  "HuggingFaceH4/MATH-500:MATH500"
  "xuyige/ASDiv-Aug:ASDivAug"
)

# A failing run is counted but does not stop the remaining datasets.
failed_runs=0
for eval_spec in "${eval_specs[@]}"; do
  run_eval "${eval_spec%%:*}" "${eval_spec##*:}" || failed_runs=$((failed_runs + 1))
done

if (( failed_runs > 0 )); then
  printf '%d of %d evaluation runs failed\n' \
    "${failed_runs}" "${#eval_specs[@]}" >&2
fi

# When this is the last command of the script, its status becomes the
# script's exit status: non-zero if any run failed.
(( failed_runs == 0 ))