#!/bin/bash
#
# Batch job that runs the predict_tool_bm25 evaluation for several
# instruction-tuned models, one after another.
#
# NOTE(review): the header region of this copy of the script is blank. If
# job-scheduler directives are expected at the top of the file, they must be
# restored before submitting - TODO confirm.

# Load the site module system (presumably this defines the `module` command
# for non-interactive shells - confirm), then the Anaconda module and the
# conda environment used by the evaluation.
. /etc/profile.d/modules.sh

if ! module load anaconda/2024.02; then
  echo "Failed to load module anaconda/2024.02" >&2
  exit 1
fi

# Without the environment every evaluation would fail later, so stop here.
if ! conda activate distill; then
  echo "Failed to activate conda environment: distill" >&2
  exit 1
fi

# Print the GPU status into the job log. Diagnostic only: a failure here is
# deliberately not fatal.
nvidia-smi

# Both Hugging Face cache variables point at the same scratch directory.
HF_CACHE_DIR="/exports/eddie/scratch/s2325053/cache"
export HF_DATASETS_CACHE="$HF_CACHE_DIR"
export HF_HOME="$HF_CACHE_DIR"
|
|
| |
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Common scratch prefix shared by every path below.
SCRATCH_ROOT="/exports/eddie/scratch/s2325053"

# Directory holding one sub-directory per model, named after the model.
MODEL_PATH="${SCRATCH_ROOT}/model"
# Results are written to ${OUTPUT_BASE_PATH}/<retrieval_method>/<model_name>.json
OUTPUT_BASE_PATH="${SCRATCH_ROOT}/feedback_qa/ex_part4/judge_retrieval_necessary_check_eddie/data_output"
# Input file handed to the evaluation module via --input_files.
DATASET_PATH="${SCRATCH_ROOT}/feedback_qa/datasets/merged_qa_with_facts.json"

# Retrieval methods to evaluate; each one gets its own output sub-directory.
declare -a RETRIEVAL_METHODS=("bm25")

# Model family (passed as --model_type) -> whitespace-separated list of model
# names belonging to that family. Each name must not contain spaces because
# the main loop splits the value on whitespace.
declare -A MODEL_FAMILIES=(
  ["llama"]="Llama-3.1-70B-Instruct"
  ["qwen"]="Qwen2.5-72B-Instruct"
  ["mistral"]="Mixtral-8x7B-Instruct-v0.1"
)
| |
| |
| |
|
|
| |
#######################################
# Print the value to pass as --per_device_eval_batch_size for a model.
#
# The model name is matched against size markers ("70B"/"72B", "32B"/"27B",
# anything else). Every tier currently uses the same per-GPU value; the tiers
# are kept separate so each can be tuned on its own.
#
# Arguments:
#   $1 - model name, e.g. "Llama-3.1-70B-Instruct"
#   $2 - optional GPU count used as a multiplier (default: 1)
# Outputs:
#   writes the batch size (per-GPU value * GPU count) to stdout
# Returns:
#   0 on success, 1 if the GPU count is not a positive integer
#######################################
get_batch_size() {
  local model_name=$1
  local gpu_count=${2:-1}
  local per_gpu

  # Reject anything that is not a positive integer before doing arithmetic:
  # inside $(( )) a non-numeric word would be evaluated as a variable name.
  if [[ ! $gpu_count =~ ^[1-9][0-9]*$ ]]; then
    printf 'get_batch_size: invalid GPU count: %s\n' "$gpu_count" >&2
    return 1
  fi

  case "$model_name" in
    *70B* | *72B*) per_gpu=40000 ;;
    *32B* | *27B*) per_gpu=40000 ;;
    *) per_gpu=40000 ;;
  esac

  echo $((per_gpu * gpu_count))
}
|
|
| |
#######################################
# Run the predict_tool_bm25 evaluation for one model and retrieval method.
#
# Globals:
#   OUTPUT_BASE_PATH (read) - root directory for results
#   DATASET_PATH     (read) - file passed as --input_files
#   MODEL_PATH       (read) - directory containing the model directories
# Arguments:
#   $1 - model type, passed as --model_type
#   $2 - model name; used as the model directory name under MODEL_PATH and
#        as the stem of the output JSON file
#   $3 - retrieval method, passed as --retrieval and used as the output
#        sub-directory name
# Outputs:
#   a progress line on stdout, followed by whatever the Python module prints
# Returns:
#   the exit status of the Python module, or the status of the first
#   preparation step that failed (batch-size lookup, mkdir)
#######################################
evaluate_model() {
  local model_type=$1
  local model_name=$2
  local retrieval_method=$3

  # Declare and assign separately: `local var=$(cmd)` would hide a failure
  # of get_batch_size behind the (always successful) `local` builtin.
  local batch_size
  batch_size=$(get_batch_size "$model_name") || return

  local output_dir="${OUTPUT_BASE_PATH}/${retrieval_method}"
  mkdir -p -- "$output_dir" || return

  echo "Evaluating ${model_type} model: ${model_name} with ${retrieval_method} retrieval"

  # Arguments are collected in an array so each value stays a single word.
  local -a eval_args=(
    --input_files "$DATASET_PATH"
    --data_output "${output_dir}/${model_name}.json"
    --model_name_or_path "${MODEL_PATH}/${model_name}"
    --use_vllm
    --max_seq_length 4096
    --use_flash_attn
    --retrieval "$retrieval_method"
    --per_device_eval_batch_size "$batch_size"
    --model_type "$model_type"
  )

  # -u keeps Python's output unbuffered so it reaches the job log promptly.
  python -u -m predict_tool_bm25 "${eval_args[@]}"
}
|
|
| |
#######################################
# Evaluate every configured model with every configured retrieval method.
#
# A failed evaluation is reported and skipped; the remaining models are still
# processed. After a successful evaluation the loop sleeps for 10 seconds
# before starting the next one.
#
# Globals:
#   RETRIEVAL_METHODS (read) - indexed array of retrieval method names
#   MODEL_FAMILIES    (read) - associative array: model type -> whitespace-
#                              separated list of model names
# Outputs:
#   progress messages on stdout, failure messages on stderr
# Returns:
#   0 if every evaluation succeeded, 1 if at least one failed
#######################################
run_all_evaluations() {
  local retrieval_method model_type model_name
  local failures=0

  for retrieval_method in "${RETRIEVAL_METHODS[@]}"; do
    echo "Starting evaluations with ${retrieval_method} retrieval method"

    for model_type in "${!MODEL_FAMILIES[@]}"; do
      # Intentionally unquoted: the value is a whitespace-separated list of
      # model names and must be split into individual words here.
      for model_name in ${MODEL_FAMILIES[$model_type]}; do
        echo "Starting evaluation for ${model_type}/${model_name} with ${retrieval_method}"

        if evaluate_model "$model_type" "$model_name" "$retrieval_method"; then
          echo "Successfully evaluated ${model_name} with ${retrieval_method}"
        else
          echo "Failed to evaluate ${model_name} with ${retrieval_method}" >&2
          echo "${model_name} with ${retrieval_method} failed" >&2
          failures=$((failures + 1))
          # Move straight on to the next model; no pause after a failure.
          continue
        fi

        sleep 10
      done
    done
  done

  if ((failures > 0)); then
    echo "${failures} evaluation(s) failed" >&2
    return 1
  fi
  return 0
}

# When this call is the last command in the script, its status becomes the
# script's exit status, so the job is marked as failed if any model failed.
run_all_evaluations