#!/bin/bash
#
# Eddie cluster job script: run judge models on the retrieval-necessity
# check task. Sets up the conda environment and HuggingFace cache paths,
# then (further down in this file) evaluates every configured model with
# every configured retrieval method.

# Bring the environment-modules system into this shell, then activate the
# conda environment that carries the evaluation dependencies.
. /etc/profile.d/modules.sh
module load anaconda/2024.02
# Abort early if activation fails — otherwise every evaluation below would
# silently run in the wrong (base) environment.
conda activate distill || { echo "ERROR: conda activate distill failed" >&2; exit 1; }

# Log the visible GPUs into the job output for debugging.
nvidia-smi

# Keep HuggingFace datasets/models on scratch storage instead of $HOME
# (home quota is too small for 70B-class checkpoints).
export HF_DATASETS_CACHE="/exports/eddie/scratch/s2325053/cache"
export HF_HOME="/exports/eddie/scratch/s2325053/cache"
| |
|
| | |
# ---- Configuration (readonly: nothing below should mutate these) ----

# Root directory holding one subdirectory per model checkpoint.
readonly MODEL_PATH="/exports/eddie/scratch/s2325053/model"
# Per-retrieval-method prediction JSON files are written under here.
readonly OUTPUT_BASE_PATH="/exports/eddie/scratch/s2325053/feedback_qa/ex_part4/judge_retrieval_necessary_check_eddie/data_output"
# Merged QA dataset consumed by the prediction script.
readonly DATASET_PATH="/exports/eddie/scratch/s2325053/feedback_qa/datasets/merged_qa_with_facts.json"

# Retrieval methods to evaluate with.
declare -a RETRIEVAL_METHODS=("contrieve")

# Model families to evaluate.
# Key   = family tag, passed to the evaluator as --model_type.
# Value = one or more space-separated model directory names under MODEL_PATH
#         (the driver loop word-splits the value on purpose).
declare -A MODEL_FAMILIES=(
  ["qwen"]="Qwen2.5-72B-Instruct"
  ["llama"]="Llama-3.1-70B-Instruct"
)
readonly -A MODEL_FAMILIES
| |
|
| | |
#######################################
# Compute the evaluation batch size for a model.
# Arguments: $1 - model directory name (e.g. "Qwen2.5-72B-Instruct")
# Outputs:   batch size to stdout
# Returns:   0
#######################################
get_batch_size() {
  local model_name=$1
  # Per-node GPU count; scale factor for the batch size.
  local gpu_count=1

  # NOTE(review): the original branched on model size (*70B*/*72B*,
  # *32B*/*27B*, default) but every branch returned the identical value
  # 40000 * gpu_count, so the branching was dead code and has been
  # collapsed. Re-introduce a case on "$model_name" here if per-size
  # tuning is ever needed.
  echo $((40000 * gpu_count))
}
| |
|
| | |
#######################################
# Run one model evaluation via the prediction script.
# Globals:   MODEL_PATH, OUTPUT_BASE_PATH, DATASET_PATH (read)
# Arguments: $1 - model family tag, forwarded as --model_type
#            $2 - model directory name under MODEL_PATH
#            $3 - retrieval method label, forwarded as --retrieval
# Outputs:   progress line to stdout; predictions written to
#            OUTPUT_BASE_PATH/<retrieval>/<model>.json
# Returns:   non-zero if setup or the python run fails
#######################################
evaluate_model() {
  local model_type=$1
  local model_name=$2
  local retrieval_method=$3

  # Declaration split from assignment so a helper failure is not masked
  # by `local`'s own exit status (ShellCheck SC2155).
  local batch_size
  batch_size=$(get_batch_size "$model_name") || return

  local output_dir="${OUTPUT_BASE_PATH}/${retrieval_method}"
  mkdir -p "$output_dir" || return

  echo "Evaluating ${model_type} model: ${model_name} with ${retrieval_method} retrieval"

  python -u -m predict_tool_contrieve_large \
    --input_files "$DATASET_PATH" \
    --data_output "${output_dir}/${model_name}.json" \
    --model_name_or_path "${MODEL_PATH}/${model_name}" \
    --use_vllm \
    --max_seq_length 4096 \
    --use_flash_attn \
    --retrieval "$retrieval_method" \
    --per_device_eval_batch_size "$batch_size" \
    --model_type "$model_type"
}
| |
|
| | |
# Driver: evaluate every (retrieval method x family x model) combination.
# A failed run is logged and skipped; after a successful run we pause 10s
# before loading the next model.
for retrieval_method in "${RETRIEVAL_METHODS[@]}"; do
  echo "Starting evaluations with ${retrieval_method} retrieval method"

  for model_type in "${!MODEL_FAMILIES[@]}"; do
    # Intentionally unquoted: a family entry may list several
    # space-separated model names.
    for model_name in ${MODEL_FAMILIES[$model_type]}; do
      echo "Starting evaluation for ${model_type}/${model_name} with ${retrieval_method}"

      if ! evaluate_model "$model_type" "$model_name" "$retrieval_method"; then
        echo "Failed to evaluate ${model_name} with ${retrieval_method}"
        echo "${model_name} with ${retrieval_method} failed"
        continue
      fi
      echo "Successfully evaluated ${model_name} with ${retrieval_method}"

      sleep 10
    done
  done
done