File size: 4,296 Bytes
237451b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/bin/bash
#
# SGE job script: evaluates the models listed in MODEL_FAMILIES (below) with
# the predict_tool_contrieve_large module, once per retrieval method, writing
# per-model JSON outputs under OUTPUT_BASE_PATH.

# Grid Engine options (lines prefixed with #$)
# Runtime limit of 48 hours:
#$ -l h_rt=48:00:00
#
# Set working directory to the directory where the job is submitted from:
#$ -cwd
#
# Submit to the gpu queue and request 4 A100 GPUs via the parallel environment:
#$ -q gpu 
#$ -pe gpu-a100 4
#$ -P is_courses
# Disabled directive (missing '$' after '#', so SGE ignores it):
#-wd /exports/eddie/scratch/s2325053/
# Request system RAM; per the cluster docs quoted originally, the total system
# RAM available to the job is the value specified here multiplied by
# the number of requested GPUs (above):
#$ -l h_vmem=300G
# Disabled resource request (leading '#$' removed):
# -l rl9=false
# Initialise the environment modules system, load Anaconda, and activate the
# 'distill' conda env. NOTE(review): a previous comment claimed CUDA 11.0.2
# was loaded here — only the anaconda module is loaded.
. /etc/profile.d/modules.sh
module load anaconda/2024.02
conda activate distill 

# Environment Configuration
# Log visible GPUs into the job output for debugging.
nvidia-smi
# export CUDA_VISIBLE_DEVICES=0,12,3
# Keep HuggingFace dataset/model caches on scratch space.
export HF_DATASETS_CACHE="/exports/eddie/scratch/s2325053/cache"
export HF_HOME="/exports/eddie/scratch/s2325053/cache"

# Path Configuration
# MODEL_PATH:       root directory holding one sub-directory per model
#                   checkpoint (joined with the model name below)
# OUTPUT_BASE_PATH: root for prediction outputs; one sub-directory is created
#                   per retrieval method
# DATASET_PATH:     merged QA dataset passed to the evaluator via --input_files
MODEL_PATH="/exports/eddie/scratch/s2325053/model"
OUTPUT_BASE_PATH="/exports/eddie/scratch/s2325053/feedback_qa/ex_part4/judge_retrieval_necessary_check_eddie/data_output"
DATASET_PATH="/exports/eddie/scratch/s2325053/feedback_qa/datasets/merged_qa_with_facts.json"

# Define retrieval methods to evaluate (each gets its own output directory).
declare -a RETRIEVAL_METHODS=("contrieve")

# Model Configuration: associative array mapping model family (forwarded as
# --model_type) to a space-separated list of checkpoint directory names.
declare -A MODEL_FAMILIES
# Full roster of candidate models (kept for reference; re-enable as needed):
# MODEL_FAMILIES=(
#     ["llama"]="Llama-3.1-8B-Instruct Llama-3.1-70B-Instruct Llama-3.2-1B-Instruct Llama-3.2-3B-Instruct"
#     ["qwen"]="Qwen2.5-1.5B-Instruct Qwen2.5-3B-Instruct Qwen2.5-7B-Instruct Qwen2.5-14B-Instruct Qwen2.5-32B-Instruct Qwen2.5-72B-Instruct"
#     ["mistral"]="Mistral-7B-Instruct-v0.3 Ministral-8B-Instruct-2410 Mixtral-8x7B-Instruct-v0.1"
#     ["gemma"]="gemma-2-2b-it gemma-2-9b-it gemma-2-27b-it"
#     ["phi"]="Phi-3-mini-4k-instruct Phi-3-medium-4k-instruct Phi-3-small-8k-instruct"
# )
# Currently active subset:
MODEL_FAMILIES=(
    ["qwen"]="Qwen2.5-72B-Instruct"
    # ["phi"]="Phi-3-small-8k-instruct"
    # ["mistral"]="Mixtral-8x7B-Instruct-v0.1"
    ["llama"]="Llama-3.1-70B-Instruct"
)

#######################################
# Determine the eval batch size for a model.
# Globals:   GPU_COUNT (optional) - multiplier for the base batch size;
#            defaults to 1, preserving the original behaviour. The job
#            requests 4 GPUs via -pe, so export GPU_COUNT=4 to scale.
# Arguments: $1 - model checkpoint name (currently unused — see note below)
# Outputs:   batch size on stdout
#######################################
get_batch_size() {
    # NOTE(review): the original if/elif/else tiered on 70B/72B/32B/27B
    # substrings in the model name, but every branch emitted the identical
    # 40000 * gpu_count, so the branching was dead logic and was collapsed.
    # The parameter is kept so callers (and a future re-tiering) are unchanged.
    local model_name=$1
    local gpu_count=${GPU_COUNT:-1}
    local base=40000

    echo $(( base * gpu_count ))
}

#######################################
# Run one model evaluation with the given retrieval method.
# Globals:   MODEL_PATH, OUTPUT_BASE_PATH, DATASET_PATH (read)
# Arguments: $1 - model family, forwarded as --model_type
#            $2 - model checkpoint directory name under MODEL_PATH
#            $3 - retrieval method, forwarded as --retrieval
# Outputs:   progress message on stdout; predictions are written by the
#            python module to ${OUTPUT_BASE_PATH}/<retrieval>/<model>.json
# Returns:   non-zero if the batch size lookup fails, the output directory
#            cannot be created, or the python run fails
#######################################
evaluate_model() {
    local model_type=$1
    local model_name=$2
    local retrieval_method=$3

    # Declaration and assignment are split so a failure of get_batch_size is
    # not masked by 'local' always succeeding (ShellCheck SC2155).
    local batch_size
    batch_size=$(get_batch_size "$model_name") || return

    # One output directory per retrieval method; bail out now rather than
    # letting a long python run fail at write time.
    local output_dir="${OUTPUT_BASE_PATH}/${retrieval_method}"
    mkdir -p "$output_dir" || return

    echo "Evaluating ${model_type} model: ${model_name} with ${retrieval_method} retrieval"

    python -u -m predict_tool_contrieve_large \
        --input_files "$DATASET_PATH" \
        --data_output "${output_dir}/${model_name}.json" \
        --model_name_or_path "${MODEL_PATH}/${model_name}" \
        --use_vllm \
        --max_seq_length 4096 \
        --use_flash_attn \
        --retrieval "$retrieval_method" \
        --per_device_eval_batch_size "$batch_size" \
        --model_type "$model_type"
}

# Main execution loop: every retrieval method x every configured model.
for retrieval_method in "${RETRIEVAL_METHODS[@]}"; do
    echo "Starting evaluations with ${retrieval_method} retrieval method"

    for model_type in "${!MODEL_FAMILIES[@]}"; do
        # Intentionally unquoted: each family entry is a space-separated list
        # of model names that must word-split into individual iterations.
        for model_name in ${MODEL_FAMILIES[$model_type]}; do
            echo "Starting evaluation for ${model_type}/${model_name} with ${retrieval_method}"

            # Guard clause: on failure, log and move straight to the next
            # model (skipping the inter-run delay).
            if ! evaluate_model "$model_type" "$model_name" "$retrieval_method"; then
                echo "Failed to evaluate ${model_name} with ${retrieval_method}"
                echo "${model_name} with ${retrieval_method} failed"
                continue
            fi
            echo "Successfully evaluated ${model_name} with ${retrieval_method}"

            # Short delay between successful evaluations.
            sleep 10
        done
    done
done